Skip to content
72 changes: 46 additions & 26 deletions dataretrieval/wqp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from __future__ import annotations

import warnings
from functools import cached_property
from io import StringIO
from typing import TYPE_CHECKING

Expand All @@ -22,6 +23,10 @@
if TYPE_CHECKING:
from pandas import DataFrame

# Request-parameter names that may carry a site filter, in lookup priority
# order: the WQP-native site key first, then legacy NWIS-style aliases.
# WQP_Metadata.site_info walks these in order and forwards whichever matched
# as what_sites' `siteid`.
_SITE_KEYS = ("siteid", "sites", "site", "site_no")


# Result profile names, grouped by service generation (WQX3.0 vs. legacy).
# NOTE(review): presumably the values accepted for `dataProfile` by
# get_results -- confirm against its validation logic, which is not visible here.
result_profiles_wqx3 = ["basicPhysChem", "fullPhysChem", "narrow"]
result_profiles_legacy = ["biological", "narrowResult", "resultPhysChem"]
Expand Down Expand Up @@ -147,7 +152,7 @@ def get_results(
response = query(url, kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def what_sites(
Expand Down Expand Up @@ -202,7 +207,7 @@ def what_sites(

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def what_organizations(
Expand Down Expand Up @@ -253,7 +258,7 @@ def what_organizations(

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def what_projects(ssl_check=True, legacy=True, **kwargs):
Expand Down Expand Up @@ -300,7 +305,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs):

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def what_activities(
Expand Down Expand Up @@ -364,7 +369,7 @@ def what_activities(

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def what_detection_limits(
Expand Down Expand Up @@ -422,7 +427,7 @@ def what_detection_limits(

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def what_habitat_metrics(
Expand Down Expand Up @@ -473,7 +478,7 @@ def what_habitat_metrics(

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def what_project_weights(ssl_check=True, legacy=True, **kwargs):
Expand Down Expand Up @@ -525,7 +530,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs):

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def what_activity_metrics(ssl_check=True, legacy=True, **kwargs):
Expand Down Expand Up @@ -577,7 +582,7 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs):

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)

return df, WQP_Metadata(response)
return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs)


def wqp_url(service):
Expand Down Expand Up @@ -615,27 +620,38 @@ class WQP_Metadata(BaseMetadata):
----------
url : str
Response url
query_time : datetme.timedelta
query_time : datetime.timedelta
Response elapsed time
header : requests.structures.CaseInsensitiveDict
Response headers
comments : None
Metadata comments. WQP does not return comments.
site_info : tuple[pd.DataFrame, NWIS_Metadata] | None
Site information if the query included `sites`, `site` or `site_no`.
comment : None
Metadata comment. WQP does not return comments.
site_info : tuple[pd.DataFrame, WQP_Metadata] | None
Site information if the query included a site filter (`siteid`,
`sites`, `site`, or `site_no`).
"""

def __init__(self, response, **parameters) -> None:
def __init__(
self, response, legacy: bool = True, ssl_check: bool = True, **parameters
) -> None:
"""Generates a standard set of metadata informed by the response with specific
metadata for WQP data.

Parameters
----------
response : Response
Response object from requests module

legacy : bool
Whether the originating request used the legacy WQX endpoint.
Forwarded to ``what_sites`` when ``site_info`` is accessed so the
resolved station metadata uses the same profile as the original
query.
ssl_check : bool
Whether the originating request verified SSL. Forwarded to
``what_sites`` for consistency.
parameters : dict
Unpacked dictionary of the parameters supplied in the request
Unpacked dictionary of the remaining parameters supplied in the
request.

Returns
-------
Expand All @@ -647,15 +663,19 @@ def __init__(self, response, **parameters) -> None:
super().__init__(response)

self._parameters = parameters

@property
def site_info(self):
    """Return station metadata for the site filter of the original query.

    The stored request parameters are searched for a site filter under
    each key in ``_SITE_KEYS`` (``siteid``, ``sites``, ``site``,
    ``site_no``), in that order. The first key present is forwarded to
    ``what_sites`` as ``siteid``.

    Returns
    -------
    tuple[pd.DataFrame, WQP_Metadata] | None
        Whatever ``what_sites`` returns for the matched site filter, or
        ``None`` when the query carried none of the recognised keys.
    """
    # The request parameters live on the instance; the bare name
    # `parameters` is local to __init__ and is not in scope here, so reading
    # it raised NameError as soon as any branch matched.
    for key in _SITE_KEYS:
        if key in self._parameters:
            # `siteid` is the keyword the rest of this module uses for the
            # site filter, regardless of which alias the caller supplied.
            return what_sites(siteid=self._parameters[key])
    return None
self._legacy = legacy
self._ssl_check = ssl_check

@cached_property
def site_info(self):
    """Return station metadata for the site filter of the original query.

    The first key from ``_SITE_KEYS`` that appears in the stored request
    parameters is used. Its value is passed to ``what_sites`` as ``siteid``
    together with the ``legacy`` and ``ssl_check`` settings stored on this
    instance. Because this is a ``cached_property``, the lookup runs on
    first access only and the result is stored on the instance.

    Returns
    -------
    tuple[pd.DataFrame, WQP_Metadata] | None
        Whatever ``what_sites`` returns, or ``None`` when the request
        parameters contain none of the recognised site keys.
    """
    # Pick the highest-priority site key that the caller actually supplied.
    matched = next((name for name in _SITE_KEYS if name in self._parameters), None)
    if matched is None:
        return None

    return what_sites(
        siteid=self._parameters[matched],
        legacy=self._legacy,
        ssl_check=self._ssl_check,
    )


def _check_kwargs(kwargs):
Expand Down
65 changes: 65 additions & 0 deletions tests/wqp_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import datetime
from unittest import mock

import pytest
from pandas import DataFrame

from dataretrieval.wqp import (
WQP_Metadata,
_check_kwargs,
get_results,
what_activities,
Expand Down Expand Up @@ -218,6 +220,69 @@ def test_check_kwargs():
kwargs = _check_kwargs(kwargs)


def test_wqp_metadata_site_info_property_resolves(requests_mock):
    """A site-filtered query exposes the `what_sites` result through `site_info`.

    Both the Result and the Station endpoints are mocked; the metadata of the
    Result query must resolve `site_info` to a `(DataFrame, metadata)` pair
    whose metadata URL is the Station endpoint.
    """
    result_endpoint = (
        "https://www.waterqualitydata.us/data/Result/Search?"
        "siteid=WIDNR_WQX-10032762&mimeType=csv"
    )
    station_endpoint = (
        "https://www.waterqualitydata.us/data/Station/Search?"
        "siteid=WIDNR_WQX-10032762&mimeType=csv"
    )

    # Register the canned responses: results first, then stations.
    fixtures = (
        (result_endpoint, "tests/data/wqp_results.txt"),
        (station_endpoint, "tests/data/wqp_sites.txt"),
    )
    for endpoint, fixture_path in fixtures:
        mock_request(requests_mock, endpoint, fixture_path)

    _results, metadata = get_results(siteid="WIDNR_WQX-10032762")
    stations, station_metadata = metadata.site_info

    assert isinstance(stations, DataFrame)
    assert station_metadata.url == station_endpoint


def test_wqp_metadata_site_info_returns_none_without_site_filter(requests_mock):
    """Without any site key in the query, `site_info` resolves to None."""
    result_endpoint = (
        "https://www.waterqualitydata.us/data/Result/Search?"
        "characteristicName=Chloride&mimeType=csv"
    )
    mock_request(requests_mock, result_endpoint, "tests/data/wqp_results.txt")

    # Only a characteristic filter is supplied -- no siteid or alias.
    _results, metadata = get_results(characteristicName="Chloride")

    assert metadata.site_info is None


def test_wqp_metadata_site_info_uses_wqx3_when_originating_query_was_wqx3(
    requests_mock,
):
    """A `legacy=False` query makes `site_info` hit the WQX3.0 Station endpoint."""
    result_endpoint = (
        "https://www.waterqualitydata.us/wqx3/Result/search?"
        "siteid=UTAHDWQ_WQX-4993795&mimeType=csv&dataProfile=fullPhysChem"
    )
    station_wqx3_endpoint = (
        "https://www.waterqualitydata.us/wqx3/Station/search?"
        "siteid=UTAHDWQ_WQX-4993795&mimeType=csv"
    )

    # Only the wqx3 Station URL is mocked, so the follow-up request made by
    # `site_info` has to target it.
    fixtures = (
        (result_endpoint, "tests/data/wqp3_results.txt"),
        (station_wqx3_endpoint, "tests/data/wqp_sites.txt"),
    )
    for endpoint, fixture_path in fixtures:
        mock_request(requests_mock, endpoint, fixture_path)

    _results, metadata = get_results(legacy=False, siteid="UTAHDWQ_WQX-4993795")
    _stations, station_metadata = metadata.site_info

    assert station_metadata.url == station_wqx3_endpoint


@pytest.mark.parametrize("key", ["siteid", "sites", "site", "site_no"])
def test_wqp_metadata_site_info_forwards_each_alias_as_siteid(key):
    """Whichever alias carries the site filter, `what_sites` receives it as `siteid`."""
    canned_result = (DataFrame(), mock.Mock())
    site_filter = {key: "USGS-X"}

    # Replace what_sites in the module under test so no request is made.
    with mock.patch(
        "dataretrieval.wqp.what_sites", return_value=canned_result
    ) as fake_what_sites:
        metadata = WQP_Metadata(
            mock.Mock(), legacy=False, ssl_check=True, **site_filter
        )
        _ = metadata.site_info  # accessing the property triggers the lookup

    fake_what_sites.assert_called_once_with(
        siteid="USGS-X", legacy=False, ssl_check=True
    )


def test_get_results_wqx3_preserves_user_dataProfile(requests_mock):
"""A valid user-supplied WQX3.0 profile must not be overwritten.

Expand Down