diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py index ffc81bb2..54fe7a94 100644 --- a/dataretrieval/__init__.py +++ b/dataretrieval/__init__.py @@ -5,11 +5,11 @@ except PackageNotFoundError: __version__ = "version-unknown" -from dataretrieval.nadp import * -from dataretrieval.nwis import * -from dataretrieval.samples import * -from dataretrieval.streamstats import * -from dataretrieval.utils import * -from dataretrieval.waterdata import * -from dataretrieval.waterwatch import * -from dataretrieval.wqp import * +from dataretrieval.nadp import * # noqa: F403 +from dataretrieval.nwis import * # noqa: F403 +from dataretrieval.samples import * # noqa: F403 +from dataretrieval.streamstats import * # noqa: F403 +from dataretrieval.utils import * # noqa: F403 +from dataretrieval.waterdata import * # noqa: F403 +from dataretrieval.waterwatch import * # noqa: F403 +from dataretrieval.wqp import * # noqa: F403 diff --git a/dataretrieval/codes/__init__.py b/dataretrieval/codes/__init__.py index a1b0e400..eca1cc1e 100755 --- a/dataretrieval/codes/__init__.py +++ b/dataretrieval/codes/__init__.py @@ -1,2 +1,2 @@ -from .states import * -from .timezones import * +from .states import * # noqa: F403 +from .timezones import * # noqa: F403 diff --git a/dataretrieval/nldi.py b/dataretrieval/nldi.py index a3825704..89df8a65 100644 --- a/dataretrieval/nldi.py +++ b/dataretrieval/nldi.py @@ -5,8 +5,8 @@ try: import geopandas as gpd -except ImportError: - raise ImportError("Install geopandas to use the NLDI module.") +except ImportError as err: + raise ImportError("Install geopandas to use the NLDI module.") from err NLDI_API_BASE_URL = "https://api.water.usgs.gov/nldi/linked-data" _AVAILABLE_DATA_SOURCES = None @@ -281,7 +281,7 @@ def get_features( query_params = {} if lat: - err_msg = f"Error getting features for lat '{lat}'" f" and long '{long}'" + err_msg = f"Error getting features for lat '{lat}' and long '{long}'" elif feature_source: err_msg = ( f"Error getting features for feature source '{feature_source}'" diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index e4615d10..099e9ec8 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -26,7 +26,7 @@ "The 'nwis' services are deprecated and being decommissioned. " "Please use the 'waterdata' module to access the new services.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) WATERDATA_BASE_URL = "https://nwis.waterdata.usgs.gov/" @@ -141,7 +141,7 @@ def get_qwdata( """ raise NameError( "`nwis.get_qwdata` has been replaced with `waterdata.get_samples()`." - ) + ) def get_discharge_measurements( @@ -157,7 +157,7 @@ def get_discharge_measurements( Parameters ---------- sites: string or list of strings, optional, default is None - start: string, optional, default is None + start: string, optional, default is None Supply date in the format: YYYY-MM-DD end: string, optional, default is None Supply date in the format: YYYY-MM-DD @@ -344,7 +344,7 @@ def get_gwlevels( if datetime_index is True: df = format_datetime(df, "lev_dt", "lev_tm", "lev_tz_cd") - + # Filter by kwarg parameterCd because the service doesn't do it if "parameterCd" in kwargs: pcodes = kwargs["parameterCd"] @@ -696,7 +696,8 @@ def get_info(ssl_check: bool = True, **kwargs) -> Tuple[pd.DataFrame, BaseMetada "refer to https://waterdata.usgs.gov.nwis/qwdata and " "https://doi-usgs.github.io/dataRetrieval/articles/Status.html " "or email CompTools@usgs.gov." 
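Aside on the nldi.py change above: `raise ... from err` chains the new ImportError to the original failure, so the traceback shows both the actionable message and the root cause. A minimal standalone sketch of the pattern (not part of the diff):

```python
# Exception chaining: "from err" records the original error as
# __cause__, so the traceback prints the root failure followed by
# "The above exception was the direct cause of the following exception".
try:
    import geopandas as gpd  # optional dependency
except ImportError as err:
    raise ImportError("Install geopandas to use the NLDI module.") from err
```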
-        )
+        ),
+        stacklevel=2,
     )
     # convert bool to string if necessary
     kwargs["seriesCatalogOutput"] = "True"
diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py
index a6df85b3..1b6f7ffd 100644
--- a/dataretrieval/samples.py
+++ b/dataretrieval/samples.py
@@ -6,18 +6,19 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Literal, get_args
-
-import pandas as pd
 import warnings
+from typing import TYPE_CHECKING
 
 from dataretrieval.utils import BaseMetadata
 
 if TYPE_CHECKING:
     from typing import Optional, Tuple, Union
 
-    from dataretrieval.waterdata import SERVICES, PROFILES
+    from pandas import DataFrame
+
+    from dataretrieval.waterdata import PROFILES, SERVICES
+
 
 def get_usgs_samples(
     ssl_check: bool = True,
     service: SERVICES = "results",
@@ -111,7 +112,7 @@ def get_usgs_samples(
         A user supplied characteristic name describing one or more results.
     boundingBox: list of four floats, optional
         Filters on the associated monitoring location's point location
-        by checking if it is located within the specified geographic area. 
+        by checking if it is located within the specified geographic area.
         The logic is inclusive, i.e. it will include locations that overlap
         with the edge of the bounding box. Values are separated by commas,
         expressed in decimal degrees, NAD83, and longitudes west of Greenwich
@@ -120,7 +121,7 @@ def get_usgs_samples(
         - Western-most longitude
         - Southern-most latitude
         - Eastern-most longitude
-        - Northern-most longitude 
+        - Northern-most latitude
         Example: [-92.8,44.2,-88.9,46.0]
     countryFips : string or list of strings, optional
         Example: "US" (United States)
@@ -143,7 +144,7 @@ def get_usgs_samples(
     usgsPCode : string or list of strings, optional
         5-digit number used in the US Geological Survey computerized
         data system, National Water Information System (NWIS), to
-        uniquely identify a specific constituent. Check the 
+        uniquely identify a specific constituent. Check the
         `characteristic_lookup()` function in this module for all
         possible inputs.
         Example: "00060" (Discharge, cubic feet per second)
@@ -173,7 +174,7 @@ def get_usgs_samples(
     recordIdentifierUserSupplied : string or list of strings, optional
         Internal AQS record identifier that returns 1 entry.
         Only available for the "results" service.
-    
+
     Returns
     -------
     df : ``pandas.DataFrame``
@@ -187,8 +188,8 @@ def get_usgs_samples(
 
     >>> # Get PFAS results within a bounding box
     >>> df, md = dataretrieval.samples.get_usgs_samples(
-    ...     boundingBox=[-90.2,42.6,-88.7,43.2],
-    ...     characteristicGroup="Organics, PFAS"
+    ...     boundingBox=[-90.2, 42.6, -88.7, 43.2],
+    ...     characteristicGroup="Organics, PFAS",
     ...     )
 
     >>> # Get all activities for the Commonwealth of Virginia over a date range
@@ -197,22 +198,31 @@ def get_usgs_samples(
     ...     profile="sampact",
     ...     activityStartDateLower="2023-10-01",
     ...     activityStartDateUpper="2024-01-01",
-    ...     stateFips="US:51")
+    ...     stateFips="US:51",
+    ...     )
 
     >>> # Get all pH samples for two sites in Utah
     >>> df, md = dataretrieval.samples.get_usgs_samples(
-    ...     monitoringLocationIdentifier=['USGS-393147111462301', 'USGS-393343111454101'],
-    ...     usgsPCode='00400')
+    ...     monitoringLocationIdentifier=[
+    ...         "USGS-393147111462301",
+    ...         "USGS-393343111454101",
+    ...     ],
+    ...     usgsPCode="00400",
+    ...     )
     """
     warnings.warn(
-        "`get_usgs_samples` is deprecated and will be removed. Use `waterdata.get_samples` instead.",
+        (
+            "`get_usgs_samples` is deprecated and will be removed. "
+            "Use `waterdata.get_samples` instead."
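For callers hitting this deprecation, a minimal migration sketch. It reuses the keyword arguments from the pH doctest above; the diff shows `get_usgs_samples` forwarding these same keywords to `waterdata.get_samples`, so the call translates directly:

```python
# Before (deprecated): dataretrieval.samples.get_usgs_samples(...)
# After: same keyword arguments, new module.
from dataretrieval import waterdata

df, md = waterdata.get_samples(
    monitoringLocationIdentifier=[
        "USGS-393147111462301",
        "USGS-393343111454101",
    ],
    usgsPCode="00400",  # pH
)
```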
+ ), DeprecationWarning, stacklevel=2, ) from dataretrieval.waterdata import get_samples + result = get_samples( ssl_check=ssl_check, service=service, @@ -242,5 +252,3 @@ def get_usgs_samples( ) return result - - diff --git a/dataretrieval/streamstats.py b/dataretrieval/streamstats.py index 1de0b74f..7cddabaa 100644 --- a/dataretrieval/streamstats.py +++ b/dataretrieval/streamstats.py @@ -158,4 +158,4 @@ def from_streamstats_json(cls, streamstats_json): def __init__(self, rcode, xlocation, ylocation): """Init method that calls the :obj:`from_streamstats_json` method.""" - self = get_watershed(rcode, xlocation, ylocation) + get_watershed(rcode, xlocation, ylocation) diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 53e95acc..7923eb65 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -39,16 +39,16 @@ def to_str(listlike, delimiter=","): '0+10+42' """ - if type(listlike) == list: + if isinstance(listlike, list): return delimiter.join([str(x) for x in listlike]) - elif type(listlike) == pd.core.series.Series: + elif isinstance(listlike, pd.core.series.Series): return delimiter.join(listlike.tolist()) - elif type(listlike) == pd.core.indexes.base.Index: + elif isinstance(listlike, pd.core.indexes.base.Index): return delimiter.join(listlike.tolist()) - elif type(listlike) == str: + elif isinstance(listlike, str): return listlike @@ -91,6 +91,7 @@ def format_datetime(df, date_field, time_field, tz_field): f"Warning: {count} incomplete dates found, " + "consider setting datetime_index to False.", UserWarning, + stacklevel=2, ) return df @@ -229,6 +230,5 @@ def __init__(self, url): def __str__(self): return ( - "No sites/data found using the selection criteria specified in url: " - "{url}" + "No sites/data found using the selection criteria specified in url: {url}" ).format(url=self.url) diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index fc33a931..d10456c6 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -22,9 +22,9 @@ ) from dataretrieval.waterdata.utils import ( SAMPLES_URL, + _check_profiles, get_ogc_data, get_stats_data, - _check_profiles ) # Set up logger for this module @@ -129,8 +129,10 @@ def get_daily( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours Only features that have a last_modified that intersects the value of datetime are selected. @@ -152,8 +154,10 @@ def get_daily( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." 
or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours bbox : list of numbers, optional Only features that have a geometry that intersects the bounding box are @@ -209,6 +213,7 @@ def get_daily( return get_ogc_data(args, output_id, service) + def get_continuous( monitoring_location_id: Optional[Union[str, List[str]]] = None, parameter_code: Optional[Union[str, List[str]]] = None, @@ -233,7 +238,7 @@ def get_continuous( with the continuous endpoint. If the "time" input is left blank, the service will return the most recent year of measurements. Users may request no more than three years of data with each function call. - + Continuous data are collected at a high frequency, typically 15-minute intervals. Depending on the specific monitoring location, the data may be transmitted automatically via telemetry and be available on WDFN within @@ -242,7 +247,7 @@ def get_continuous( transmit data. Continuous data are described by parameter name and parameter code (pcode). These data might also be referred to as "instantaneous values" or "IV". - + Parameters ---------- monitoring_location_id : string or list of strings, optional @@ -312,8 +317,10 @@ def get_continuous( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours Only features that have a last_modified that intersects the value of datetime are selected. @@ -330,8 +337,10 @@ def get_continuous( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours limit : numeric, optional The optional limit parameter is used to control the subset of the @@ -441,9 +450,7 @@ def get_monitoring_locations( by a hyphen (e.g. USGS-02238500). agency_code : string or list of strings, optional The agency that is reporting the data. Agency codes are fixed values - assigned by the National Water Information System (NWIS). A list of - agency codes is available at: - https://help.waterdata.usgs.gov/code/agency_cd_query?fmt=html. + assigned by the National Water Information System (NWIS). agency_name : string or list of strings, optional The name of the agency that is reporting the data. monitoring_location_number : string or list of strings, optional @@ -481,22 +488,20 @@ def get_monitoring_locations( is located. county_code : string or list of strings, optional The code for the county or county equivalent (parish, borough, etc.) in which - the monitoring location is located. A `list of codes + the monitoring location is located. A `list of codes `_ is available. county_name : string or list of strings, optional The name of the county or county equivalent (parish, borough, etc.) in which - the monitoring location is located. A `list of codes + the monitoring location is located. A `list of codes `_ is available. 
 minor_civil_division_code : string or list of strings, optional
     Codes for primary governmental or administrative divisions of the
     county or county equivalent in which the monitoring location is located.
 site_type_code : string or list of strings, optional
-    A code describing the hydrologic setting of the monitoring location. A `list of
-    codes `_ is available.
+    A code describing the hydrologic setting of the monitoring location.
     Example: "US:15:001" (United States: Hawaii, Hawaii County)
 site_type : string or list of strings, optional
-    A description of the hydrologic setting of the monitoring location. A `list of
-    codes `_ is available.
+    A description of the hydrologic setting of the monitoring location.
 hydrologic_unit_code : string or list of strings, optional
     The United States is divided and sub-divided into successively smaller
     hydrologic units which are classified into four levels: regions,
@@ -520,41 +525,31 @@
     topographic maps; accuracies determined in this way are generally
     entered as one-half of the contour interval.
 altitude_method_code : string or list of strings, optional
-    Codes representing the method used to measure altitude. A `list of
-    codes `_ is available.
+    Codes representing the method used to measure altitude.
 altitude_method_name : float, optional
-    The name of the the method used to measure altitude. A `list of
-    codes `_ is available.
+    The name of the method used to measure altitude.
 vertical_datum : float, optional
     The datum used to determine altitude and vertical position at the
-    monitoring location. A `list of
-    codes `_ is available.
+    monitoring location.
 vertical_datum_name : float, optional
     The datum used to determine altitude and vertical position at the
-    monitoring location. A `list of
-    codes `_ is available.
+    monitoring location.
 horizontal_positional_accuracy_code : string or list of strings, optional
-    Indicates the accuracy of the latitude longitude values. A `list of
-    codes `_ is available.
+    Indicates the accuracy of the latitude longitude values.
 horizontal_positional_accuracy : string or list of strings, optional
-    Indicates the accuracy of the latitude longitude values. A `list of
-    codes `_ is available.
+    Indicates the accuracy of the latitude longitude values.
 horizontal_position_method_code : string or list of strings, optional
-    Indicates the method used to determine latitude longitude values. A `list of
-    codes `_ is available.
+    Indicates the method used to determine latitude longitude values.
 horizontal_position_method_name : string or list of strings, optional
-    Indicates the method used to determine latitude longitude values. A `list of
-    codes `_ is available.
+    Indicates the method used to determine latitude longitude values.
 original_horizontal_datum : string or list of strings, optional
     Coordinates are published in EPSG:4326 / WGS84 / World Geodetic System
     1984. This field indicates the original datum used to determine
-    coordinates before they were converted. A `list of
-    codes `_ is available.
+    coordinates before they were converted.
 original_horizontal_datum_name : string or list of strings, optional
     Coordinates are published in EPSG:4326 / WGS84 / World Geodetic
     System 1984. This field indicates the original datum used to determine coordinates
-    before they were converted. A `list of
-    codes `_ is available.
+    before they were converted.
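A minimal usage sketch of the documented filters. The keyword names are taken from the docstring above, and the example values reuse the docstring's own examples ("GW" for a groundwater site type code); availability of each filter is assumed from this diff, not verified against the live API:

```python
# Sketch: query monitoring locations by hydrologic setting.
from dataretrieval import waterdata

df, md = waterdata.get_monitoring_locations(
    state_name="Hawaii",      # assumed keyword, per the docstring's state fields
    site_type_code="GW",      # groundwater sites
)
```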
drainage_area : string or list of strings, optional The area enclosed by a topographic divide from which direct surface runoff from precipitation normally drains by gravity into the stream above that @@ -795,8 +790,10 @@ def get_time_series_metadata( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours end_utc : string or list of strings, optional The datetime of the most recent observation in the time series. Data returned by @@ -805,8 +802,9 @@ def get_time_series_metadata( than the time series end value reflects. Together with begin, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". We intend to update this in - version v0 to use UTC with a time zone. You can query this field using date-times - or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals + version v0 to use UTC with a time zone. + You can query this field using date-times or intervals, + adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Only features that have a end that intersects the value of datetime are selected. @@ -814,8 +812,10 @@ def get_time_series_metadata( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours unit_of_measure : string or list of strings, optional A human-readable description of the units of measurement associated @@ -882,8 +882,8 @@ def get_time_series_metadata( >>> # Get timeseries metadata information from multiple sites >>> # that begin after January 1, 1990. >>> df, md = dataretrieval.waterdata.get_time_series_metadata( - ... monitoring_location_id = ["USGS-05114000", "USGS-09423350"], - ... begin = "1990-01-01/.." + ... monitoring_location_id=["USGS-05114000", "USGS-09423350"], + ... begin="1990-01-01/..", ... ) """ service = "time-series-metadata" @@ -996,8 +996,10 @@ def get_latest_continuous( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours skip_geometry : boolean, optional This option can be used to skip response geometries for each feature. @@ -1017,8 +1019,10 @@ def get_latest_continuous( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." 
or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours bbox : list of numbers, optional Only features that have a geometry that intersects the bounding box are @@ -1170,8 +1174,10 @@ def get_latest_daily( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours skip_geometry : boolean, optional This option can be used to skip response geometries for each feature. @@ -1191,8 +1197,10 @@ def get_latest_daily( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours bbox : list of numbers, optional Only features that have a geometry that intersects the bounding box are @@ -1244,6 +1252,7 @@ def get_latest_daily( return get_ogc_data(args, output_id, service) + def get_field_measurements( monitoring_location_id: Optional[Union[str, List[str]]] = None, parameter_code: Optional[Union[str, List[str]]] = None, @@ -1329,19 +1338,21 @@ def get_field_measurements( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours observing_procedure : string or list of strings, optional Water measurement or water-quality observing procedure descriptions. vertical_datum : string or list of strings, optional - The datum used to determine altitude and vertical position at the monitoring location. - A list of codes is available. + The datum used to determine altitude and vertical position at the + monitoring location. measuring_agency : string or list of strings, optional The agency performing the measurement. skip_geometry : boolean, optional - This option can be used to skip response geometries for each feature. The returning - object will be a data frame with no spatial information. + This option can be used to skip response geometries for each feature. + The returning object will be a data frame with no spatial information. Note that the USGS Water Data APIs use camelCase "skipGeometry" in CQL2 queries. time : string, optional @@ -1356,8 +1367,10 @@ def get_field_measurements( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." 
or "../2018-03-18T12:31:12Z" - * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" + * Duration objects: "P1M" for data from the past month or + "PT36H" for the last 36 hours bbox : list of numbers, optional Only features that have a geometry that intersects the bounding box are @@ -1418,14 +1431,14 @@ def get_field_measurements( def get_reference_table( - collection: str, - limit: Optional[int] = None, - query: Optional[dict] = {}, - ) -> Tuple[pd.DataFrame, BaseMetadata]: + collection: str, + limit: Optional[int] = None, + query: Optional[dict] = None, +) -> Tuple[pd.DataFrame, BaseMetadata]: """Get metadata reference tables for the USGS Water Data API. Reference tables provide the range of allowable values for parameter - arguments in the waterdata module. + arguments in the waterdata module. Parameters ---------- @@ -1457,7 +1470,7 @@ def get_reference_table( medium code values). md: :obj:`dataretrieval.utils.Metadata` A custom metadata object including the URL request and query time. - + Examples -------- .. code:: @@ -1479,7 +1492,7 @@ def get_reference_table( f"Invalid code service: '{collection}'. " f"Valid options are: {valid_code_services}." ) - + # Give ID column the collection name with underscores if collection.endswith("s") and collection != "counties": output_id = f"{collection[:-1].replace('-', '_')}" @@ -1487,12 +1500,9 @@ def get_reference_table( output_id = "county" else: output_id = f"{collection.replace('-', '_')}" - - return get_ogc_data( - args=query, - output_id=output_id, - service=collection - ) + + query_args = query or {} + return get_ogc_data(args=query_args, output_id=output_id, service=collection) def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame: @@ -1747,22 +1757,23 @@ def get_samples( return df, BaseMetadata(response) + def get_stats_por( - approval_status: Optional[str] = None, - computation_type: Optional[Union[str, list[str]]] = None, - country_code: Optional[Union[str, list[str]]] = None, - state_code: Optional[Union[str, list[str]]] = None, - county_code: Optional[Union[str, list[str]]] = None, - start_date: Optional[str] = None, - end_date: Optional[str] = None, - monitoring_location_id: Optional[Union[str, list[str]]] = None, - page_size: int = 1000, - parent_time_series_id: Optional[Union[str, list[str]]] = None, - site_type_code: Optional[Union[str, list[str]]] = None, - site_type_name: Optional[Union[str, list[str]]] = None, - parameter_code: Optional[Union[str, list[str]]] = None, - expand_percentiles: bool = True - ) -> Tuple[pd.DataFrame, BaseMetadata]: + approval_status: Optional[str] = None, + computation_type: Optional[Union[str, list[str]]] = None, + country_code: Optional[Union[str, list[str]]] = None, + state_code: Optional[Union[str, list[str]]] = None, + county_code: Optional[Union[str, list[str]]] = None, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + monitoring_location_id: Optional[Union[str, list[str]]] = None, + page_size: int = 1000, + parent_time_series_id: Optional[Union[str, list[str]]] = None, + site_type_code: Optional[Union[str, list[str]]] = None, + site_type_name: Optional[Union[str, list[str]]] = None, + parameter_code: Optional[Union[str, list[str]]] = None, + expand_percentiles: bool = True, +) -> Tuple[pd.DataFrame, BaseMetadata]: """Get day-of-year and month-of-year water data statistics from the USGS Water Data API. 
    This service (called the "observationNormals" endpoint on api.waterdata.usgs.gov)
@@ -1771,7 +1782,7 @@ def get_stats_por(
     day of year and month of year. For more information regarding the calculation
     of statistics and other details, please visit the Statistics documentation page:
     https://waterdata.usgs.gov/statistics-documentation/.
-    
+
     Note: This API is under active beta development and subject to change.
     Improved handling of significant figures will be addressed in a future
     release.
@@ -1808,15 +1819,15 @@
         The number of results to return per page, where one result represents
         a monitoring location. The default is 1000.
     parent_time_series_id: string, optional
-        The parent_time_series_id returns statistics tied to a particular datbase entry.
+        The parent_time_series_id returns statistics tied to a
+        particular database entry.
     site_type_code: string, optional
-        Site type code query parameter. You can see a list of valid site type codes here:
+        Site type code query parameter.
+        A list of valid site type codes is available at:
         https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
         Example: "GW" (Groundwater site)
     site_type_name: string, optional
-        Site type name query parameter. You can see a list of valid site type names here:
-        https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
-        Example: "Well"
+        Site type name query parameter.
     parameter_code : string or list of strings, optional
         Parameter codes are 5-digit codes used to identify the constituent
         measured and the units of measure. A complete list of parameter codes
@@ -1850,7 +1861,7 @@
     >>> df, md = dataretrieval.waterdata.get_stats_por(
     ...     monitoring_location_id="USGS-05114000",
     ...     parameter_code="00060",
-    ...     computation_type="percentile"
+    ...     computation_type="percentile",
     ...     )
 
     >>> # Get all daily and monthly statistics for the month of January
@@ -1860,7 +1871,7 @@
     ...     monitoring_location_id="USGS-05114000",
     ...     parameter_code=["00060", "00065"],
     ...     start_date="01-01",
-    ...     end_date="01-31"
+    ...     end_date="01-31",
) """ params = { @@ -1868,29 +1879,28 @@ def get_stats_por( for k, v in locals().items() if k not in ["expand_percentiles"] and v is not None } - + return get_stats_data( - args=params, - service="observationNormals", - expand_percentiles=expand_percentiles - ) + args=params, service="observationNormals", expand_percentiles=expand_percentiles + ) + def get_stats_date_range( - approval_status: Optional[str] = None, - computation_type: Optional[Union[str, list[str]]] = None, - country_code: Optional[Union[str, list[str]]] = None, - state_code: Optional[Union[str, list[str]]] = None, - county_code: Optional[Union[str, list[str]]] = None, - start_date: Optional[str] = None, - end_date: Optional[str] = None, - monitoring_location_id: Optional[Union[str, list[str]]] = None, - page_size: int = 1000, - parent_time_series_id: Optional[Union[str, list[str]]] = None, - site_type_code: Optional[Union[str, list[str]]] = None, - site_type_name: Optional[Union[str, list[str]]] = None, - parameter_code: Optional[Union[str, list[str]]] = None, - expand_percentiles: bool = True - ) -> Tuple[pd.DataFrame, BaseMetadata]: + approval_status: Optional[str] = None, + computation_type: Optional[Union[str, list[str]]] = None, + country_code: Optional[Union[str, list[str]]] = None, + state_code: Optional[Union[str, list[str]]] = None, + county_code: Optional[Union[str, list[str]]] = None, + start_date: Optional[str] = None, + end_date: Optional[str] = None, + monitoring_location_id: Optional[Union[str, list[str]]] = None, + page_size: int = 1000, + parent_time_series_id: Optional[Union[str, list[str]]] = None, + site_type_code: Optional[Union[str, list[str]]] = None, + site_type_name: Optional[Union[str, list[str]]] = None, + parameter_code: Optional[Union[str, list[str]]] = None, + expand_percentiles: bool = True, +) -> Tuple[pd.DataFrame, BaseMetadata]: """Get monthly and annual water data statistics from the USGS Water Data API. This service (called the "observationIntervals" endpoint on api.waterdata.usgs.gov) provides endpoints for access to computations on the historical record regarding @@ -1898,7 +1908,7 @@ def get_stats_date_range( month-year, and water/calendar years. For more information regarding the calculation of statistics and other details, please visit the Statistics documentation page: https://waterdata.usgs.gov/statistics-documentation/. - + Note: This API is under active beta development and subject to change. Improved handling of significant figures will be addressed in a future release. @@ -1937,13 +1947,16 @@ def get_stats_date_range( The number of results to return per page, where one result represents a monitoring location. The default is 1000. parent_time_series_id: string, optional - The parent_time_series_id returns statistics tied to a particular datbase entry. + The parent_time_series_id returns statistics tied to a + particular datbase entry. site_type_code: string, optional - Site type code query parameter. You can see a list of valid site type codes here: + Site type code query parameter. + You can see a list of valid site type codes here: https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items. Example: "GW" (Groundwater site) site_type_name: string, optional - Site type name query parameter. You can see a list of valid site type names here: + Site type name query parameter. + You can see a list of valid site type names here: https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items. 
Example: "Well" parameter_code : string or list of strings, optional @@ -1977,12 +1990,12 @@ def get_stats_date_range( >>> # Get monthly and yearly medians for streamflow at streams in Rhode Island >>> # from calendar year 2024. >>> df, md = dataretrieval.waterdata.get_stats_date_range( - ... state_code="US:44", # State code for Rhode Island + ... state_code="US:44", # State code for Rhode Island ... parameter_code="00060", ... site_type_code="ST", ... start_date="2024-01-01", ... end_date="2024-12-31", - ... computation_type="median" + ... computation_type="median", ... ) >>> # Get monthly and yearly minimum and maximums for gage height at @@ -1990,7 +2003,7 @@ def get_stats_date_range( >>> df, md = dataretrieval.waterdata.get_stats_date_range( ... monitoring_location_id="USGS-05114000", ... parameter_code="00065", - ... computation_type=["minimum", "maximum"] + ... computation_type=["minimum", "maximum"], ... ) """ params = { @@ -1998,12 +2011,12 @@ def get_stats_date_range( for k, v in locals().items() if k not in ["expand_percentiles"] and v is not None } - + return get_stats_data( args=params, service="observationIntervals", - expand_percentiles=expand_percentiles - ) + expand_percentiles=expand_percentiles, + ) def get_channel( @@ -2050,7 +2063,8 @@ def get_channel( the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). field_visit_id : string or list of strings, optional - A universally unique identifier (UUID) for the field visit. Multiple measurements + A universally unique identifier (UUID) for the field visit. + Multiple measurements may be made during a single field visit. measurement_number : string or list of strings, optional Measurement number. @@ -2067,7 +2081,8 @@ def get_channel( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours channel_name : string or list of strings, optional @@ -2114,7 +2129,8 @@ def get_channel( * A date-time: "2018-02-12T23:20:50Z" * A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" - * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" + * Half-bounded intervals: "2018-02-12T00:00:00Z/.." or + "../2018-03-18T12:31:12Z" * Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 5a5bd08e..1c01915f 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -9,9 +9,8 @@ import requests from zoneinfo import ZoneInfo -from dataretrieval.utils import BaseMetadata from dataretrieval import __version__ - +from dataretrieval.utils import BaseMetadata from dataretrieval.waterdata.types import ( PROFILE_LOOKUP, PROFILES, @@ -319,12 +318,18 @@ def _error_body(resp: requests.Response): """ status = resp.status_code if status == 429: - return "429: Too many requests made. Please obtain an API token or try again later." + return ( + "429: Too many requests made. Please obtain an API token " + "or try again later." + ) elif status == 403: - return "403: Query request denied. Possible reasons include query exceeding server limits." + return ( + "403: Query request denied. 
Possible reasons include " + "query exceeding server limits." + ) j_txt = resp.json() return ( - f"{status}: {j_txt.get('code', 'Unknown type')}. " + f"{status}: {j_txt.get('code', 'Unknown type')}. " f"{j_txt.get('description', 'No description provided')}." ) @@ -373,7 +378,7 @@ def _construct_api_requests( - The function sets appropriate headers for GET and POST requests. """ service_url = f"{OGC_API_URL}/collections/{service}/items" - + # Single parameters can only have one value single_params = {"datetime", "last_modified", "begin", "end", "time"} @@ -388,12 +393,10 @@ def _construct_api_requests( params = {k: v for k, v in kwargs.items() if k not in post_params} # Set skipGeometry parameter (API expects camelCase) params["skipGeometry"] = skip_geometry - + # If limit is none or greater than 50000, then set limit to max results. Otherwise, # use the limit - params["limit"] = ( - 50000 if limit is None or limit > 50000 else limit - ) + params["limit"] = 50000 if limit is None or limit > 50000 else limit # Indicate if function needs to perform POST conversion POST = bool(post_params) @@ -481,9 +484,11 @@ def _get_resp_data(resp: requests.Response, geopd: bool) -> pd.DataFrame: Parameters ---------- resp : requests.Response - The HTTP response object expected to contain a JSON body with a "features" key. + The HTTP response object expected to contain a JSON body + with a "features" key. geopd : bool - Indicates whether geopandas is installed and should be used to handle geometries. + Indicates whether geopandas is installed and should be used to + handle geometries. Returns ------- @@ -527,7 +532,8 @@ def _walk_pages( client: Optional[requests.Session] = None, ) -> Tuple[pd.DataFrame, requests.Response]: """ - Iterates through paginated API responses and aggregates the results into a single DataFrame. + Iterates through paginated API responses and aggregates the results + into a single DataFrame. Parameters ---------- @@ -556,7 +562,8 @@ def _walk_pages( if not geopd: logger.warning( - "Geopandas not installed. Geometries will be flattened into pandas DataFrames." + "Geopandas not installed. Geometries will be flattened " + "into pandas DataFrames." ) # Get first response from client @@ -586,14 +593,16 @@ def _walk_pages( curr_url, headers=headers, data=content if method == "POST" else None, - ) + ) df1 = _get_resp_data(resp, geopd=geopd) dfs = pd.concat([dfs, df1], ignore_index=True) curr_url = _next_req_url(resp) except Exception: error_text = _error_body(resp) logger.error("Request incomplete. %s", error_text) - logger.warning("Request failed for URL: %s. Data download interrupted.", curr_url) + logger.warning( + "Request failed for URL: %s. Data download interrupted.", curr_url + ) curr_url = None return dfs, initial_response finally: @@ -608,7 +617,8 @@ def _deal_with_empty( Handles empty DataFrame results by returning a DataFrame with appropriate columns. If `return_list` is empty, determines the column names to use: - - If `properties` is not provided or contains only NaN values, retrieves the schema properties from the specified service. + - If `properties` is not provided or contains only NaN values, + retrieves schema properties from the specified service. - Otherwise, uses the provided `properties` list as column names. Parameters @@ -623,7 +633,8 @@ def _deal_with_empty( Returns ------- pd.DataFrame - The original DataFrame if not empty, otherwise an empty DataFrame with the appropriate columns. 
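For context on `_walk_pages` above, the pattern is: send the prepared request, read one page of features, then follow the response's "next" link until none remains, concatenating pages. A rough standalone sketch; `_next_req_url`'s exact link extraction is not shown in this diff, so the `rel == "next"` lookup here is an assumption:

```python
import pandas as pd
import requests

def walk_pages(url: str, params: dict) -> pd.DataFrame:
    """Follow OGC API 'next' links, concatenating pages into one frame."""
    pages = []
    with requests.Session() as session:
        resp = session.get(url, params=params)
        while True:
            body = resp.json()
            pages.append(pd.json_normalize(body.get("features", [])))
            next_url = next(
                (link["href"] for link in body.get("links", [])
                 if link.get("rel") == "next"),
                None,
            )
            if next_url is None:
                break
            resp = session.get(next_url)
    return pd.concat(pages, ignore_index=True)
```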
+ The original DataFrame if not empty, otherwise an empty + DataFrame with the appropriate columns. """ if return_list.empty: if not properties or all(pd.isna(properties)): @@ -637,21 +648,24 @@ def _arrange_cols( df: pd.DataFrame, properties: Optional[List[str]], output_id: str ) -> pd.DataFrame: """ - Rearranges and renames columns in a DataFrame based on provided properties and service's output id. + Rearranges and renames columns in a DataFrame based on provided + properties and the service output id. Parameters ---------- df : pd.DataFrame The input DataFrame whose columns are to be rearranged or renamed. properties : Optional[List[str]] - A list of column names to possibly rename. If None or contains only NaN, the function will rename 'id' to output_id. + A list of column names to possibly rename. If None or contains + only NaN, the function renames 'id' to output_id. output_id : str The name to which the 'id' column should be renamed if applicable. Returns ------- pd.DataFrame or gpd.GeoDataFrame - The DataFrame with columns rearranged and/or renamed according to the specified properties and output_id. + The DataFrame with columns rearranged and/or renamed according + to the specified properties and output_id. """ # Rename id column to output_id @@ -661,32 +675,36 @@ def _arrange_cols( # plus geometry if skip_geometry is False if properties and not all(pd.isna(properties)): # Make sure geometry stays in the dataframe if skip_geometry is False - if 'geometry' in df.columns and 'geometry' not in properties: - properties.append('geometry') + if "geometry" in df.columns and "geometry" not in properties: + properties.append("geometry") # id is technically a valid column from the service, but these # functions make the name more specific. So, if someone requests # 'id', give them the output_id column - if 'id' in properties: - properties[properties.index('id')] = output_id + if "id" in properties: + properties[properties.index("id")] = output_id df = df.loc[:, [col for col in properties if col in df.columns]] # Move meaningless-to-user, extra id columns to the end # of the dataframe, if they exist - extra_id_col = set(df.columns).intersection({ - "latest_continuous_id", - "latest_daily_id", - "daily_id", - "continuous_id", - "field_measurement_id" - }) + extra_id_col = set(df.columns).intersection( + { + "latest_continuous_id", + "latest_daily_id", + "daily_id", + "continuous_id", + "field_measurement_id", + } + ) # If the arbitrary id column is returned (either due to properties # being none or NaN), then move it to the end of the dataframe, but # if part of properties, keep in requested order if extra_id_col and (properties is None or all(pd.isna(properties))): - id_col_order = [col for col in df.columns if col not in extra_id_col] + list(extra_id_col) + id_col_order = [col for col in df.columns if col not in extra_id_col] + list( + extra_id_col + ) df = df.loc[:, id_col_order] - + return df @@ -714,17 +732,17 @@ def _type_cols(df: pd.DataFrame) -> pd.DataFrame: "hole_constructed_depth", "value", "well_constructed_depth", - ] + ] time_cols = [ "begin", "begin_utc", "construction_date", "end", "end_utc", - "datetime", # unused + "datetime", # unused "last_modified", "time", - ] + ] for col in cols.intersection(time_cols): df[col] = pd.to_datetime(df[col], errors="coerce") @@ -752,16 +770,10 @@ def _sort_rows(df: pd.DataFrame) -> pd.DataFrame: """ if "time" in df.columns and "monitoring_location_id" in df.columns: - df = df.sort_values( - by=["time", "monitoring_location_id"], - 
ignore_index=True - ) + df = df.sort_values(by=["time", "monitoring_location_id"], ignore_index=True) elif "time" in df.columns: - df = df.sort_values( - by="time", - ignore_index=True - ) - + df = df.sort_values(by="time", ignore_index=True) + return df @@ -769,10 +781,12 @@ def get_ogc_data( args: Dict[str, Any], output_id: str, service: str ) -> Tuple[pd.DataFrame, BaseMetadata]: """ - Retrieves OGC (Open Geospatial Consortium) data from a specified water data endpoint and returns it as a pandas DataFrame with metadata. + Retrieves OGC (Open Geospatial Consortium) data from a specified + endpoint and returns it as a pandas DataFrame with metadata. - This function prepares request arguments, constructs API requests, handles pagination, processes the results, - and formats the output DataFrame according to the specified parameters. + This function prepares request arguments, constructs API requests, + handles pagination, processes the results, and formats output + according to the specified parameters. Parameters ---------- @@ -812,9 +826,7 @@ def get_ogc_data( # Build API request req = _construct_api_requests(**args) # Run API request and iterate through pages if needed - return_list, response = _walk_pages( - geopd=GEOPANDAS, req=req - ) + return_list, response = _walk_pages(geopd=GEOPANDAS, req=req) # Manage some aspects of the returned dataset return_list = _deal_with_empty(return_list, properties, service) @@ -828,9 +840,10 @@ def get_ogc_data( metadata = BaseMetadata(response) return return_list, metadata + def _handle_stats_nesting( - body: Dict[str, Any], - geopd: bool = False, + body: Dict[str, Any], + geopd: bool = False, ) -> pd.DataFrame: """ Takes nested json from stats service and flattens into a dataframe with @@ -847,23 +860,26 @@ def _handle_stats_nesting( A DataFrame containing the flattened statistical data. """ if body is None: - return pd.DataFrame() - + return pd.DataFrame() + if not geopd: logger.info( - "Geopandas not installed. Geometries will be flattened into pandas DataFrames." + "Geopandas not installed. Geometries will be flattened " + "into pandas DataFrames." ) # If geopandas not installed, return a pandas dataframe # otherwise return a geodataframe if not geopd: - df = pd.json_normalize(body['features']).drop(columns=['type', 'properties.data']) - df.columns = df.columns.str.split('.').str[-1] + df = pd.json_normalize(body["features"]).drop( + columns=["type", "properties.data"] + ) + df.columns = df.columns.str.split(".").str[-1] else: - df = gpd.GeoDataFrame.from_features(body["features"]).drop(columns=['data']) - + df = gpd.GeoDataFrame.from_features(body["features"]).drop(columns=["data"]) + # Unnest json features, properties, data, and values while retaining necessary - # metadata to merge with main dataframe. + # metadata to merge with main dataframe. 
dat = pd.json_normalize( body, record_path=["features", "properties", "data", "values"], @@ -872,14 +888,14 @@ def _handle_stats_nesting( ["features", "properties", "data", "parameter_code"], ["features", "properties", "data", "unit_of_measure"], ["features", "properties", "data", "parent_time_series_id"], - #["features", "geometry", "coordinates"], - ], - meta_prefix="", - errors="ignore", - ) - dat.columns = dat.columns.str.split('.').str[-1] + # ["features", "geometry", "coordinates"], + ], + meta_prefix="", + errors="ignore", + ) + dat.columns = dat.columns.str.split(".").str[-1] - return df.merge(dat, on='monitoring_location_id', how='left') + return df.merge(dat, on="monitoring_location_id", how="left") def _expand_percentiles(df: pd.DataFrame) -> pd.DataFrame: @@ -902,33 +918,44 @@ def _expand_percentiles(df: pd.DataFrame) -> pd.DataFrame: A DataFrame containing the flattened percentile data. """ if len(df) > 0: - - if "percentile" in df['computation'].unique(): + if "percentile" in df["computation"].unique(): # Explode percentile lists into rows called "value" and "percentile" - percentiles = df.loc[df['computation'] == "percentile"] - percentiles_explode = percentiles[['computation_id', 'values', 'percentiles']].explode(['values', 'percentiles'], ignore_index=True) - percentiles_explode = percentiles_explode.loc[percentiles_explode['values']!="nan"] - percentiles_explode['value'] = pd.to_numeric(percentiles_explode['values']) - percentiles_explode['percentile'] = pd.to_numeric(percentiles_explode['percentiles']) - percentiles_explode = percentiles_explode.drop(columns=['values', 'percentiles']) - + percentiles = df.loc[df["computation"] == "percentile"] + percentiles_explode = percentiles[ + ["computation_id", "values", "percentiles"] + ].explode(["values", "percentiles"], ignore_index=True) + percentiles_explode = percentiles_explode.loc[ + percentiles_explode["values"] != "nan" + ] + percentiles_explode["value"] = pd.to_numeric(percentiles_explode["values"]) + percentiles_explode["percentile"] = pd.to_numeric( + percentiles_explode["percentiles"] + ) + percentiles_explode = percentiles_explode.drop( + columns=["values", "percentiles"] + ) + # Merge exploded values back to other metadata/geometry - percentiles = percentiles.drop(columns=['values', 'percentiles', 'value'], errors="ignore").merge(percentiles_explode, on='computation_id', how='left') - + percentiles = percentiles.drop( + columns=["values", "percentiles", "value"], errors="ignore" + ).merge(percentiles_explode, on="computation_id", how="left") + # Concatenate back to original - dfs = pd.concat([df.loc[df['computation'] != "percentile"], percentiles]).drop(columns=['values', 'percentiles']) + dfs = pd.concat( + [df.loc[df["computation"] != "percentile"], percentiles] + ).drop(columns=["values", "percentiles"]) else: dfs = df - dfs['percentile'] = pd.NA - + dfs["percentile"] = pd.NA + # Give min, max, median a percentile value - dfs.loc[dfs['computation'] == "maximum", 'percentile'] = 100 - dfs.loc[dfs['computation'] == "minimum", 'percentile'] = 0 - dfs.loc[dfs['computation'] == "median", 'percentile'] = 50 + dfs.loc[dfs["computation"] == "maximum", "percentile"] = 100 + dfs.loc[dfs["computation"] == "minimum", "percentile"] = 0 + dfs.loc[dfs["computation"] == "median", "percentile"] = 50 # Make sure numeric - dfs['percentile'] = pd.to_numeric(dfs['percentile']) - + dfs["percentile"] = pd.to_numeric(dfs["percentile"]) + # Move percentile column cols = dfs.columns.tolist() cols.remove("percentile") @@ -936,32 +963,37 
@@ def _expand_percentiles(df: pd.DataFrame) -> pd.DataFrame: cols.insert(col_index, "percentile") return dfs[cols] - + else: return df + def get_stats_data( args: Dict[str, Any], service: str, expand_percentiles: bool, client: Optional[requests.Session] = None, - ) -> Tuple[pd.DataFrame, BaseMetadata]: +) -> Tuple[pd.DataFrame, BaseMetadata]: """ - Retrieves statistical data from a specified water data endpoint and returns it as a pandas DataFrame with metadata. + Retrieves statistical data from a specified endpoint and returns it + as a pandas DataFrame with metadata. - This function prepares request arguments, constructs API requests, handles pagination, processes the results, - and formats the output DataFrame according to the specified parameters. + This function prepares request arguments, constructs API requests, + handles pagination, processes results, and formats output according + to the specified parameters. Parameters ---------- args : Dict[str, Any] Dictionary of request arguments for the statistics service. service : str - The statistics service type (e.g., "observationNormals", "observationIntervals"). + The statistics service type (for example, + "observationNormals" or "observationIntervals"). expand_percentiles : bool - Determines whether the percentiles column is expanded so that each percentile gets its own row in the - returned dataframe. If set to True and user requests a computation_type other than percentiles, a - percentile column will be returned with the dataset. + Determines whether the percentiles column is expanded so that + each percentile gets its own row in the returned dataframe. If + True and user requests a computation_type other than + percentiles, a percentile column is still returned. Returns ------- @@ -976,11 +1008,11 @@ def get_stats_data( headers = _default_headers() request = requests.Request( - method="GET", - url=url, - headers=headers, - params=args, - ) + method="GET", + url=url, + headers=headers, + params=args, + ) req = request.prepare() logger.info("Request: %s", req.url) @@ -1006,10 +1038,10 @@ def get_stats_data( dfs = _handle_stats_nesting(body, geopd=GEOPANDAS) # Look for a next code in the response body - next_token = body['next'] + next_token = body["next"] while next_token: - args['next_token'] = next_token + args["next_token"] = next_token try: resp = client.request( @@ -1017,18 +1049,20 @@ def get_stats_data( url=url, params=args, headers=headers, - ) + ) body = resp.json() df1 = _handle_stats_nesting(body, geopd=False) dfs = pd.concat([dfs, df1], ignore_index=True) - next_token = body['next'] + next_token = body["next"] except Exception: error_text = _error_body(resp) logger.error("Request incomplete. %s", error_text) - logger.warning("Request failed for URL: %s. Data download interrupted.", resp.url) + logger.warning( + "Request failed for URL: %s. Data download interrupted.", resp.url + ) next_token = None - #. If expand percentiles is True, make each percentile + # . If expand percentiles is True, make each percentile # its own row in the returned dataset. if expand_percentiles: dfs = _expand_percentiles(dfs) @@ -1066,4 +1100,3 @@ def _check_profiles( f"Invalid profile: '{profile}' for service '{service}'. " f"Valid options are: {valid_profiles}." 
     )
-
diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py
index 0194c3eb..7722df0b 100644
--- a/dataretrieval/wqp.py
+++ b/dataretrieval/wqp.py
@@ -24,7 +24,7 @@
 
 result_profiles_wqx3 = ["basicPhysChem", "fullPhysChem", "narrow"]
-result_profiles_legacy = ["biological", "narrowResult","resultPhysChem"]
+result_profiles_legacy = ["biological", "narrowResult", "resultPhysChem"]
 activity_profiles_legacy = ["activityAll"]
 services_wqx3 = ["Activity", "Result", "Station"]
 services_legacy = [
@@ -712,7 +712,7 @@ def _warn_wqx3_use():
         "Support for the WQX3.0 profiles is experimental. "
         "Queries may be slow or fail intermittently."
     )
-    warnings.warn(message, UserWarning)
+    warnings.warn(message, UserWarning, stacklevel=2)
 
 
 def _warn_legacy_use():
@@ -723,4 +723,4 @@ def _warn_legacy_use():
         "information on updated WQX3.0 profiles. Setting `legacy=False` "
         "will remove this warning."
     )
-    warnings.warn(message, DeprecationWarning)
+    warnings.warn(message, DeprecationWarning, stacklevel=2)
diff --git a/demos/NWIS_demo_1.ipynb b/demos/NWIS_demo_1.ipynb
index 5827bd49..e68415ce 100755
--- a/demos/NWIS_demo_1.ipynb
+++ b/demos/NWIS_demo_1.ipynb
@@ -25,13 +25,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from scipy import stats\n",
-    "import pandas as pd\n",
     "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy import stats\n",
     "\n",
-    "\n",
-    "\n",
-    "from dataretrieval import nwis, utils, codes"
+    "from dataretrieval import nwis"
    ]
  },
  {
@@ -49,11 +47,11 @@
    "outputs": [],
    "source": [
     "# download annual peaks from a single site\n",
-    "df = nwis.get_record(sites='03339000', service='peaks', start='1970-01-01')\n",
+    "df = nwis.get_record(sites=\"03339000\", service=\"peaks\", start=\"1970-01-01\")\n",
     "df.head()\n",
     "\n",
     "# alternatively information for the entire state of Illinois can be downloaded using\n",
-    "#df = nwis.get_record(state_cd='il', service='peaks', start='1970-01-01')"
+    "# df = nwis.get_record(state_cd='il', service='peaks', start='1970-01-01')"
    ]
  },
  {
@@ -81,23 +79,31 @@
    "outputs": [],
    "source": [
     "def peak_trend_regression(df):\n",
-    "    \"\"\"\n",
-    "    \"\"\"\n",
-    "    #convert datetimes to days for regression\n",
+    "    \"\"\" \"\"\"\n",
+    "    # convert datetimes to days for regression\n",
     "    peak_date = df.index\n",
     "    peak_date = pd.to_datetime(df.index.get_level_values(1))\n",
-    "    df['peak_d'] = (peak_date - peak_date.min()) / np.timedelta64(1,'D')\n",
-    "    #df['peak_d'] = (df['peak_dt'] - df['peak_dt'].min()) / np.timedelta64(1,'D')\n",
-    "    \n",
-    "    #normalize the peak discharge values\n",
-    "    df['peak_va'] = (df['peak_va'] - df['peak_va'].mean())/df['peak_va'].std()\n",
-    "    \n",
-    "    slope, intercept, r_value, p_value, std_error = stats.linregress(df['peak_d'], df['peak_va'])\n",
-    "    \n",
-    "    #df_out = pd.DataFrame({'slope':slope,'intercept':intercept,'p_value':p_value},index=df['site_no'].iloc[0])\n",
-    "    \n",
-    "    #return df_out\n",
-    "    return pd.Series({'slope':slope,'intercept':intercept,'p_value': p_value,'std_error':std_error})"
+    "    df[\"peak_d\"] = (peak_date - peak_date.min()) / np.timedelta64(1, \"D\")\n",
+    "    # df['peak_d'] = (df['peak_dt'] - df['peak_dt'].min()) / np.timedelta64(1,'D')\n",
+    "\n",
+    "    # normalize the peak discharge values\n",
+    "    df[\"peak_va\"] = (df[\"peak_va\"] - df[\"peak_va\"].mean()) / df[\"peak_va\"].std()\n",
+    "\n",
+    "    slope, intercept, r_value, p_value, std_error = stats.linregress(\n",
+    "        df[\"peak_d\"], df[\"peak_va\"]\n",
+    "    )\n",
+    "\n",
+    "    # df_out =
pd.DataFrame({'slope':slope,'intercept':intercept,'p_value':p_value},index=df['site_no'].iloc[0])\n", + "\n", + " # return df_out\n", + " return pd.Series(\n", + " {\n", + " \"slope\": slope,\n", + " \"intercept\": intercept,\n", + " \"p_value\": p_value,\n", + " \"std_error\": std_error,\n", + " }\n", + " )" ] }, { @@ -116,9 +122,9 @@ "def peak_trend_analysis(states, start_date):\n", " \"\"\"\n", " states : list\n", - " a list containing the two-letter codes for each state to include in the \n", + " a list containing the two-letter codes for each state to include in the\n", " analysis.\n", - " \n", + "\n", " start_date : string\n", " the date to use a the beginning of the analysis.\n", " \"\"\"\n", @@ -126,24 +132,25 @@ "\n", " for state in states:\n", " # download annual peak discharge records\n", - " df = nwis.get_record(state_cd=state, start=start_date, service='peaks')\n", + " df = nwis.get_record(state_cd=state, start=start_date, service=\"peaks\")\n", " # group the data by site and apply our regression\n", - " temp = df.groupby('site_no').apply(peak_trend_regression).dropna()\n", + " temp = df.groupby(\"site_no\").apply(peak_trend_regression).dropna()\n", " # drop any insignificant results\n", - " temp = temp[temp['p_value']<0.05]\n", - " \n", + " temp = temp[temp[\"p_value\"] < 0.05]\n", + "\n", " # now download metadata for each site, which we'll use later to plot the sites\n", " # on a map\n", - " site_df = nwis.get_record(sites=temp.index, service='site')\n", - " \n", + " site_df = nwis.get_record(sites=temp.index, service=\"site\")\n", + "\n", " if final_df.empty:\n", - " final_df = pd.merge(site_df, temp, right_index=True, left_on='site_no')\n", - " \n", + " final_df = pd.merge(site_df, temp, right_index=True, left_on=\"site_no\")\n", + "\n", " else:\n", - " final_df = final_df.append( pd.merge(site_df, temp, right_index=True, left_on='site_no') )\n", - " \n", - " return final_df\n", - " \n" + " final_df = final_df.append(\n", + " pd.merge(site_df, temp, right_index=True, left_on=\"site_no\")\n", + " )\n", + "\n", + " return final_df" ] }, { @@ -162,10 +169,10 @@ "# Warning these lines will download a large dataset from the web and\n", "# will take few minutes to run.\n", "\n", - "#start = '1970-01-01'\n", - "#states = codes.state_codes\n", - "#final_df = peak_trend_analysis(states=states, start_date=start)\n", - "#final_df.to_csv('datasets/peak_discharge_trends.csv')" + "# start = '1970-01-01'\n", + "# states = codes.state_codes\n", + "# final_df = peak_trend_analysis(states=states, start_date=start)\n", + "# final_df.to_csv('datasets/peak_discharge_trends.csv')" ] }, { @@ -181,7 +188,7 @@ "metadata": {}, "outputs": [], "source": [ - "final_df = pd.read_csv('datasets/peak_discharge_trends.csv')\n", + "final_df = pd.read_csv(\"datasets/peak_discharge_trends.csv\")\n", "final_df.head()" ] }, @@ -206,7 +213,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Currently commented out as there isn't an easy way to install mpl_toolkits \n", + "# Currently commented out as there isn't an easy way to install mpl_toolkits\n", "# on a remote machine without spinning up a full geospatial stack.\n", "\n", "# from mpl_toolkits.basemap import Basemap, cm\n", @@ -227,7 +234,7 @@ "# m.drawmeridians(np.arange(-180.,181.,15.),labels=[False,False,False,True],dashes=[2,2])\n", "\n", "# # add boundaries and rivers\n", - "# m.drawcountries(linewidth=1, linestyle='solid', color='k' ) \n", + "# m.drawcountries(linewidth=1, linestyle='solid', color='k' )\n", "# m.drawstates(linewidth=0.5, 
linestyle='solid', color='k')\n", "# m.drawrivers(linewidth=0.5, linestyle='solid', color='cornflowerblue')\n", "\n", @@ -238,13 +245,13 @@ "# #x,y = m(lons, lats)\n", "\n", "# # categorical plots get a little ugly in basemap\n", - "# m.scatter(increasing['dec_long_va'].tolist(), \n", - "# increasing['dec_lat_va'].tolist(), \n", + "# m.scatter(increasing['dec_long_va'].tolist(),\n", + "# increasing['dec_lat_va'].tolist(),\n", "# label='increasing', s=2, color='red',\n", "# latlon=True)\n", "\n", - "# m.scatter(decreasing['dec_long_va'].tolist(), \n", - "# decreasing['dec_lat_va'].tolist(), \n", + "# m.scatter(decreasing['dec_long_va'].tolist(),\n", + "# decreasing['dec_lat_va'].tolist(),\n", "# label='increasing', s=2, color='blue',\n", "# latlon=True)" ] diff --git a/demos/R Python Vignette equivalents.ipynb b/demos/R Python Vignette equivalents.ipynb index 12cd52e3..318abb99 100755 --- a/demos/R Python Vignette equivalents.ipynb +++ b/demos/R Python Vignette equivalents.ipynb @@ -13,9 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "from dataretrieval import nwis\n", - "from dataretrieval import waterdata\n", - "from dataretrieval import wqp" + "from dataretrieval import nwis, waterdata, wqp" ] }, { @@ -33,7 +31,7 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "library(dataRetrieval)\n", "# Choptank River near Greensboro, MD\n", "siteNumber <- \"01491000\"\n", @@ -50,19 +48,26 @@ " activityStartDateLower=\"1980-01-01\", activityStartDateUpper=\"2010-01-01\")\n", "\n", "pCode <- readNWISpCode(parameterCd)\n", - "'''\n", + "\"\"\"\n", "\n", "# Choptank River near Greensboro, MD\n", - "siteNumber = '01491000'\n", + "siteNumber = \"01491000\"\n", "chop_tank_info, md = nwis.get_info(sites=siteNumber)\n", - "parameterCd = '00060'\n", + "parameterCd = \"00060\"\n", "\n", "# raw daily data\n", - "rawDailyData, md = nwis.get_dv(sites=siteNumber, parameterCd=parameterCd, start=\"1980-01-01\", end=\"2010-01-01\")\n", + "rawDailyData, md = nwis.get_dv(\n", + " sites=siteNumber, parameterCd=parameterCd, start=\"1980-01-01\", end=\"2010-01-01\"\n", + ")\n", "\n", "# sample data Nitrate:\n", "parameterCd = \"00618\"\n", - "samples_data, md = waterdata.get_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd, activityStartDateLower=\"1980-01-01\", activityStartDateUpper=\"2010-01-01\")\n", + "samples_data, md = waterdata.get_samples(\n", + " monitoringLocationIdentifier=f\"USGS-{siteNumber}\",\n", + " usgsPCode=parameterCd,\n", + " activityStartDateLower=\"1980-01-01\",\n", + " activityStartDateUpper=\"2010-01-01\",\n", + ")\n", "\n", "pCode, md = nwis.get_pmcodes(parameterCd=parameterCd)" ] @@ -73,12 +78,12 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "{r getSite, echo=TRUE, eval=FALSE}\n", "siteNumbers <- c(\"01491000\",\"01645000\")\n", "siteINFO <- readNWISsite(siteNumbers)\n", - "'''\n", - "siteNumbers = [\"01491000\",\"01645000\"]\n", + "\"\"\"\n", + "siteNumbers = [\"01491000\", \"01645000\"]\n", "siteINFO, md = nwis.get_iv(sites=siteNumbers)" ] }, @@ -88,12 +93,12 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "# Continuing from the previous example:\n", "# This pulls out just the daily, mean data:\n", "dailyDataAvailable <- whatNWISdata(siteNumbers,\n", " service=\"dv\", statCd=\"00003\")\n", - "'''\n", + "\"\"\"\n", "\n", "dailyDataAvailable, md = nwis.get_dv(sites=siteNumbers, statCd=\"00003\")" ] @@ -108,11 +113,11 @@ }, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "# Using 
defaults:\n", "parameterCd <- \"00618\" \n", "parameterINFO <- readNWISpCode(parameterCd)\n", - "'''\n", + "\"\"\"\n", "\n", "pCode, md = nwis.get_pmcodes(parameterCd=\"00618\")" ] @@ -123,7 +128,7 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "# Choptank River near Greensboro, MD:\n", "siteNumber <- \"01491000\"\n", "parameterCd <- \"00060\" # Discharge\n", @@ -132,14 +137,16 @@ "\n", "discharge <- readNWISdv(siteNumber, \n", " parameterCd, startDate, endDate)\n", - "'''\n", + "\"\"\"\n", "# Choptank River near Greensboro, MD:\n", "siteNumber = \"01491000\"\n", "parameterCd = \"00060\" # Discharge\n", - "startDate = \"2009-10-01\" \n", - "endDate = \"2012-09-30\" \n", + "startDate = \"2009-10-01\"\n", + "endDate = \"2012-09-30\"\n", "\n", - "discharge, md = nwis.get_dv(sites=siteNumber, parameterCd=parameterCd, start=startDate, end=endDate)" + "discharge, md = nwis.get_dv(\n", + " sites=siteNumber, parameterCd=parameterCd, start=startDate, end=endDate\n", + ")" ] }, { @@ -148,7 +155,7 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "siteNumber <- \"01491000\"\n", "parameterCd <- c(\"00010\",\"00060\") # Temperature and discharge\n", "statCd <- c(\"00001\",\"00003\") # Mean and maximum\n", @@ -157,15 +164,20 @@ "\n", "temperatureAndFlow <- readNWISdv(siteNumber, parameterCd, \n", " startDate, endDate, statCd=statCd)\n", - "'''\n", + "\"\"\"\n", "siteNumber = \"01491000\"\n", - "parameterCd = [\"00010\",\"00060\"] # Temperature and discharge\n", - "statCd = [\"00001\",\"00003\"] # Mean and maximum\n", + "parameterCd = [\"00010\", \"00060\"] # Temperature and discharge\n", + "statCd = [\"00001\", \"00003\"] # Mean and maximum\n", "startDate = \"2012-01-01\"\n", "endDate = \"2012-05-01\"\n", "\n", - "temperatureAndFlow, md = nwis.get_dv(sites=siteNumber, parameterCd=parameterCd,\n", - " start=startDate, end=endDate, statCd=statCd)" + "temperatureAndFlow, md = nwis.get_dv(\n", + " sites=siteNumber,\n", + " parameterCd=parameterCd,\n", + " start=startDate,\n", + " end=endDate,\n", + " statCd=statCd,\n", + ")" ] }, { @@ -174,19 +186,20 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "parameterCd <- \"00060\" # Discharge\n", "startDate <- \"2012-05-12\" \n", "endDate <- \"2012-05-13\" \n", "dischargeUnit <- readNWISuv(siteNumber, parameterCd, \n", " startDate, endDate)\n", - "'''\n", + "\"\"\"\n", "siteNumber = \"01491000\"\n", "parameterCd = \"00060\" # Discharge\n", - "startDate = \"2012-05-12\" \n", - "endDate = \"2012-05-13\" \n", - "dischargeUnit, md = nwis.get_iv(sites=siteNumber, parameterCd=parameterCd,\n", - " start=startDate, end=endDate)" + "startDate = \"2012-05-12\"\n", + "endDate = \"2012-05-13\"\n", + "dischargeUnit, md = nwis.get_iv(\n", + " sites=siteNumber, parameterCd=parameterCd, start=startDate, end=endDate\n", + ")" ] }, { @@ -195,20 +208,24 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "# Dissolved Nitrate parameter codes:\n", "parameterCd <- c(\"00618\",\"71851\")\n", "startDate <- \"1985-10-01\"\n", "endDate <- \"2012-09-30\"\n", "dfLong <- read_USGS_samples(monitoringLocationIdentifier=sprintf(\"USGS-%s\", siteNumber), usgsPCode=parameterCd, \n", " activityStartDateLower=startDate, activityStartDateUpper=endDate)\n", - "'''\n", + "\"\"\"\n", "siteNumber = \"01491000\"\n", - "parameterCd = [\"00618\",\"71851\"]\n", + "parameterCd = [\"00618\", \"71851\"]\n", "startDate = \"1985-10-01\"\n", "endDate = \"2012-09-30\"\n", - "dfLong, md = 
waterdata.get_samples(monitoringLocationIdentifier=f\"USGS-{siteNumber}\", usgsPCode=parameterCd,\n", - " activityStartDateLower=startDate, activityStartDateUpper=endDate)" + "dfLong, md = waterdata.get_samples(\n", + " monitoringLocationIdentifier=f\"USGS-{siteNumber}\",\n", + " usgsPCode=parameterCd,\n", + " activityStartDateLower=startDate,\n", + " activityStartDateUpper=endDate,\n", + ")" ] }, { @@ -217,10 +234,10 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "siteNumber <- \"434400121275801\"\n", "groundWater <- readNWISgwl(siteNumber)\n", - "'''\n", + "\"\"\"\n", "siteNumber = \"434400121275801\"\n", "groundWater, md = nwis.get_gwlevels(sites=siteNumber)" ] @@ -231,11 +248,11 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "siteNumber <- '01594440'\n", "peakData <- readNWISpeak(siteNumber)\n", - "'''\n", - "siteNumber = '01594440'\n", + "\"\"\"\n", + "siteNumber = \"01594440\"\n", "peakData, md = nwis.get_discharge_peaks(sites=siteNumber)" ] }, @@ -245,11 +262,11 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "ratingData <- readNWISrating(siteNumber, \"base\")\n", "attr(ratingData, \"RATING\")\n", - "'''\n", - "ratings_data, md = nwis.get_ratings(site='01594440', file_type=\"base\")" + "\"\"\"\n", + "ratings_data, md = nwis.get_ratings(site=\"01594440\", file_type=\"base\")" ] }, { @@ -258,8 +275,8 @@ "metadata": {}, "outputs": [], "source": [ - "'''surfaceData <- readNWISmeas(siteNumber)'''\n", - "siteNumber = '01594440'\n", + "\"\"\"surfaceData <- readNWISmeas(siteNumber)\"\"\"\n", + "siteNumber = \"01594440\"\n", "surface_data, md = nwis.get_discharge_measurements(sites=siteNumber)" ] }, @@ -269,13 +286,13 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "allegheny <- readNWISuse(stateCd = \"Pennsylvania\",\n", " countyCd = \"Allegheny\")\n", "national <- readNWISuse(stateCd = NULL, \n", " countyCd = NULL, \n", " transform = TRUE)\n", - "'''\n", + "\"\"\"\n", "allegheny, md = nwis.get_water_use(state=\"PA\", counties=\"003\")\n", "\n", "national, md = nwis.get_water_use()" @@ -287,12 +304,14 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "discharge_stats <- readNWISstat(siteNumbers=c(\"02319394\"),\n", " parameterCd=c(\"00060\"),\n", " statReportType=\"annual\") \n", - "'''\n", - "discharge_stats, md = nwis.get_stats(sites='02319394', parameterCd=\"00060\", statReportType='annual', statTypeCd='all')" + "\"\"\"\n", + "discharge_stats, md = nwis.get_stats(\n", + " sites=\"02319394\", parameterCd=\"00060\", statReportType=\"annual\", statTypeCd=\"all\"\n", + ")" ] }, { @@ -347,7 +366,7 @@ "outputs": [], "source": [ "# '''\n", - "# dataPH <- readWQPdata(statecode=\"US:55\", \n", + "# dataPH <- readWQPdata(statecode=\"US:55\",\n", "# characteristicName=\"pH\")\n", "# '''\n", "# dataPH, md = wqp.what_sites(statecode=\"US:55\", characteristicName=\"pH\")" @@ -373,7 +392,7 @@ "metadata": {}, "outputs": [], "source": [ - "'''site <- whatWQPsamples(siteid=\"USGS-01594440\")'''\n", + "\"\"\"site <- whatWQPsamples(siteid=\"USGS-01594440\")\"\"\"\n", "\n", "site, md = wqp.what_sites(siteid=\"USGS-01594440\")" ] @@ -389,12 +408,12 @@ }, "outputs": [], "source": [ - "'''\n", + "\"\"\"\n", "type <- \"Stream\"\n", "sites <- whatWQPmetrics(countycode=\"US:55:025\",siteType=type)\n", - "'''\n", + "\"\"\"\n", "streamType = \"Stream\"\n", - "sites, md = wqp.what_sites(countycode=\"US:55:025\",siteType=streamType)" + "sites, md = wqp.what_sites(countycode=\"US:55:025\", 
siteType=streamType)" ] }, { diff --git a/demos/WaterData_demo.ipynb b/demos/WaterData_demo.ipynb index 40f5d561..47a10ff8 100644 --- a/demos/WaterData_demo.ipynb +++ b/demos/WaterData_demo.ipynb @@ -107,14 +107,15 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", + "from datetime import date, datetime, timedelta\n", + "\n", "import matplotlib.dates as mdates\n", + "import matplotlib.pyplot as plt\n", "import matplotlib.ticker as mtick\n", - "from IPython.display import display\n", - "from datetime import datetime, timedelta\n", - "from datetime import date\n", + "import pandas as pd\n", "from dateutil.relativedelta import relativedelta\n", + "from IPython.display import display\n", + "\n", "from dataretrieval import waterdata" ] }, @@ -133,7 +134,7 @@ "metadata": {}, "outputs": [], "source": [ - "pcodes,metadata = waterdata.get_reference_table(\"parameter-codes\")\n", + "pcodes, metadata = waterdata.get_reference_table(\"parameter-codes\")\n", "display(pcodes.head())" ] }, @@ -152,8 +153,10 @@ "metadata": {}, "outputs": [], "source": [ - "streamflow_pcodes = pcodes[pcodes['parameter_name'].str.contains('streamflow|discharge', case=False, na=False)]\n", - "display(streamflow_pcodes[['parameter_code', 'parameter_name']])" + "streamflow_pcodes = pcodes[\n", + " pcodes[\"parameter_name\"].str.contains(\"streamflow|discharge\", case=False, na=False)\n", + "]\n", + "display(streamflow_pcodes[[\"parameter_code\", \"parameter_name\"]])" ] }, { @@ -183,7 +186,7 @@ "metadata": {}, "outputs": [], "source": [ - "ten_years_ago =(date.today() - relativedelta(years=10)).strftime(\"%Y-%m-%d\")\n", + "ten_years_ago = (date.today() - relativedelta(years=10)).strftime(\"%Y-%m-%d\")\n", "one_week_ago = (datetime.now() - timedelta(days=7)).date().strftime(\"%Y-%m-%d\")" ] }, @@ -202,12 +205,12 @@ "metadata": {}, "outputs": [], "source": [ - "NE_discharge,_ = waterdata.get_time_series_metadata(\n", + "NE_discharge, _ = waterdata.get_time_series_metadata(\n", " state_name=\"Nebraska\",\n", - " parameter_code='00060',\n", + " parameter_code=\"00060\",\n", " begin=f\"../{ten_years_ago}\",\n", " end=f\"{one_week_ago}/..\",\n", - " skip_geometry=True\n", + " skip_geometry=True,\n", ")" ] }, @@ -219,7 +222,9 @@ "outputs": [], "source": [ "display(NE_discharge.sort_values(\"monitoring_location_id\").head())\n", - "print(f\"There are {len(NE_discharge['monitoring_location_id'].unique())} sites with recent discharge data available in the state of Nebraska\")" + "print(\n", + " f\"There are {len(NE_discharge['monitoring_location_id'].unique())} sites with recent discharge data available in the state of Nebraska\"\n", + ")" ] }, { @@ -248,22 +253,25 @@ "metadata": {}, "outputs": [], "source": [ - "chunk_size=50\n", - "site_list = NE_discharge['monitoring_location_id'].unique().tolist()\n", - "chunks = [site_list[i:i + chunk_size] for i in range(0, len(site_list), chunk_size)]\n", + "chunk_size = 50\n", + "site_list = NE_discharge[\"monitoring_location_id\"].unique().tolist()\n", + "chunks = [site_list[i : i + chunk_size] for i in range(0, len(site_list), chunk_size)]\n", "NE_locations = pd.DataFrame()\n", "for site_group in chunks:\n", - " try:\n", - " chunk_data,_ = waterdata.get_monitoring_locations(\n", - " monitoring_location_id=site_group,\n", - " site_type_code=\"ST\"\n", - " )\n", - " if not chunk_data.empty:\n", - " NE_locations = pd.concat([NE_locations, chunk_data])\n", - " except Exception as e:\n", - " print(f\"Chunk failed: {e}\")\n", + " try:\n", 
+ " chunk_data, _ = waterdata.get_monitoring_locations(\n", + " monitoring_location_id=site_group, site_type_code=\"ST\"\n", + " )\n", + " if not chunk_data.empty:\n", + " NE_locations = pd.concat([NE_locations, chunk_data])\n", + " except Exception as e:\n", + " print(f\"Chunk failed: {e}\")\n", "\n", - "display(NE_locations[[\"monitoring_location_id\", \"monitoring_location_name\", \"hydrologic_unit_code\"]].head())" + "display(\n", + " NE_locations[\n", + " [\"monitoring_location_id\", \"monitoring_location_name\", \"hydrologic_unit_code\"]\n", + " ].head()\n", + ")" ] }, { @@ -281,13 +289,20 @@ "metadata": {}, "outputs": [], "source": [ - "NE_locations,_ = waterdata.get_monitoring_locations(\n", - " state_name=\"Nebraska\",\n", - " site_type_code=\"ST\"\n", - " )\n", + "NE_locations, _ = waterdata.get_monitoring_locations(\n", + " state_name=\"Nebraska\", site_type_code=\"ST\"\n", + ")\n", "\n", - "NE_locations_discharge = NE_locations.loc[NE_locations['monitoring_location_id'].isin(NE_discharge['monitoring_location_id'].unique().tolist())]\n", - "display(NE_locations_discharge[[\"monitoring_location_id\", \"monitoring_location_name\", \"hydrologic_unit_code\"]].head())" + "NE_locations_discharge = NE_locations.loc[\n", + " NE_locations[\"monitoring_location_id\"].isin(\n", + " NE_discharge[\"monitoring_location_id\"].unique().tolist()\n", + " )\n", + "]\n", + "display(\n", + " NE_locations_discharge[\n", + " [\"monitoring_location_id\", \"monitoring_location_name\", \"hydrologic_unit_code\"]\n", + " ].head()\n", + ")" ] }, { @@ -326,10 +341,10 @@ "metadata": {}, "outputs": [], "source": [ - "latest_dv,_ = waterdata.get_latest_daily(\n", - " monitoring_location_id=NE_locations_discharge['monitoring_location_id'].tolist(),\n", + "latest_dv, _ = waterdata.get_latest_daily(\n", + " monitoring_location_id=NE_locations_discharge[\"monitoring_location_id\"].tolist(),\n", " parameter_code=\"00060\",\n", - " statistic_id=\"00003\"\n", + " statistic_id=\"00003\",\n", ")\n", "display(latest_dv.head())" ] @@ -349,8 +364,12 @@ "metadata": {}, "outputs": [], "source": [ - "latest_dv['date'] = latest_dv['time'].astype(str)\n", - "latest_dv[['geometry', 'monitoring_location_id', 'date', 'value', 'unit_of_measure']].set_crs(crs=\"WGS84\").explore(column='value', tiles='CartoDB dark matter', cmap='YlOrRd', scheme=None, legend=True)" + "latest_dv[\"date\"] = latest_dv[\"time\"].astype(str)\n", + "latest_dv[\n", + " [\"geometry\", \"monitoring_location_id\", \"date\", \"value\", \"unit_of_measure\"]\n", + "].set_crs(crs=\"WGS84\").explore(\n", + " column=\"value\", tiles=\"CartoDB dark matter\", cmap=\"YlOrRd\", scheme=None, legend=True\n", + ")" ] }, { @@ -368,13 +387,15 @@ "metadata": {}, "outputs": [], "source": [ - "latest_instantaneous,_ = waterdata.get_latest_continuous(\n", - " monitoring_location_id=NE_locations_discharge['monitoring_location_id'].tolist(),\n", - " parameter_code=\"00060\"\n", + "latest_instantaneous, _ = waterdata.get_latest_continuous(\n", + " monitoring_location_id=NE_locations_discharge[\"monitoring_location_id\"].tolist(),\n", + " parameter_code=\"00060\",\n", ")\n", "\n", - "latest_instantaneous['datetime'] = latest_instantaneous['time'].astype(str)\n", - "latest_instantaneous[['geometry', 'monitoring_location_id', 'datetime', 'value', 'unit_of_measure']].set_crs(crs=\"WGS84\").explore(column='value', cmap='YlOrRd', scheme=None, legend=True)" + "latest_instantaneous[\"datetime\"] = latest_instantaneous[\"time\"].astype(str)\n", + "latest_instantaneous[\n", + " 
[\"geometry\", \"monitoring_location_id\", \"datetime\", \"value\", \"unit_of_measure\"]\n", + "].set_crs(crs=\"WGS84\").explore(column=\"value\", cmap=\"YlOrRd\", scheme=None, legend=True)" ] }, { @@ -395,15 +416,21 @@ "metadata": {}, "outputs": [], "source": [ - "missouri_river_sites = NE_locations_discharge.loc[NE_locations_discharge['monitoring_location_name'].str.contains(\"Missouri\")]\n", - "display(missouri_river_sites[[\n", - " 'county_name',\n", - " 'site_type',\n", - " 'monitoring_location_id',\n", - " 'monitoring_location_name',\n", - " 'drainage_area',\n", - " 'altitude'\n", - " ]])" + "missouri_river_sites = NE_locations_discharge.loc[\n", + " NE_locations_discharge[\"monitoring_location_name\"].str.contains(\"Missouri\")\n", + "]\n", + "display(\n", + " missouri_river_sites[\n", + " [\n", + " \"county_name\",\n", + " \"site_type\",\n", + " \"monitoring_location_id\",\n", + " \"monitoring_location_name\",\n", + " \"drainage_area\",\n", + " \"altitude\",\n", + " ]\n", + " ]\n", + ")" ] }, { @@ -422,8 +449,8 @@ "outputs": [], "source": [ "one_year_ago = (date.today() - relativedelta(years=1)).strftime(\"%Y-%m-%d\")\n", - "missouri_site_ids = missouri_river_sites['monitoring_location_id'].tolist()\n", - "missouri_site_names = missouri_river_sites['monitoring_location_name'].tolist()" + "missouri_site_ids = missouri_river_sites[\"monitoring_location_id\"].tolist()\n", + "missouri_site_names = missouri_river_sites[\"monitoring_location_name\"].tolist()" ] }, { @@ -433,12 +460,12 @@ "metadata": {}, "outputs": [], "source": [ - "daily_values,_ = waterdata.get_daily(\n", + "daily_values, _ = waterdata.get_daily(\n", " monitoring_location_id=missouri_site_ids,\n", " parameter_code=\"00060\",\n", - " statistic_id=\"00003\", # mean daily value\n", + " statistic_id=\"00003\", # mean daily value\n", " time=f\"{one_year_ago}/..\",\n", - " skip_geometry=True\n", + " skip_geometry=True,\n", ")" ] }, @@ -449,10 +476,10 @@ "metadata": {}, "outputs": [], "source": [ - "instantaneous_values,_ = waterdata.get_continuous(\n", + "instantaneous_values, _ = waterdata.get_continuous(\n", " monitoring_location_id=missouri_site_ids,\n", " parameter_code=\"00060\",\n", - " time=f\"{one_year_ago}T00:00:00Z/..\"\n", + " time=f\"{one_year_ago}T00:00:00Z/..\",\n", ")" ] }, @@ -475,23 +502,30 @@ "axes = axes.ravel()\n", "\n", "# Y-axis formatter (with thousands separators)\n", - "tick_fmt = mtick.StrMethodFormatter('{x:,.0f}')\n", + "tick_fmt = mtick.StrMethodFormatter(\"{x:,.0f}\")\n", "\n", "for ax, site, site_name in zip(axes, missouri_site_ids, missouri_site_names):\n", " # Filter per site & sort by time\n", - " inst = instantaneous_values.loc[instantaneous_values['monitoring_location_id'] == site, [\"time\", \"value\"]].sort_values(\"time\")\n", - " daily = daily_values.loc[daily_values['monitoring_location_id'] == site, [\"time\", \"value\"]].sort_values(\"time\")\n", + " inst = instantaneous_values.loc[\n", + " instantaneous_values[\"monitoring_location_id\"] == site, [\"time\", \"value\"]\n", + " ].sort_values(\"time\")\n", + " daily = daily_values.loc[\n", + " daily_values[\"monitoring_location_id\"] == site, [\"time\", \"value\"]\n", + " ].sort_values(\"time\")\n", "\n", " # Instantaneous (line)\n", " ax.plot(\n", - " inst[\"time\"], inst[\"value\"],\n", - " color=\"#1f77b4\", lw=1.0, label=\"Instantaneous\", zorder=1\n", + " inst[\"time\"],\n", + " inst[\"value\"],\n", + " color=\"#1f77b4\",\n", + " lw=1.0,\n", + " label=\"Instantaneous\",\n", + " zorder=1,\n", " )\n", "\n", " # Daily mean (black 
dots)\n", " ax.scatter(\n", - " daily[\"time\"], daily[\"value\"],\n", - " c=\"black\", s=2, label=\"Daily mean\", zorder=2\n", + " daily[\"time\"], daily[\"value\"], c=\"black\", s=2, label=\"Daily mean\", zorder=2\n", " )\n", "\n", " # Axes styling\n", @@ -512,8 +546,8 @@ "\n", "handles, labels = axes[-1].get_legend_handles_labels()\n", "fig.legend(handles, labels, loc=\"lower center\", ncol=2, frameon=False)\n", - "fig.suptitle(f\"Missouri River sites - Daily Mean vs Instantaneous Discharge\")\n", - "fig.autofmt_xdate()\n" + "fig.suptitle(\"Missouri River sites - Daily Mean vs Instantaneous Discharge\")\n", + "fig.autofmt_xdate()" ] }, { @@ -532,10 +566,10 @@ "metadata": {}, "outputs": [], "source": [ - "field_measurements,_ = waterdata.get_field_measurements(\n", + "field_measurements, _ = waterdata.get_field_measurements(\n", " monitoring_location_id=missouri_site_ids,\n", " parameter_code=\"00060\",\n", - " time=f\"{one_year_ago}T00:00:00Z/..\"\n", + " time=f\"{one_year_ago}T00:00:00Z/..\",\n", ")\n", "display(field_measurements.head())" ] @@ -557,13 +591,10 @@ "source": [ "for ax, site in zip(axes, missouri_site_ids):\n", " field = field_measurements.loc[\n", - " field_measurements['monitoring_location_id'] == site, [\"time\", \"value\"]\n", + " field_measurements[\"monitoring_location_id\"] == site, [\"time\", \"value\"]\n", " ].sort_values(\"time\")\n", "\n", - " ax.scatter(\n", - " field[\"time\"], field[\"value\"],\n", - " c=\"red\", s=4, label=\"Field\", zorder=3\n", - " )\n", + " ax.scatter(field[\"time\"], field[\"value\"], c=\"red\", s=4, label=\"Field\", zorder=3)\n", "\n", "# Remove any existing figure-level legends\n", "for leg in fig.legends:\n", @@ -573,7 +604,7 @@ "\n", "# Redraw the figure\n", "fig.canvas.draw_idle()\n", - "fig\n" + "fig" ] }, { diff --git a/demos/hydroshare/USGS_dataretrieval_DailyValues_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_DailyValues_Examples.ipynb index 1c3cf92e..f4cee321 100644 --- a/demos/hydroshare/USGS_dataretrieval_DailyValues_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_DailyValues_Examples.ipynb @@ -49,8 +49,9 @@ }, "outputs": [], "source": [ - "from dataretrieval import nwis\n", - "from IPython.display import display" + "from IPython.display import display\n", + "\n", + "from dataretrieval import nwis" ] }, { @@ -72,15 +73,15 @@ }, { "cell_type": "markdown", - "source": [ - "Example 1: Get daily value data for a specific parameter at a single USGS NWIS monitoring site between a begin and end date." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Example 1: Get daily value data for a specific parameter at a single USGS NWIS monitoring site between a begin and end date." 
+ ] }, { "cell_type": "code", @@ -93,13 +94,15 @@ "outputs": [], "source": [ "# Set the parameters needed to retrieve data\n", - "siteNumber = \"10109000\" # LOGAN RIVER ABOVE STATE DAM, NEAR LOGAN, UT\n", - "parameterCode = \"00060\" # Discharge\n", + "siteNumber = \"10109000\" # LOGAN RIVER ABOVE STATE DAM, NEAR LOGAN, UT\n", + "parameterCode = \"00060\" # Discharge\n", "startDate = \"2020-10-01\"\n", "endDate = \"2021-09-30\"\n", "\n", "# Retrieve the data\n", - "dailyStreamflow = nwis.get_dv(sites=siteNumber, parameterCd=parameterCode, start=startDate, end=endDate) \n", + "dailyStreamflow = nwis.get_dv(\n", + " sites=siteNumber, parameterCd=parameterCode, start=startDate, end=endDate\n", + ")\n", "print(\"Retrieved \" + str(len(dailyStreamflow[0])) + \" data values.\")" ] }, @@ -130,29 +133,29 @@ }, { "cell_type": "markdown", - "source": [ - "Show the data types of the columns in the resulting data frame." - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Show the data types of the columns in the resulting data frame." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "print(dailyStreamflow[0].dtypes)" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "print(dailyStreamflow[0].dtypes)" + ] }, { "cell_type": "markdown", @@ -168,16 +171,16 @@ { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "dailyStreamflow[0].describe()" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "dailyStreamflow[0].describe()" + ] }, { "cell_type": "markdown", @@ -192,19 +195,19 @@ }, { "cell_type": "code", - "source": [ - "ax = dailyStreamflow[0].plot(y='00060_Mean')\n", - "ax.set_xlabel('Date')\n", - "ax.set_ylabel('Streamflow (cfs)')" - ], + "execution_count": null, "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "ax = dailyStreamflow[0].plot(y=\"00060_Mean\")\n", + "ax.set_xlabel(\"Date\")\n", + "ax.set_ylabel(\"Streamflow (cfs)\")" + ] }, { "cell_type": "markdown", @@ -223,7 +226,9 @@ }, "outputs": [], "source": [ - "print(\"The query URL used to retrieve the data from NWIS was: \" + dailyStreamflow[1].url)" + "print(\n", + " \"The query URL used to retrieve the data from NWIS was: \" + dailyStreamflow[1].url\n", + ")" ] }, { @@ -258,9 +263,13 @@ "outputs": [], "source": [ "siteID = \"04085427\"\n", - "dailyQAndT = nwis.get_dv(sites=siteID, parameterCd=[\"00010\", \"00060\"],\n", - " start=startDate, end=endDate,\n", - " statCd=[\"00001\", \"00003\"])\n", + "dailyQAndT = nwis.get_dv(\n", + " sites=siteID,\n", + " parameterCd=[\"00010\", \"00060\"],\n", + " start=startDate,\n", + " end=endDate,\n", + " statCd=[\"00001\", \"00003\"],\n", + ")\n", "display(dailyQAndT[0])" ] }, @@ -281,25 +290,35 @@ }, "outputs": [], "source": [ - "dailyMultiSites = nwis.get_dv(sites=[\"01491000\", \"01645000\"], parameterCd=[\"00010\", \"00060\"],\n", - " start=\"2012-01-01\", end=\"2012-06-30\", statCd=[\"00001\",\"00003\"])\n", + "dailyMultiSites = nwis.get_dv(\n", + " sites=[\"01491000\", \"01645000\"],\n", + " parameterCd=[\"00010\", \"00060\"],\n", + " start=\"2012-01-01\",\n", + " end=\"2012-06-30\",\n", + " statCd=[\"00001\", \"00003\"],\n", + ")\n", "display(dailyMultiSites[0])" ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": "The following example is the 
same as the previous example but with multi index turned off (multi_index=False)" }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "dailyMultiSites = nwis.get_dv(sites=[\"01491000\", \"01645000\"], parameterCd=[\"00010\", \"00060\"],\n", - " start=\"2012-01-01\", end=\"2012-06-30\", statCd=[\"00001\",\"00003\"],\n", - " multi_index=False)\n", + "dailyMultiSites = nwis.get_dv(\n", + " sites=[\"01491000\", \"01645000\"],\n", + " parameterCd=[\"00010\", \"00060\"],\n", + " start=\"2012-01-01\",\n", + " end=\"2012-06-30\",\n", + " statCd=[\"00001\", \"00003\"],\n", + " multi_index=False,\n", + ")\n", "display(dailyMultiSites[0])" ] }, @@ -321,7 +340,9 @@ "outputs": [], "source": [ "siteID = \"05212700\"\n", - "notActive = nwis.get_dv(sites=siteID, parameterCd=\"00060\", start=\"2014-01-01\", end=\"2014-01-07\")\n", + "notActive = nwis.get_dv(\n", + " sites=siteID, parameterCd=\"00060\", start=\"2014-01-01\", end=\"2014-01-07\"\n", + ")\n", "display(notActive[0])" ] } diff --git a/demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb index 5c31853b..7768317e 100644 --- a/demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_GroundwaterLevels_Examples.ipynb @@ -59,8 +59,9 @@ }, "outputs": [], "source": [ - "from dataretrieval import nwis\n", - "from IPython.display import display" + "from IPython.display import display\n", + "\n", + "from dataretrieval import nwis" ] }, { @@ -199,7 +200,7 @@ }, "outputs": [], "source": [ - "data[0]['lev_va'].describe()" + "data[0][\"lev_va\"].describe()" ] }, { @@ -225,9 +226,9 @@ }, "outputs": [], "source": [ - "ax = data[0].plot(x = 'lev_dt', y='lev_va')\n", - "ax.set_xlabel('Date')\n", - "ax.set_ylabel('Water Level (feet below land surface)')" + "ax = data[0].plot(x=\"lev_dt\", y=\"lev_va\")\n", + "ax.set_xlabel(\"Date\")\n", + "ax.set_ylabel(\"Water Level (feet below land surface)\")" ] }, { @@ -287,15 +288,15 @@ ] }, { - "metadata": {}, "cell_type": "markdown", + "metadata": {}, "source": "The following example is the same as the previous example but with multi index turned off (multi_index=False)" }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "site_ids = [\"434400121275801\", \"375907091432201\"]\n", "data2 = nwis.get_gwlevels(sites=site_ids, multi_index=False)\n", @@ -389,7 +390,7 @@ "outputs": [], "source": [ "data4 = nwis.get_gwlevels(sites=site_id, start=\"1980-01-01\", end=\"2000-12-31\")\n", - "print(\"Retrieved \" + str(len(data4[0])) + \" data values.\")\n" + "print(\"Retrieved \" + str(len(data4[0])) + \" data values.\")" ] } ], diff --git a/demos/hydroshare/USGS_dataretrieval_Measurements_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_Measurements_Examples.ipynb index 19f8c729..62ad7d36 100644 --- a/demos/hydroshare/USGS_dataretrieval_Measurements_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_Measurements_Examples.ipynb @@ -16,55 +16,59 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Install the Package\n", "\n", "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment." 
- ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "!pip install dataretrieval" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "!pip install dataretrieval" + ] }, { "cell_type": "markdown", - "source": [ - "Load the package so you can use it along with other packages used in this notebook." - ], "metadata": { "collapsed": false - } + }, + "source": [ + "Load the package so you can use it along with other packages used in this notebook." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "from dataretrieval import nwis\n", - "from IPython.display import display" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "from IPython.display import display\n", + "\n", + "from dataretrieval import nwis" + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Basic Usage\n", "\n", @@ -75,174 +79,173 @@ "* **sites** (list of strings): A list of USGS site codes to retrieve data for. If the qwdata parameter site_no is supplied, it will overwrite the sites parameter.\n", "* **start** (string): The beginning date of a period for which to retrieve measurements. If the qwdata parameter begin_date is supplied, it will overwrite the start parameter.\n", "* **end** (string): The ending date of a period for which to retrieve measurements. If the qwdata parameter end_date is supplied, it will overwrite the end parameter." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "Example 1: Get all of the surface water measurements for a single site" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "Example 1: Get all of the surface water measurements for a single site" + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "measurements1 = nwis.get_discharge_measurements(sites=\"10109000\")\n", - "print(\"Retrieved \" + str(len(measurements1[0])) + \" data values.\")" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "measurements1 = nwis.get_discharge_measurements(sites=\"10109000\")\n", + "print(\"Retrieved \" + str(len(measurements1[0])) + \" data values.\")" + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Interpreting the Result\n", "\n", "The result of calling the `get_discharge_measurements()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the discharge measurements for the time period requested.\n", "\n", "Once you've got the data frame, there's several useful things you can do to explore the data." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "Display the data frame as a table" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Display the data frame as a table" + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "display(measurements1[0])" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "display(measurements1[0])" + ] }, { "cell_type": "markdown", - "source": [ - "Show the data types of the columns in the resulting data frame." 
- ], "metadata": { "collapsed": false - } + }, + "source": [ + "Show the data types of the columns in the resulting data frame." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "print(measurements1[0].dtypes)" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "print(measurements1[0].dtypes)" + ] }, { "cell_type": "markdown", - "source": [ - "The other part of the result returned from the `get_discharge_measurements()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." - ], "metadata": { "collapsed": false - } + }, + "source": [ + "The other part of the result returned from the `get_discharge_measurements()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "print(\"The query URL used to retrieve the data from NWIS was: \" + measurements1[1].url)" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "print(\"The query URL used to retrieve the data from NWIS was: \" + measurements1[1].url)" + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Additional Examples\n", "\n", "Example 2: Get all of the surface water measurements between a start and end date" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "measurements2 = nwis.get_discharge_measurements(sites=\"10109000\", start=\"2019-01-01\", end=\"2019-12-31\")\n", - "print(\"Retrieved \" + str(len(measurements2[0])) + \" data values.\")\n", - "display(measurements2[0])" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "measurements2 = nwis.get_discharge_measurements(\n", + " sites=\"10109000\", start=\"2019-01-01\", end=\"2019-12-31\"\n", + ")\n", + "print(\"Retrieved \" + str(len(measurements2[0])) + \" data values.\")\n", + "display(measurements2[0])" + ] }, { "cell_type": "markdown", - "source": [ - "Example 3: Get all of the surface water measurements for multiple sites" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } - } + }, + "source": [ + "Example 3: Get all of the surface water measurements for multiple sites" + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "measurements3 = nwis.get_discharge_measurements(sites=[\"01594440\", \"040851325\"])\n", - "print(\"Retrieved \" + str(len(measurements3[0])) + \" data values.\")\n", - "display(measurements3[0])" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "measurements3 = nwis.get_discharge_measurements(sites=[\"01594440\", \"040851325\"])\n", + "print(\"Retrieved \" + str(len(measurements3[0])) + \" data values.\")\n", + 
"display(measurements3[0])" + ] } ], "metadata": { diff --git a/demos/hydroshare/USGS_dataretrieval_NLDI_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_NLDI_Examples.ipynb index c627e452..2237a465 100644 --- a/demos/hydroshare/USGS_dataretrieval_NLDI_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_NLDI_Examples.ipynb @@ -49,9 +49,10 @@ "metadata": {}, "outputs": [], "source": [ - "from dataretrieval import nldi\n", + "import folium\n", "from IPython.display import display\n", - "import folium" + "\n", + "from dataretrieval import nldi" ] }, { @@ -104,8 +105,8 @@ "outputs": [], "source": [ "# set the parameters needed to retrieve data\n", - "feat_source = 'WQP'\n", - "feat_id = 'USGS-01031500'" + "feat_source = \"WQP\"\n", + "feat_id = \"USGS-01031500\"" ] }, { @@ -142,7 +143,9 @@ "metadata": {}, "outputs": [], "source": [ - "basin_json_data = nldi.get_basin(feature_source=feat_source, feature_id=feat_id, as_json=True)\n", + "basin_json_data = nldi.get_basin(\n", + " feature_source=feat_source, feature_id=feat_id, as_json=True\n", + ")\n", "print(basin_json_data)" ] }, @@ -161,22 +164,22 @@ "metadata": {}, "outputs": [], "source": [ - "# Get the feature associated with the monitoring site \n", + "# Get the feature associated with the monitoring site\n", "# More examples of how to use the get_features() function are given below\n", "site_gdf = nldi.get_features(feature_source=feat_source, feature_id=feat_id)\n", "\n", - "# Set the Coordinate Reference System (CRS) for the GeoDataFrames \n", + "# Set the Coordinate Reference System (CRS) for the GeoDataFrames\n", "# containing the basin boundary coordinates and the monitoring site\n", "# epsg='4326' for WGS84\n", - "basin_gdf.set_crs(epsg='4326', inplace=True)\n", - "site_gdf.set_crs(epsg='4326', inplace=True)\n", + "basin_gdf.set_crs(epsg=\"4326\", inplace=True)\n", + "site_gdf.set_crs(epsg=\"4326\", inplace=True)\n", "\n", "# Create a base map using folium\n", "m = folium.Map(location=[site_gdf.geometry.x[0], site_gdf.geometry.y[0]], zoom_start=10)\n", "\n", "# Add the selected monitoring location and basin features to the map\n", - "folium.GeoJson(site_gdf, name='Monitoring Location').add_to(m)\n", - "folium.GeoJson(basin_gdf, name='Basin Boundary', color='red').add_to(m)\n", + "folium.GeoJson(site_gdf, name=\"Monitoring Location\").add_to(m)\n", + "folium.GeoJson(basin_gdf, name=\"Basin Boundary\", color=\"red\").add_to(m)\n", "\n", "# Zoom the map to the bounds of the data\n", "bounds = m.get_bounds()\n", @@ -226,10 +229,12 @@ "metadata": {}, "outputs": [], "source": [ - "feat_source = 'WQP'\n", - "feat_id = 'USGS-01031500'\n", + "feat_source = \"WQP\"\n", + "feat_id = \"USGS-01031500\"\n", "\n", - "flowlines_gdf = nldi.get_flowlines(navigation_mode='UT', feature_source=feat_source, feature_id=feat_id, distance=100)\n", + "flowlines_gdf = nldi.get_flowlines(\n", + " navigation_mode=\"UT\", feature_source=feat_source, feature_id=feat_id, distance=100\n", + ")\n", "display(flowlines_gdf)" ] }, @@ -248,7 +253,9 @@ "metadata": {}, "outputs": [], "source": [ - "flowlines_json_data = nldi.get_flowlines(navigation_mode='UT', feature_source=feat_source, feature_id=feat_id, as_json=True)\n", + "flowlines_json_data = nldi.get_flowlines(\n", + " navigation_mode=\"UT\", feature_source=feat_source, feature_id=feat_id, as_json=True\n", + ")\n", "print(flowlines_json_data)" ] }, @@ -267,23 +274,23 @@ "metadata": {}, "outputs": [], "source": [ - "# Get the feature associated with the monitoring site \n", + "# Get the feature associated 
with the monitoring site\n", "# More examples of how to use the get_features() function are given below\n", "site_gdf = nldi.get_features(feature_source=feat_source, feature_id=feat_id)\n", "\n", - "# Set the Coordinate Reference System (CRS) for the GeoDataFrames \n", + "# Set the Coordinate Reference System (CRS) for the GeoDataFrames\n", "# containing the basin boundary coordinates and the monitoring site\n", "# epsg='4326' for WGS84\n", - "site_gdf.set_crs(epsg='4326', inplace=True)\n", - "flowlines_gdf.set_crs(epsg='4326', inplace=True)\n", + "site_gdf.set_crs(epsg=\"4326\", inplace=True)\n", + "flowlines_gdf.set_crs(epsg=\"4326\", inplace=True)\n", "\n", "# Create a base map using folium\n", "m = folium.Map(location=[site_gdf.geometry.x[0], site_gdf.geometry.y[0]], zoom_start=10)\n", "\n", "# Add the selected monitoring location and basin features to the map\n", - "folium.GeoJson(site_gdf, name='Monitoring Location').add_to(m)\n", - "folium.GeoJson(basin_gdf, name='Basin Boundary', color='red').add_to(m)\n", - "folium.GeoJson(flowlines_gdf, name='Flowlines', color='blue').add_to(m)\n", + "folium.GeoJson(site_gdf, name=\"Monitoring Location\").add_to(m)\n", + "folium.GeoJson(basin_gdf, name=\"Basin Boundary\", color=\"red\").add_to(m)\n", + "folium.GeoJson(flowlines_gdf, name=\"Flowlines\", color=\"blue\").add_to(m)\n", "\n", "# Zoom the map to the bounds of the data\n", "bounds = m.get_bounds()\n", @@ -312,7 +319,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = nldi.get_flowlines(navigation_mode='UM', comid=13294314)\n", + "gdf = nldi.get_flowlines(navigation_mode=\"UM\", comid=13294314)\n", "display(gdf)" ] }, @@ -331,7 +338,9 @@ "metadata": {}, "outputs": [], "source": [ - "flowlines_json_data = nldi.get_flowlines(navigation_mode='UM', comid=13294314, as_json=True)\n", + "flowlines_json_data = nldi.get_flowlines(\n", + " navigation_mode=\"UM\", comid=13294314, as_json=True\n", + ")\n", "print(flowlines_json_data)" ] }, @@ -386,9 +395,14 @@ "metadata": {}, "outputs": [], "source": [ - "feat_source = 'WQP'\n", - "feat_id = 'USGS-01031500'\n", - "features_gdf = nldi.get_features(data_source='nwissite', navigation_mode='UM', feature_source=feat_source, feature_id=feat_id)\n", + "feat_source = \"WQP\"\n", + "feat_id = \"USGS-01031500\"\n", + "features_gdf = nldi.get_features(\n", + " data_source=\"nwissite\",\n", + " navigation_mode=\"UM\",\n", + " feature_source=feat_source,\n", + " feature_id=feat_id,\n", + ")\n", "display(features_gdf)" ] }, @@ -407,24 +421,24 @@ "metadata": {}, "outputs": [], "source": [ - "# Get the feature associated with the monitoring site \n", + "# Get the feature associated with the monitoring site\n", "# More examples of how to use the get_features() function are given below\n", "site_gdf = nldi.get_features(feature_source=feat_source, feature_id=feat_id)\n", "\n", - "# Set the Coordinate Reference System (CRS) for the GeoDataFrames \n", + "# Set the Coordinate Reference System (CRS) for the GeoDataFrames\n", "# containing the basin boundary coordinates and the monitoring site\n", "# epsg='4326' for WGS84\n", - "site_gdf.set_crs(epsg='4326', inplace=True)\n", - "features_gdf.set_crs(epsg='4326', inplace=True)\n", + "site_gdf.set_crs(epsg=\"4326\", inplace=True)\n", + "features_gdf.set_crs(epsg=\"4326\", inplace=True)\n", "\n", "# Create a base map using folium\n", "m = folium.Map(location=[site_gdf.geometry.x[0], site_gdf.geometry.y[0]], zoom_start=10)\n", "\n", "# Add the selected monitoring location and basin features to the map\n", - 
"folium.GeoJson(site_gdf, name='Monitoring Location').add_to(m)\n", - "folium.GeoJson(basin_gdf, name='Basin Boundary', color='red').add_to(m)\n", - "folium.GeoJson(flowlines_gdf, name='Flowlines', color='blue').add_to(m)\n", - "folium.GeoJson(features_gdf, name='Features', color='green').add_to(m)\n", + "folium.GeoJson(site_gdf, name=\"Monitoring Location\").add_to(m)\n", + "folium.GeoJson(basin_gdf, name=\"Basin Boundary\", color=\"red\").add_to(m)\n", + "folium.GeoJson(flowlines_gdf, name=\"Flowlines\", color=\"blue\").add_to(m)\n", + "folium.GeoJson(features_gdf, name=\"Features\", color=\"green\").add_to(m)\n", "\n", "# Zoom the map to the bounds of the data\n", "bounds = m.get_bounds()\n", @@ -452,7 +466,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = nldi.get_features(data_source='nwissite', navigation_mode='UM', comid=13294314)\n", + "gdf = nldi.get_features(data_source=\"nwissite\", navigation_mode=\"UM\", comid=13294314)\n", "display(gdf)" ] }, @@ -472,7 +486,7 @@ "metadata": {}, "outputs": [], "source": [ - "gdf = nldi.get_features(feature_source='WQP', feature_id='USGS-01031500')\n", + "gdf = nldi.get_features(feature_source=\"WQP\", feature_id=\"USGS-01031500\")\n", "display(gdf)" ] }, @@ -537,8 +551,8 @@ "outputs": [], "source": [ "# set the parameters needed to retrieve data\n", - "feat_source = 'WQP'\n", - "feat_id = 'USGS-01031500'" + "feat_source = \"WQP\"\n", + "feat_id = \"USGS-01031500\"" ] }, { @@ -568,7 +582,12 @@ "metadata": {}, "outputs": [], "source": [ - "flowlines_data = nldi.search(navigation_mode='UM', feature_source=feat_source, feature_id=feat_id, find='flowlines')\n", + "flowlines_data = nldi.search(\n", + " navigation_mode=\"UM\",\n", + " feature_source=feat_source,\n", + " feature_id=feat_id,\n", + " find=\"flowlines\",\n", + ")\n", "print(flowlines_data)" ] }, @@ -588,8 +607,13 @@ "metadata": {}, "outputs": [], "source": [ - "features_data = nldi.search(data_source='nwissite', navigation_mode='UM', feature_source=feat_source,\n", - " feature_id=feat_id, find='features')\n", + "features_data = nldi.search(\n", + " data_source=\"nwissite\",\n", + " navigation_mode=\"UM\",\n", + " feature_source=feat_source,\n", + " feature_id=feat_id,\n", + " find=\"features\",\n", + ")\n", "print(features_data)" ] } diff --git a/demos/hydroshare/USGS_dataretrieval_ParameterCodes_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_ParameterCodes_Examples.ipynb index dd98cb95..c688e47c 100644 --- a/demos/hydroshare/USGS_dataretrieval_ParameterCodes_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_ParameterCodes_Examples.ipynb @@ -19,55 +19,59 @@ }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Install the Package\n", "\n", "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "!pip install dataretrieval" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "!pip install dataretrieval" + ] }, { "cell_type": "markdown", - "source": [ - "Load the package so you can use it along with other packages used in this notebook." - ], "metadata": { "collapsed": false - } + }, + "source": [ + "Load the package so you can use it along with other packages used in this notebook." 
+ ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "from dataretrieval import nwis\n", - "from IPython.display import display" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "from IPython.display import display\n", + "\n", + "from dataretrieval import nwis" + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Basic Usage\n", "\n", @@ -76,62 +80,61 @@ "Arguments (Additional arguments, if supplied, will be used as query parameters)\n", "\n", "* **parameterCd** (string): A string containing the parameter code for which information is to be retrieved." - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "Example 1: Retrieve information for a set of USGS NWIS parameter codes." - ], "metadata": { "collapsed": false - } + }, + "source": [ + "Example 1: Retrieve information for a set of USGS NWIS parameter codes." + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [ - "parameter_codes = nwis.get_pmcodes(['00400'])\n", - "print(\"Retrieved information about \" + str(len(parameter_codes[0])) + \" parameter code.\")" - ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } - } + }, + "outputs": [], + "source": [ + "parameter_codes = nwis.get_pmcodes([\"00400\"])\n", + "print(\n", + " \"Retrieved information about \" + str(len(parameter_codes[0])) + \" parameter code.\"\n", + ")" + ] }, { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "### Interpreting the Result\n", "\n", "The result of calling the `get_pmcodes()` function is an object that contains a Pandas data frame object and an associated metadata object. The Pandas data frame contains the parameter code information requested.\n", "\n", "Once you've got the data frame, you can explore the data." 
-  ],
-  "metadata": {
-   "collapsed": false
-  }
+  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "outputs": [],
-  "source": [
-   "# Display the data frame as a table\n",
-   "display(parameter_codes[0])\n"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%%\n"
   }
-  }
+  },
+  "outputs": [],
+  "source": [
+   "# Display the data frame as a table\n",
+   "display(parameter_codes[0])"
+  ]
 }
 ],
 "metadata": {
diff --git a/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb
index 236eff05..543dee44 100644
--- a/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb
+++ b/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb
@@ -60,8 +60,9 @@
  },
  "outputs": [],
  "source": [
-  "from dataretrieval import nwis\n",
-  "from IPython.display import display"
+  "from IPython.display import display\n",
+  "\n",
+  "from dataretrieval import nwis"
  ]
 },
 {
@@ -100,7 +101,7 @@
  },
  "outputs": [],
  "source": [
-  "site_ids = ['01594440', '040851325']\n",
+  "site_ids = [\"01594440\", \"040851325\"]\n",
   "peak_data = nwis.get_discharge_peaks(site_ids)\n",
   "print(\"Retrieved \" + str(len(peak_data[0])) + \" data values.\")"
  ]
 },
@@ -184,17 +185,17 @@
  ]
 },
 {
-  "metadata": {},
  "cell_type": "markdown",
+  "metadata": {},
  "source": "The following example is the same as the previous example but with the multi-index turned off (multi_index=False)"
 },
 {
-  "metadata": {},
  "cell_type": "code",
-  "outputs": [],
  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
  "source": [
-  "site_ids = ['01594440', '040851325']\n",
+  "site_ids = [\"01594440\", \"040851325\"]\n",
   "peak_data = nwis.get_discharge_peaks(site_ids, multi_index=False)\n",
   "print(\"Retrieved \" + str(len(peak_data[0])) + \" data values.\")"
  ]
 },
@@ -252,7 +253,7 @@
  },
  "outputs": [],
  "source": [
-  "data4 = nwis.get_discharge_peaks(stations, start='1953-01-01', end='1960-01-01')\n",
+  "data4 = nwis.get_discharge_peaks(stations, start=\"1953-01-01\", end=\"1960-01-01\")\n",
   "display(data4[0])"
  ]
 }
diff --git a/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb
index 2d67150b..c834b2a2 100644
--- a/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb
+++ b/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb
@@ -2,66 +2,70 @@
 "cells": [
 {
  "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false
+  },
  "source": [
   "# USGS dataretrieval Python Package `get_ratings()` Examples\n",
   "\n",
   "This notebook provides examples of using the Python dataretrieval package to retrieve rating curve data for a United States Geological Survey (USGS) streamflow gage. The dataretrieval package provides a collection of functions to get data from the USGS National Water Information System (NWIS) and other online sources of hydrology and water quality data, including the United States Environmental Protection Agency (USEPA)."
-  ],
-  "metadata": {
-   "collapsed": false
-  }
+  ]
 },
 {
  "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false
+  },
  "source": [
   "### Install the Package\n",
   "\n",
   "Use the following code to install the package if it doesn't exist already within your Jupyter Python environment."
-  ],
-  "metadata": {
-   "collapsed": false
-  }
+  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "outputs": [],
-  "source": [
-   "!pip install dataretrieval"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%%\n"
   }
-  }
+  },
+  "outputs": [],
+  "source": [
+   "!pip install dataretrieval"
+  ]
 },
 {
  "cell_type": "markdown",
-  "source": [
-   "Load the package so you can use it along with other packages used in this notebook."
-  ],
  "metadata": {
   "collapsed": false
-  }
+  },
+  "source": [
+   "Load the package so you can use it along with other packages used in this notebook."
+  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "outputs": [],
-  "source": [
-   "from dataretrieval import nwis\n",
-   "from IPython.display import display"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%%\n"
   }
-  }
+  },
+  "outputs": [],
+  "source": [
+   "from IPython.display import display\n",
+   "\n",
+   "from dataretrieval import nwis"
+  ]
 },
 {
  "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false
+  },
  "source": [
   "### Basic Usage\n",
   "\n",
@@ -75,22 +79,19 @@
   "* **categories** (Listlike): List or comma-delimited string of two-letter category abbreviations\n",
   "\n",
   "NOTE: Not all active USGS streamflow gages have traditional rating curves that relate stage to flow."
-  ],
-  "metadata": {
-   "collapsed": false
-  }
+  ]
 },
 {
  "cell_type": "markdown",
-  "source": [
-   "Example 1: Get rating data for an NWIS Site"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%% md\n"
   }
-  }
+  },
+  "source": [
+   "Example 1: Get rating data for an NWIS Site"
+  ]
 },
 {
  "cell_type": "code",
@@ -110,6 +111,12 @@
 },
 {
  "cell_type": "markdown",
+  "metadata": {
+   "collapsed": false,
+   "pycharm": {
+    "name": "#%% md\n"
+   }
+  },
  "source": [
   "### Interpreting the Result\n",
   "\n",
@@ -128,107 +135,101 @@
   "* INDEP - typically gage height in feet\n",
   "* CORR - the correction for that value\n",
   "* CORRINDEP - the corrected value for CORR"
-  ],
-  "metadata": {
-   "collapsed": false,
-   "pycharm": {
-    "name": "#%% md\n"
-   }
-  }
+  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "outputs": [],
-  "source": [
-   "display(ratingData[0])"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%%\n"
   }
-  }
+  },
+  "outputs": [],
+  "source": [
+   "display(ratingData[0])"
+  ]
 },
 {
  "cell_type": "markdown",
-  "source": [
-   "Show the data types of the columns in the resulting data frame"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%% md\n"
   }
-  }
+  },
+  "source": [
+   "Show the data types of the columns in the resulting data frame"
+  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "outputs": [],
-  "source": [
-   "print(ratingData[0].dtypes)"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%%\n"
   }
-  }
+  },
+  "outputs": [],
+  "source": [
+   "print(ratingData[0].dtypes)"
+  ]
 },
 {
  "cell_type": "markdown",
-  "source": [
-   "The other part of the result returned from the `get_ratings()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines and can be helpful in interpreting the contents of the response."
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%% md\n"
   }
-  }
+  },
+  "source": [
+   "The other part of the result returned from the `get_ratings()` function is a metadata object that contains information about the query that was executed to return the data. For example, you can access the URL that was assembled to retrieve the requested data from the USGS web service. The USGS web service responses contain a descriptive header that defines the contents of the response and can be helpful in interpreting them."
+  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "outputs": [],
-  "source": [
-   "print(\"The query URL used to retrieve the data from NWIS was: \" + ratingData[1].url)"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%%\n"
   }
-  }
+  },
+  "outputs": [],
+  "source": [
+   "print(\"The query URL used to retrieve the data from NWIS was: \" + ratingData[1].url)"
+  ]
 },
 {
  "cell_type": "markdown",
-  "source": [
-   "Example 2: Get rating data for a different NWIS site by changing the site_id"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%% md\n"
   }
-  }
+  },
+  "source": [
+   "Example 2: Get rating data for a different NWIS site by changing the site_id"
+  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
-  "outputs": [],
-  "source": [
-   "site_id = '01594440'\n",
-   "data = nwis.get_ratings(site=site_id, file_type=\"base\")\n",
-   "print(\"Retrieved \" + str(len(data[0])) + \" data values.\")"
-  ],
  "metadata": {
   "collapsed": false,
   "pycharm": {
    "name": "#%%\n"
   }
-  }
+  },
+  "outputs": [],
+  "source": [
+   "site_id = \"01594440\"\n",
+   "data = nwis.get_ratings(site=site_id, file_type=\"base\")\n",
+   "print(\"Retrieved \" + str(len(data[0])) + \" data values.\")"
+  ]
 }
 ],
 "metadata": {
diff --git a/demos/hydroshare/USGS_dataretrieval_SiteInfo_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_SiteInfo_Examples.ipynb
index 51007f9b..9891685f 100644
--- a/demos/hydroshare/USGS_dataretrieval_SiteInfo_Examples.ipynb
+++ b/demos/hydroshare/USGS_dataretrieval_SiteInfo_Examples.ipynb
@@ -60,8 +60,9 @@
  },
  "outputs": [],
  "source": [
-  "from dataretrieval import nwis\n",
-  "from IPython.display import display"
+  "from IPython.display import display\n",
+  "\n",
+  "from dataretrieval import nwis"
  ]
 },
 {
@@ -251,7 +252,7 @@
  "outputs": [],
  "source": [
   "# Get the site information for a state\n",
-  "siteINFO_state = nwis.get_info(stateCd='UT')\n",
+  "siteINFO_state = nwis.get_info(stateCd=\"UT\")\n",
   "display(siteINFO_state[0])"
  ]
 },
@@ -269,10 +270,10 @@
  "outputs": [],
  "source": [
   "# Create a list of hucs for which to query sites\n",
-  "huc_list = ['16010203']\n",
+  "huc_list = [\"16010203\"]\n",
   "\n",
   "# Get the site information - limit to stream sites\n",
-  "siteINFO_huc = nwis.get_info(huc=huc_list, siteType='ST')\n",
+  "siteINFO_huc = nwis.get_info(huc=huc_list, siteType=\"ST\")\n",
   "display(siteINFO_huc[0])"
  ]
 }
diff --git a/demos/hydroshare/USGS_dataretrieval_SiteInventory_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_SiteInventory_Examples.ipynb
index d8a2ceb1..35ce1e97 100644
--- a/demos/hydroshare/USGS_dataretrieval_SiteInventory_Examples.ipynb
+++ b/demos/hydroshare/USGS_dataretrieval_SiteInventory_Examples.ipynb
@@ -60,8 +60,9 @@
  },
  "outputs": [],
  "source": [
-  "from dataretrieval import nwis\n",
-  "from IPython.display import display"
+  "from IPython.display import display\n",
+  "\n",
+  "from dataretrieval import nwis"
  ]
 },
 {
@@ -177,7 +178,7 @@
  },
  "outputs": [],
  "source": [
-  "print('The query URL used to retrieve the data from NWIS was: ' + siteListPhos[1].url)"
+  "print(\"The query URL used to retrieve the data from NWIS was: \" + siteListPhos[1].url)"
  ]
 },
@@ -203,7 +204,7 @@
  },
  "outputs": [],
  "source": [
-  "oneSite = nwis.what_sites(sites='05114000')\n",
+  "oneSite = nwis.what_sites(sites=\"05114000\")\n",
   "display(oneSite[0])"
  ]
 },
@@ -220,7 +221,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
-  "oneSite = nwis.what_sites(sites='05114000', siteOutput='expanded')\n",
+  "oneSite = nwis.what_sites(sites=\"05114000\", siteOutput=\"expanded\")\n",
   "display(oneSite[0])"
  ]
 },
@@ -249,7 +250,9 @@
  },
  "outputs": [],
  "source": [
-  "UTsites = nwis.what_sites(stateCd='UT', outputDataTypeCd='dv', startDT='1971-07-01', endDT='2021-07-28')\n",
+  "UTsites = nwis.what_sites(\n",
+  "    stateCd=\"UT\", outputDataTypeCd=\"dv\", startDT=\"1971-07-01\", endDT=\"2021-07-28\"\n",
+  ")\n",
   "display(UTsites[0])"
  ]
 },
@@ -268,7 +271,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
-  "oneSite = nwis.what_sites(sites='05114000', seriesCatalogOutput='true')\n",
+  "oneSite = nwis.what_sites(sites=\"05114000\", seriesCatalogOutput=\"true\")\n",
   "display(oneSite[0])"
  ]
 }
diff --git a/demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb
index f67f1510..fa9d9056 100644
--- a/demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb
+++ b/demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb
@@ -60,9 +60,10 @@
  },
  "outputs": [],
  "source": [
-  "from dataretrieval import nwis\n",
   "from IPython.display import display\n",
-  "from matplotlib import ticker"
+  "from matplotlib import ticker\n",
+  "\n",
+  "from dataretrieval import nwis"
  ]
 },
 {
@@ -104,10 +105,12 @@
  "source": [
   "# Set the parameters needed to retrieve data\n",
   "siteNumber = \"02319394\"\n",
-  "parameterCode = \"00060\" # Discharge\n",
+  "parameterCode = \"00060\"  # Discharge\n",
   "\n",
   "# Retrieve the statistics\n",
-  "x1 = nwis.get_stats(sites=siteNumber, parameterCd=parameterCode, statReportType=\"annual\")\n",
+  "x1 = nwis.get_stats(\n",
+  "    sites=siteNumber, parameterCd=parameterCode, statReportType=\"annual\"\n",
+  ")\n",
   "print(\"Retrieved \" + str(len(x1[0])) + \" data values.\")"
  ]
 },
@@ -185,10 +188,10 @@
  },
  "outputs": [],
  "source": [
-  "ax = x1[0].plot(x='year_nu', y='mean_va')\n",
-  "ax.xaxis.set_major_formatter(ticker.FormatStrFormatter('%d'))\n",
-  "ax.set_xlabel('Year')\n",
-  "ax.set_ylabel('Annual mean discharge (cfs)')"
+  "ax = x1[0].plot(x=\"year_nu\", y=\"mean_va\")\n",
+  "ax.xaxis.set_major_formatter(ticker.FormatStrFormatter(\"%d\"))\n",
+  "ax.set_xlabel(\"Year\")\n",
+  "ax.set_ylabel(\"Annual mean discharge (cfs)\")"
  ]
 },
@@ -249,8 +252,11 @@
  },
  "outputs": [],
  "source": [
-  "x2 = nwis.get_stats(sites=[\"02319394\", \"02171500\"], parameterCd=[\"00010\", \"00060\"],\n",
-  "                    statReportType=\"annual\")\n",
+  "x2 = nwis.get_stats(\n",
+  "    sites=[\"02319394\", \"02171500\"],\n",
+  "    parameterCd=[\"00010\", \"00060\"],\n",
+  "    statReportType=\"annual\",\n",
+  ")\n",
   "display(x2[0])"
  ]
 },
@@ -281,9 +287,14 @@
  },
  "outputs": [],
  "source": [
-  "x3 = nwis.get_stats(sites=\"02171500\", parameterCd=[\"00010\", \"00060\"],\n",
-  "                    statReportType=\"daily\", statTypeCd=[\"mean\", \"median\"],\n",
-  "                    startDt=\"2000\", endDt=\"2007\")\n",
+  "x3 = nwis.get_stats(\n",
+  "    sites=\"02171500\",\n",
+  "    parameterCd=[\"00010\", \"00060\"],\n",
+  "    statReportType=\"daily\",\n",
+  "    statTypeCd=[\"mean\", \"median\"],\n",
+  "    startDt=\"2000\",\n",
+  "    endDt=\"2007\",\n",
+  ")\n",
   "display(x3[0])"
  ]
 }
diff --git a/demos/hydroshare/USGS_dataretrieval_UnitValues_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_UnitValues_Examples.ipynb
index c24fc587..b7f39f62 100644
--- a/demos/hydroshare/USGS_dataretrieval_UnitValues_Examples.ipynb
+++ b/demos/hydroshare/USGS_dataretrieval_UnitValues_Examples.ipynb
@@ -60,9 +60,11 @@
  },
  "outputs": [],
  "source": [
-  "from dataretrieval import nwis\n",
+  "from datetime import date\n",
+  "\n",
   "from IPython.display import display\n",
-  "from datetime import date"
+  "\n",
+  "from dataretrieval import nwis"
  ]
 },
 {
@@ -101,14 +103,16 @@
  "outputs": [],
  "source": [
   "# Set the parameters needed for the web service call\n",
-  "siteID = '10109000' # LOGAN RIVER ABOVE STATE DAM, NEAR LOGAN, UT\n",
-  "parameterCode = '00060' # Discharge\n",
-  "startDate = '2021-09-01'\n",
-  "endDate = '2021-09-30'\n",
+  "siteID = \"10109000\"  # LOGAN RIVER ABOVE STATE DAM, NEAR LOGAN, UT\n",
+  "parameterCode = \"00060\"  # Discharge\n",
+  "startDate = \"2021-09-01\"\n",
+  "endDate = \"2021-09-30\"\n",
   "\n",
   "# Get the data\n",
-  "discharge = nwis.get_iv(sites=siteID, parameterCd=parameterCode, start=startDate, end=endDate)\n",
-  "print('Retrieved ' + str(len(discharge[0])) + ' data values.')"
+  "discharge = nwis.get_iv(\n",
+  "    sites=siteID, parameterCd=parameterCode, start=startDate, end=endDate\n",
+  ")\n",
+  "print(\"Retrieved \" + str(len(discharge[0])) + \" data values.\")"
  ]
 },
@@ -209,9 +213,9 @@
  },
  "outputs": [],
  "source": [
-  "ax = discharge[0].plot(y='00060')\n",
-  "ax.set_xlabel('Date')\n",
-  "ax.set_ylabel('Streamflow (cfs)')"
+  "ax = discharge[0].plot(y=\"00060\")\n",
+  "ax.set_xlabel(\"Date\")\n",
+  "ax.set_ylabel(\"Streamflow (cfs)\")"
  ]
 },
@@ -235,7 +239,7 @@
  },
  "outputs": [],
  "source": [
-  "print('The query URL used to retrieve the data from NWIS was: ' + discharge[1].url)"
+  "print(\"The query URL used to retrieve the data from NWIS was: \" + discharge[1].url)"
  ]
 },
@@ -263,12 +267,14 @@
  },
  "outputs": [],
  "source": [
-  "site_id = '05114000'\n",
-  "startDate = '2014-10-10'\n",
-  "endDate = '2014-10-10'\n",
+  "site_id = \"05114000\"\n",
+  "startDate = \"2014-10-10\"\n",
+  "endDate = \"2014-10-10\"\n",
   "\n",
-  "discharge2 = nwis.get_iv(sites=site_id, parameterCd=parameterCode, start=startDate, end=endDate)\n",
-  "print('Retrieved ' + str(len(discharge2[0])) + ' data values.')\n",
+  "discharge2 = nwis.get_iv(\n",
+  "    sites=site_id, parameterCd=parameterCode, start=startDate, end=endDate\n",
+  ")\n",
+  "print(\"Retrieved \" + str(len(discharge2[0])) + \" data values.\")\n",
   "display(discharge2[0])"
  ]
 },
@@ -298,8 +304,10 @@
  "outputs": [],
  "source": [
   "today = str(date.today())\n",
-  "discharge_today = nwis.get_iv(sites=site_id, parameterCd=parameterCode, start=today, end=today)\n",
-  "print('Retrieved ' + str(len(discharge_today[0])) + ' data values.')\n",
+  "discharge_today = nwis.get_iv(\n",
+  "    sites=site_id, parameterCd=parameterCode, start=today, end=today\n",
+  ")\n",
+  "print(\"Retrieved \" + str(len(discharge_today[0])) + \" data values.\")\n",
   "display(discharge_today[0])"
  ]
 },
@@ -330,9 +338,13 @@
  },
  "outputs": [],
  "source": [
-  "discharge_UTC = nwis.get_iv(sites=site_id, parameterCd=parameterCode,\n",
-  "                            start='2014-10-10T00:00Z', end='2014-10-10T23:59Z')\n",
-  "print('Retrieved ' + str(len(discharge_UTC[0])) + ' data values.')\n",
+  "discharge_UTC = nwis.get_iv(\n",
+  "    sites=site_id,\n",
+  "    parameterCd=parameterCode,\n",
+  "    start=\"2014-10-10T00:00Z\",\n",
+  "    end=\"2014-10-10T23:59Z\",\n",
+  ")\n",
+  "print(\"Retrieved \" + str(len(discharge_UTC[0])) + \" data values.\")\n",
   "display(discharge_UTC[0])"
  ]
 },
@@ -361,26 +373,35 @@
  },
  "outputs": [],
  "source": [
-  "discharge_multisite = nwis.get_iv(sites=['04024430', '04024000'], parameterCd=parameterCode,\n",
-  "                                  start='2013-10-01', end='2013-10-01')\n",
-  "print('Retrieved ' + str(len(discharge_multisite[0])) + ' data values.')\n",
+  "discharge_multisite = nwis.get_iv(\n",
+  "    sites=[\"04024430\", \"04024000\"],\n",
+  "    parameterCd=parameterCode,\n",
+  "    start=\"2013-10-01\",\n",
+  "    end=\"2013-10-01\",\n",
+  ")\n",
+  "print(\"Retrieved \" + str(len(discharge_multisite[0])) + \" data values.\")\n",
   "display(discharge_multisite[0])"
  ]
 },
 {
-  "metadata": {},
  "cell_type": "markdown",
+  "metadata": {},
  "source": "The following example is the same as the previous example but with the multi-index turned off (multi_index=False)"
 },
 {
-  "metadata": {},
  "cell_type": "code",
-  "outputs": [],
  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
  "source": [
-  "discharge_multisite = nwis.get_iv(sites=['04024430', '04024000'], parameterCd=parameterCode,\n",
-  "                                  start='2013-10-01', end='2013-10-01', multi_index=False)\n",
-  "print('Retrieved ' + str(len(discharge_multisite[0])) + ' data values.')\n",
+  "discharge_multisite = nwis.get_iv(\n",
+  "    sites=[\"04024430\", \"04024000\"],\n",
+  "    parameterCd=parameterCode,\n",
+  "    start=\"2013-10-01\",\n",
+  "    end=\"2013-10-01\",\n",
+  "    multi_index=False,\n",
+  ")\n",
+  "print(\"Retrieved \" + str(len(discharge_multisite[0])) + \" data values.\")\n",
   "display(discharge_multisite[0])"
  ]
 }
diff --git a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb
index 55ccc084..a79ad774 100644
--- a/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb
+++ b/demos/hydroshare/USGS_dataretrieval_WaterSamples_Examples.ipynb
@@ -60,8 +60,9 @@
  },
  "outputs": [],
  "source": [
-  "from dataretrieval import waterdata\n",
-  "from IPython.display import display"
+  "from IPython.display import display\n",
+  "\n",
+  "from dataretrieval import waterdata"
  ]
 },
 {
@@ -206,9 +207,9 @@
  },
  "outputs": [],
  "source": [
-  "siteID = 'USGS-10109000'\n",
+  "siteID = \"USGS-10109000\"\n",
   "wq_data = waterdata.get_samples(monitoringLocationIdentifier=siteID)\n",
-  "print('Retrieved data for ' + str(len(wq_data[0])) + ' samples.')"
+  "print(\"Retrieved data for \" + str(len(wq_data[0])) + \" samples.\")"
  ]
 },
@@ -295,7 +296,9 @@
  },
  "outputs": [],
  "source": [
-  "print('The query URL used to retrieve the data from USGS Samples was: ' + wq_data[1].url)"
+  "print(\n",
+  "    \"The query URL used to retrieve the data from USGS Samples was: \" + wq_data[1].url\n",
+  ")"
  ]
 },
@@ -321,10 +324,12 @@
  },
  "outputs": [],
  "source": [
-  "site_ids = ['USGS-04024430', 'USGS-04024000']\n",
-  "parameter_code = '00065'\n",
-  "wq_multi_site = waterdata.get_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameter_code)\n",
-  "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n",
+  "site_ids = [\"USGS-04024430\", \"USGS-04024000\"]\n",
+  "parameter_code = \"00065\"\n",
+  "wq_multi_site = waterdata.get_samples(\n",
+  "    monitoringLocationIdentifier=site_ids, usgsPCode=parameter_code\n",
+  ")\n",
+  "print(\"Retrieved data for \" + str(len(wq_multi_site[0])) + \" samples.\")\n",
   "display(wq_multi_site[0])"
  ]
 },
@@ -353,13 +358,16 @@
  },
  "outputs": [],
  "source": [
-  "site_ids = ['USGS-04024430', 'USGS-04024000']\n",
-  "parameterCd = ['34247', '30234', '32104', '34220']\n",
-  "startDate = '2012-01-01'\n",
-  "wq_data2 = waterdata.get_samples(monitoringLocationIdentifier=site_ids, usgsPCode=parameterCd,\n",
-  "                                 activityStartDateLower=startDate)\n",
-  "print('Retrieved data for ' + str(len(wq_multi_site[0])) + ' samples.')\n",
-  "display(wq_data2[0])\n"
+  "site_ids = [\"USGS-04024430\", \"USGS-04024000\"]\n",
+  "parameterCd = [\"34247\", \"30234\", \"32104\", \"34220\"]\n",
+  "startDate = \"2012-01-01\"\n",
+  "wq_data2 = waterdata.get_samples(\n",
+  "    monitoringLocationIdentifier=site_ids,\n",
+  "    usgsPCode=parameterCd,\n",
+  "    activityStartDateLower=startDate,\n",
+  ")\n",
+  "print(\"Retrieved data for \" + str(len(wq_data2[0])) + \" samples.\")\n",
+  "display(wq_data2[0])"
  ]
 },
@@ -377,13 +385,20 @@
  "metadata": {},
  "outputs": [],
  "source": [
-  "siteID = 'USGS-10109000'\n",
-  "wq_data,_ = waterdata.get_samples(monitoringLocationIdentifier=siteID)\n",
-  "print('Retrieved data for ' + str(len(wq_data)) + ' sample results.')\n",
+  "siteID = \"USGS-10109000\"\n",
+  "wq_data, _ = waterdata.get_samples(monitoringLocationIdentifier=siteID)\n",
+  "print(\"Retrieved data for \" + str(len(wq_data)) + \" sample results.\")\n",
   "\n",
-  "wq_data[\"characteristic_unit\"] = wq_data[\"Result_Characteristic\"] + \", \" + wq_data[\"Result_MeasureUnit\"]\n",
-  "wq_data_wide = wq_data.pivot_table(index=['Location_Identifier', 'Activity_StartDate', 'Activity_StartTime'], columns=\"characteristic_unit\", values=\"Result_Measure\", aggfunc='first')\n",
-  "display(wq_data_wide)\n"
+  "wq_data[\"characteristic_unit\"] = (\n",
+  "    wq_data[\"Result_Characteristic\"] + \", \" + wq_data[\"Result_MeasureUnit\"]\n",
+  ")\n",
+  "wq_data_wide = wq_data.pivot_table(\n",
+  "    index=[\"Location_Identifier\", \"Activity_StartDate\", \"Activity_StartTime\"],\n",
+  "    columns=\"characteristic_unit\",\n",
+  "    values=\"Result_Measure\",\n",
+  "    aggfunc=\"first\",\n",
+  ")\n",
+  "display(wq_data_wide)"
  ]
 }
 ],
diff --git a/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb
index 2b37816e..51064044 100644
--- a/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb
+++ b/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb
@@ -60,8 +60,9 @@
  },
  "outputs": [],
  "source": [
-  "from dataretrieval import nwis\n",
-  "from IPython.display import display"
+  "from IPython.display import display\n",
+  "\n",
+  "from dataretrieval import nwis"
  ]
 },
 {
@@ -105,8 +106,8 @@
  },
  "outputs": [],
  "source": [
-  "pennsylvania = nwis.get_water_use(state='PA')\n",
-  "print('Retrieved ' + str(len(pennsylvania[0])) + ' water use records.')"
+  "pennsylvania = nwis.get_water_use(state=\"PA\")\n",
+  "print(\"Retrieved \" + str(len(pennsylvania[0])) + \" water use records.\")"
  ]
 },
@@ -199,8 +200,8 @@
  },
  "outputs": [],
  "source": [
-  "ohio = nwis.get_water_use(years=[2000, 2005, 2010], state='OH')\n",
-  "print('Retrieved ' + str(len(ohio[0])) + ' water use records.')\n",
+  "ohio = nwis.get_water_use(years=[2000, 2005, 2010], state=\"OH\")\n",
+  "print(\"Retrieved \" + str(len(ohio[0])) + \" water use records.\")\n",
   "display(ohio[0])"
  ]
 },
@@ -230,9 +231,9 @@
  "outputs": [],
  "source": [
   "# Get water use data for livestock (LI) and irrigation (IT)\n",
-  "kansas = nwis.get_water_use(state='KS', categories=['IT', 'LI'])\n",
-  "print('Retrieved ' + str(len(kansas[0])) + ' water use records.')\n",
-  "display(kansas[0])\n"
+  "kansas = nwis.get_water_use(state=\"KS\", categories=[\"IT\", \"LI\"])\n",
+  "print(\"Retrieved \" + str(len(kansas[0])) + \" water use records.\")\n",
+  "display(kansas[0])"
  ]
 }
 ],
diff --git a/demos/nwqn_data_pull/retrieve_nwqn_samples.py b/demos/nwqn_data_pull/retrieve_nwqn_samples.py
index ceb8ff5c..13a45456 100644
--- a/demos/nwqn_data_pull/retrieve_nwqn_samples.py
+++ b/demos/nwqn_data_pull/retrieve_nwqn_samples.py
@@ -1,15 +1,16 @@
 # Retrieve data from the National Water Quality Assessment Program (NAWQA)
-import lithops
 import math
 import os
-import pandas as pd
-
 from random import randint
 from time import sleep
+
+import lithops
+import pandas as pd
+
 from dataretrieval import nldi, nwis, wqp

-DESTINATION_BUCKET = os.environ.get('DESTINATION_BUCKET')
+DESTINATION_BUCKET = os.environ.get("DESTINATION_BUCKET")
 PROJECT = "National Water Quality Assessment Program (NAWQA)"
 # some sites are not found in NLDI, avoid them for now
 NOT_FOUND_SITES = [
@@ -38,17 +39,20 @@ def map_retrieval(site):
         # reformat for wqp
         site_list = [f"USGS-{site}" for site in site_list]

-        df, _ = wqp_get_results(siteid=site_list,
-                                project=PROJECT,
-                                )
+        df, _ = wqp_get_results(
+            siteid=site_list,
+            project=PROJECT,
+        )

         try:
             # merge sites
-            df['MonitoringLocationIdentifier'] = f"USGS-{site}"
-            df.astype(str).to_parquet(f's3://{DESTINATION_BUCKET}/nwqn-samples.parquet',
-                                      engine='pyarrow',
-                                      partition_cols=['MonitoringLocationIdentifier'],
-                                      compression='zstd')
+            df["MonitoringLocationIdentifier"] = f"USGS-{site}"
+            df.astype(str).to_parquet(
+                f"s3://{DESTINATION_BUCKET}/nwqn-samples.parquet",
+                engine="pyarrow",
+                partition_cols=["MonitoringLocationIdentifier"],
+                compression="zstd",
+            )
             # optionally, `return df` for further processing

         except Exception as e:
@@ -57,6 +61,7 @@ def map_retrieval(site):

 def exponential_backoff(max_retries=5, base_delay=1):
     """Exponential backoff decorator with configurable retries and base delay"""
+
     def decorator(func):
         def wrapper(*args, **kwargs):
             attempts = 0
@@ -67,10 +72,12 @@ def wrapper(*args, **kwargs):
                     attempts += 1
                     if attempts > max_retries:
                         raise e
-                    wait_time = base_delay * (2 ** attempts)
+                    wait_time = base_delay * (2**attempts)
                     print(f"Retrying in {wait_time} seconds...")
                     sleep(wait_time)
+
         return wrapper
+
     return decorator
@@ -116,17 +123,17 @@ def find_neighboring_sites(site, search_factor=0.1, fudge_factor=3.0):
             navigation_mode=mode,
             distance=search_distance,
             data_source="nwissite",
-            )
+        )
         for mode in ["UM", "DM"]  # upstream and downstream
     ]

     features = pd.concat(gdfs, ignore_index=True)

-    df, _ = nwis_get_info(sites=list(features.identifier.str.strip('USGS-')))
+    df, _ = nwis_get_info(sites=list(features.identifier.str.removeprefix("USGS-")))
     # drop sites with dissimilar drainage areas
     df = df.where(
         (df["drain_area_va"] / drain_area_sq_mi) > search_factor,
-        ).dropna(how="all")
+    ).dropna(how="all")

     site_list = df["site_no"].to_list()
@@ -160,13 +167,13 @@ def _estimate_watershed_length_km(drain_area_sq_mi):
     project = "National Water Quality Assessment Program (NAWQA)"

     site_df = pd.read_csv(
-        'NWQN_sites.csv',
-        comment='#',
-        dtype={'SITE_QW_ID': str, 'SITE_FLOW_ID': str},
-        )
+        "NWQN_sites.csv",
+        comment="#",
+        dtype={"SITE_QW_ID": str, "SITE_FLOW_ID": str},
+    )

-    site_list = site_df['SITE_QW_ID'].to_list()
-    #site_list = site_list[:2] # prune for testing
+    site_list = site_df["SITE_QW_ID"].to_list()
+    # site_list = site_list[:2]  # prune for testing

     fexec = lithops.FunctionExecutor(config_file="lithops.yaml")
     futures = fexec.map(map_retrieval, site_list)
diff --git a/demos/nwqn_data_pull/retrieve_nwqn_streamflow.py b/demos/nwqn_data_pull/retrieve_nwqn_streamflow.py
index 30eed0e0..fc6a5cfb 100644
--- a/demos/nwqn_data_pull/retrieve_nwqn_streamflow.py
+++ b/demos/nwqn_data_pull/retrieve_nwqn_streamflow.py
@@ -1,21 +1,21 @@
 # Retrieve data from the National Water Quality Assessment Program (NAWQA)
-import lithops
 import os
+from random import randint
+from time import sleep
+
+import lithops
 import numpy as np
 import pandas as pd
-
+from retrieve_nwqn_samples import BAD_NLDI_SITES, find_neighboring_sites

 from dataretrieval import nwis

-from random import randint
-from time import sleep
-from retrieve_nwqn_samples import find_neighboring_sites, BAD_NLDI_SITES
-
-DESTINATION_BUCKET = os.environ.get('DESTINATION_BUCKET')
+DESTINATION_BUCKET = os.environ.get("DESTINATION_BUCKET")

 START_DATE = "1991-01-01"
 END_DATE = "2023-12-31"

+
 def map_retrieval(site):
     """Map function to pull daily streamflow data from NWIS"""
     print(f"Retrieving daily streamflow from site {site}")
@@ -48,7 +48,7 @@ def map_retrieval(site):
     # fill missing codes to enable string operations
     df["00060_Mean_cd"] = df["00060_Mean_cd"].fillna("M")
     df = df[df["00060_Mean_cd"].str.contains("A")]
-    df['00060_Mean'] = df['00060_Mean'].replace(-999999, np.nan)
+    df["00060_Mean"] = df["00060_Mean"].replace(-999999, np.nan)

     site_info, _ = nwis.get_info(sites=site_list)
     # USACE sites may have same site_no, which creates index conflicts later
@@ -60,12 +60,16 @@ def map_retrieval(site):

     # compute fraction of drainage area
     site_info = site_info[["drain_area_va"]].copy()
-    site_info["drain_fraction"] = site_info["drain_area_va"] / main_site_drainage_area
+    site_info["drain_fraction"] = (
+        site_info["drain_area_va"] / main_site_drainage_area
+    )
     site_info["fraction_diff"] = np.abs(1 - site_info["drain_fraction"])

     # apply drainage area fraction
     df = pd.merge(df, site_info, left_index=True, right_index=True)
-    df["00060_Mean"] *= site_info.loc[df.index.get_level_values("site_no"), "drain_fraction"].values
+    df["00060_Mean"] *= site_info.loc[
+        df.index.get_level_values("site_no"), "drain_fraction"
+    ].values

     # order sites by the difference in drainage area fraction
     fill_order = site_info.sort_values("fraction_diff", ascending=True)
@@ -82,7 +86,9 @@ def map_retrieval(site):
             fill_data = df.loc[fill_site]
             output = update_dataframe(output, fill_data)

-        output = output.drop(columns=["drain_area_va", "drain_fraction", "fraction_diff"])
+        output = output.drop(
+            columns=["drain_area_va", "drain_fraction", "fraction_diff"]
+        )
         output["site_no"] = site

     else:
@@ -91,10 +97,12 @@ def map_retrieval(site):

     try:
         # merge sites
-        output.astype(str).to_parquet(f's3://{DESTINATION_BUCKET}/nwqn-streamflow.parquet',
-                                      engine='pyarrow',
-                                      partition_cols=['site_no'],
-                                      compression='zstd')
+        output.astype(str).to_parquet(
+            f"s3://{DESTINATION_BUCKET}/nwqn-streamflow.parquet",
+            engine="pyarrow",
+            partition_cols=["site_no"],
+            compression="zstd",
+        )
         # optionally, `return output` for further processing

     except Exception as e:
@@ -102,9 +110,9 @@ def map_retrieval(site):


 def update_dataframe(
-        original_df: pd.DataFrame,
-        new_df: pd.DataFrame,
-        overwrite: bool = False,
+    original_df: pd.DataFrame,
+    new_df: pd.DataFrame,
+    overwrite: bool = False,
 ) -> pd.DataFrame:
     """Update a DataFrame with values from another DataFrame.
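The docstring summarizes the helper's intent; as a rough sketch of those semantics (an illustrative assumption, not the repository's implementation), `overwrite=False` fills only the gaps in the original frame, while `overwrite=True` lets the new values win.

    import pandas as pd

    def update_dataframe_sketch(original_df, new_df, overwrite=False):
        # illustrative only; the real update_dataframe may differ in detail
        if overwrite:
            out = original_df.copy()
            out.update(new_df)  # non-NA values from new_df replace existing ones
            return out
        return original_df.combine_first(new_df)  # fill missing values only

    base = pd.DataFrame({"00060_Mean": [1.0, None]})
    fill = pd.DataFrame({"00060_Mean": [9.0, 2.0]})
    print(update_dataframe_sketch(base, fill))  # row 0 keeps 1.0; row 1 becomes 2.0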
@@ -123,12 +131,12 @@ def update_dataframe(
     project = "National Water Quality Assessment Program (NAWQA)"

     site_df = pd.read_csv(
-        'NWQN_sites.csv',
-        comment='#',
-        dtype={'SITE_QW_ID': str, 'SITE_FLOW_ID': str},
-        )
+        "NWQN_sites.csv",
+        comment="#",
+        dtype={"SITE_QW_ID": str, "SITE_FLOW_ID": str},
+    )

-    site_list = site_df['SITE_QW_ID'].to_list()
+    site_list = site_df["SITE_QW_ID"].to_list()
     # site_list = site_list[:4]  # prune for testing

     fexec = lithops.FunctionExecutor(config_file="lithops.yaml")
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 40bab739..276bbd98 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -9,38 +9,38 @@
 from importlib.metadata import version

 # path to repository head
-sys.path.insert(0, os.path.abspath('../..'))
+sys.path.insert(0, os.path.abspath("../.."))

 # Project Information
-project = 'dataretrieval'
+project = "dataretrieval"
 release = version(project)
-version = '.'.join(release.split('.')[:2])
-author = 'Hodson et al'
+version = ".".join(release.split(".")[:2])
+author = "Hodson et al"

 # -- General configuration ------------------------------------------------

 # Add any Sphinx extension module names here, as strings.
 extensions = [
-    'sphinx.ext.autodoc',
-    'sphinx.ext.doctest',
-    'sphinx.ext.autosummary',
-    'sphinx.ext.napoleon',
-    'sphinx.ext.todo',
-    'sphinx.ext.coverage',
-    'sphinx.ext.viewcode',
-    'sphinx.ext.githubpages',
-    'nbsphinx',
-    'nbsphinx_link',
+    "sphinx.ext.autodoc",
+    "sphinx.ext.doctest",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.todo",
+    "sphinx.ext.coverage",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.githubpages",
+    "nbsphinx",
+    "nbsphinx_link",
 ]

 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]

 # suffix of source documents
-source_suffix = '.rst'
+source_suffix = ".rst"

 # The main toctree document.
-main_doc = 'index'
+main_doc = "index"

 # The version info for the project you're documenting; acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -51,15 +51,15 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = 'en'
+language = "en"

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # These patterns also affect html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'default'
+pygments_style = "default"

 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
@@ -81,48 +81,48 @@
 autosummary_generate = True
 automodapi_inheritance_diagram = False
 autodoc_default_options = {
-    'members': True,
-    'inherited-members': False,
-    'private-members': True,
+    "members": True,
+    "inherited-members": False,
+    "private-members": True,
 }

 # doctest
-doctest_global_setup = '''
+doctest_global_setup = """
 import dataretrieval
 import numpy as np
 import pandas as pd
 import matplotlib
-'''
+"""

 # -- Options for HTML output ----------------------------------------------

 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"

 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
 html_theme_options = {
-    'logo_only': False,
-    'display_version': True,
+    "logo_only": False,
+    "display_version": True,
 }

 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]

 # -- Options for linkcheck -------------------------------------------

 # Links to not "check" because they are problematic for the link checker
 linkcheck_ignore = [
-    r'https://streamstats.usgs.gov/streamstatsservices/#/',
-    r'https://www.waterqualitydata.us/public_srsnames/',
-    r'https://waterqualitydata.us',
-    r'https://github.com/USGS-python/dataretrieval/tree/main/demos/hydroshare',
+    r"https://streamstats.usgs.gov/streamstatsservices/#/",
+    r"https://www.waterqualitydata.us/public_srsnames/",
+    r"https://waterqualitydata.us",
+    r"https://github.com/USGS-python/dataretrieval/tree/main/demos/hydroshare",
 ]

 # Some notebooks have warnings, which nbsphinx should ignore
diff --git a/pyproject.toml b/pyproject.toml
index 7ff69af4..3f26ff48 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,17 +47,12 @@ doc = [
     "ipython",
     "ipykernel",
     "matplotlib",
-    "folium>=0.12",
-    "mapclassify"
+    "folium>=0.12"
 ]

 nldi = [
     'geopandas>=0.10'
 ]

-waterdata = [
-    'geopandas>=0.10',
-]
-
 [project.urls]
 homepage = "https://github.com/DOI-USGS/dataretrieval-python"
 documentation = "https://doi-usgs.github.io/dataretrieval-python/"
@@ -86,3 +81,6 @@ extend-select = [
     "E231", "E252", "E261", "E262", "E303", "E501",
 ]

+[tool.ruff.lint.per-file-ignores]
+"demos/*.ipynb" = ["E501", "W291"]
+
diff --git a/setup.py b/setup.py
index fc1f76c8..60684932 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,3 @@
 from setuptools import setup

-setup()
\ No newline at end of file
+setup()
diff --git a/tests/nldi_test.py b/tests/nldi_test.py
index 9993a899..462a5755 100644
--- a/tests/nldi_test.py
+++ b/tests/nldi_test.py
@@ -94,7 +94,9 @@ def test_features_by_feature_source_with_navigation(requests_mock):
     request_url = (
         f"{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/nwissite?distance=50"
     )
-    response_file_path = "tests/data/nldi_get_features_by_feature_source_with_nav_mode.json"
+    response_file_path = (
+        "tests/data/nldi_get_features_by_feature_source_with_nav_mode.json"
+    )

     mock_request_data_sources(requests_mock)
     mock_request(requests_mock, request_url, response_file_path)
@@ -207,7 +209,9 @@ def test_search_for_features_by_feature_source_with_navigation(requests_mock):
     request_url = (
         f"{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/nwissite?distance=50"
     )
-    response_file_path = "tests/data/nldi_get_features_by_feature_source_with_nav_mode.json"
+    response_file_path = (
+        "tests/data/nldi_get_features_by_feature_source_with_nav_mode.json"
+    )

     mock_request_data_sources(requests_mock)
     mock_request(requests_mock, request_url, response_file_path)
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index 375284d4..fe6ff537 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -7,12 +7,12 @@
 from dataretrieval.nwis import (
     NWIS_Metadata,
+    get_gwlevels,
     get_info,
     get_iv,
     get_record,
     preformat_peaks_response,
     what_sites,
-    get_gwlevels
 )

 START_DATE = "2018-01-24"
@@ -101,7 +101,7 @@ def test_inc_date_01():
     with pytest.warns(UserWarning):
         df = get_record(site, "1980-01-01", "1990-01-01", service="gwlevels")
     # assert that there are indeed incomplete dates
-    assert any(pd.isna(df.index) == True)
+    assert pd.isna(df.index).any()
     # assert that the datetime index is there
     assert df.index.name == "datetime"
     # make call without defining a datetime index and check that it isn't there
@@ -121,7 +121,7 @@ def test_inc_date_02():
     with pytest.warns(UserWarning):
         df = get_record(site, "1900-01-01", "2013-01-01", service="gwlevels")
     # assert that there are indeed incomplete dates
-    assert any(pd.isna(df.index) == True)
+    assert pd.isna(df.index).any()
     # assert that the datetime index is there
     assert df.index.name == "datetime"
     # make call without defining a datetime index and check that it isn't there
@@ -141,7 +141,7 @@ def test_inc_date_03():
     with pytest.warns(UserWarning):
         df = get_record(site, "1975-01-01", "2000-01-01", service="gwlevels")
     # assert that there are indeed incomplete dates
-    assert any(pd.isna(df.index) == True)
+    assert pd.isna(df.index).any()
     # assert that the datetime index is there
     assert df.index.name == "datetime"
     # make call without defining a datetime index and check that it isn't there
@@ -298,24 +298,27 @@ def test_set_metadata_info_countyCd(self):
         # assert that site_info is implemented
         assert md.site_info

+
 class Testgwlevels:
     """Tests of get_gwlevels function

     Notes
     -----
     - gwlevels moved to a new web service endpoint in 2024
-    - The new endpoint has quirks and doesn't recognize the 
+    - The new endpoint has quirks and doesn't recognize the
       parameterCd kwarg advertised by the service.
     """
+
     def test_gwlevels_one_parameterCd(self):
         pcode = "72019"
-        df,_ = get_gwlevels(sites="434400121275801", start = "2010-01-01", parameterCd=pcode)
-        assert set(df['parameter_cd'].unique().tolist()) == set([pcode])
+        df, _ = get_gwlevels(
+            sites="434400121275801", start="2010-01-01", parameterCd=pcode
+        )
+        assert set(df["parameter_cd"].unique().tolist()) == set([pcode])

     def test_gwlevels_two_parameterCds(self):
         pcode = ["72019", "62610"]
-        df,_ = get_gwlevels(sites="434400121275801", start = "2010-01-01", parameterCd=pcode)
-        assert set(df['parameter_cd'].unique().tolist()) == set(pcode)
-
-
-
+        df, _ = get_gwlevels(
+            sites="434400121275801", start="2010-01-01", parameterCd=pcode
+        )
+        assert set(df["parameter_cd"].unique().tolist()) == set(pcode)
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 711e5886..4a946035 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -4,10 +4,7 @@

 import pytest

-from dataretrieval import (
-    utils,
-    nwis
-)
+from dataretrieval import nwis, utils


 class Test_query:
@@ -21,7 +18,10 @@ def test_url_too_long(self):
         # all sites in MD
         sites, _ = nwis.what_sites(stateCd="MD")
         # expected error message
-        _msg = "Request URL too long. Modify your query to use fewer sites. API response reason: Request-URI Too Long"
+        _msg = (
+            "Request URL too long. Modify your query to use fewer sites. "
+            "API response reason: Request-URI Too Long"
+        )
         # raise error by trying to query them all, so URL is way too long
         with pytest.raises(ValueError, match=_msg):
             nwis.get_iv(sites=sites.site_no.values.tolist())
@@ -55,6 +55,6 @@ def test_init_with_response(self):

         ## Test NotImplementedError parameters
         with pytest.raises(NotImplementedError):
-            md.site_info
+            _ = md.site_info
         with pytest.raises(NotImplementedError):
-            md.variable_info
+            _ = md.variable_info
diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py
index 1d37628f..f3a2ea6a 100755
--- a/tests/waterdata_test.py
+++ b/tests/waterdata_test.py
@@ -1,26 +1,28 @@
 import datetime
 import sys
+
 import pytest
 from pandas import DataFrame

 if sys.version_info < (3, 10):
     pytest.skip("Skip entire module on Python < 3.10", allow_module_level=True)

-from dataretrieval.waterdata.utils import _check_profiles
 from dataretrieval.waterdata import (
-    get_samples,
-    get_daily,
+    get_channel,
     get_continuous,
-    get_monitoring_locations,
+    get_daily,
+    get_field_measurements,
     get_latest_continuous,
     get_latest_daily,
-    get_field_measurements,
-    get_time_series_metadata,
+    get_monitoring_locations,
     get_reference_table,
-    get_stats_por,
+    get_samples,
     get_stats_date_range,
-    get_channel
+    get_stats_por,
+    get_time_series_metadata,
 )
+from dataretrieval.waterdata.utils import _check_profiles
+

 def mock_request(requests_mock, request_url, file_path):
     """Mock request code"""
@@ -29,6 +31,7 @@ def mock_request(requests_mock, request_url, file_path):
         request_url, text=text.read(), headers={"mock_header": "value"}
     )

+
 def test_mock_get_samples(requests_mock):
     """Tests USGS Samples query"""
     request_url = (
@@ -45,7 +48,7 @@ def test_mock_get_samples(requests_mock):
         activityStartDateLower="2020-01-01",
         activityStartDateUpper="2024-12-31",
         monitoringLocationIdentifier="USGS-05406500",
-        )
+    )
     assert type(df) is DataFrame
     assert df.size == 12127
     assert md.url == request_url
@@ -53,6 +56,7 @@ def test_mock_get_samples(requests_mock):
     assert md.header == {"mock_header": "value"}
     assert md.comment is None

+
 def test_check_profiles():
     """Tests that correct errors are raised for invalid profiles."""
     with pytest.raises(ValueError):
@@ -60,69 +64,76 @@ def test_check_profiles():
     with pytest.raises(ValueError):
         _check_profiles(service="results", profile="foo")

+
 def test_samples_results():
     """Test results call for proper columns"""
-    df,_ = get_samples(
+    df, _ = get_samples(
         service="results",
         profile="narrow",
         monitoringLocationIdentifier="USGS-05288705",
         activityStartDateLower="2024-10-01",
-        activityStartDateUpper="2025-04-24"
-        )
-    assert all(col in df.columns for col in ["Location_Identifier", "Activity_ActivityIdentifier"])
+        activityStartDateUpper="2025-04-24",
+    )
+    assert all(
+        col in df.columns
+        for col in ["Location_Identifier", "Activity_ActivityIdentifier"]
+    )
     assert len(df) > 0

+
 def test_samples_activity():
     """Test activity call for proper columns"""
-    df,_ = get_samples(
+    df, _ = get_samples(
         service="activities",
         profile="sampact",
-        monitoringLocationIdentifier="USGS-06719505"
-        )
+        monitoringLocationIdentifier="USGS-06719505",
+    )
     assert len(df) > 0
     assert len(df.columns) == 95
     assert "Location_HUCTwelveDigitCode" in df.columns

+
 def test_samples_locations():
     """Test locations call for proper columns"""
-    df,_ = get_samples(
+    df, _ = get_samples(
         service="locations",
         profile="site",
         stateFips="US:55",
         activityStartDateLower="2024-10-01",
         activityStartDateUpper="2025-04-24",
-        usgsPCode="00010"
-        )
-    assert all(col in df.columns for col in ["Location_Identifier", "Location_Latitude"])
+        usgsPCode="00010",
+    )
+    assert all(
+        col in df.columns for col in ["Location_Identifier", "Location_Latitude"]
+    )
    assert len(df) > 0

+
 def test_samples_projects():
     """Test projects call for proper columns"""
-    df,_ = get_samples(
+    df, _ = get_samples(
         service="projects",
         profile="project",
         stateFips="US:15",
         activityStartDateLower="2024-10-01",
-        activityStartDateUpper="2025-04-24"
-        )
+        activityStartDateUpper="2025-04-24",
+    )
     assert all(col in df.columns for col in ["Org_Identifier", "Project_Identifier"])
     assert len(df) > 0

+
 def test_samples_organizations():
     """Test organizations call for proper columns"""
-    df,_ = get_samples(
-        service="organizations",
-        profile="count",
-        stateFips="US:01"
-        )
+    df, _ = get_samples(service="organizations", profile="count", stateFips="US:01")
     assert len(df) == 1
     assert df.size == 3

+
 def test_get_daily():
     df, md = get_daily(
         monitoring_location_id="USGS-05427718",
         parameter_code="00060",
-        time="2025-01-01/.."
+        time="2025-01-01/..",
     )
     assert "daily_id" in df.columns
     assert "geometry" in df.columns
@@ -132,184 +143,232 @@
     assert df.monitoring_location_id.unique().tolist() == ["USGS-05427718"]
     assert df["time"].apply(lambda x: isinstance(x, datetime.date)).all()
     assert df["time"].iloc[0] < df["time"].iloc[-1]
-    assert hasattr(md, 'url')
-    assert hasattr(md, 'query_time')
+    assert hasattr(md, "url")
+    assert hasattr(md, "query_time")
     assert df["value"].dtype == "float64"

+
 def test_get_daily_properties():
-    df,_ = get_daily(
+    df, _ = get_daily(
         monitoring_location_id="USGS-05427718",
         parameter_code="00060",
         time="2025-01-01/..",
-        properties=["daily_id", "monitoring_location_id", "parameter_code", "time", "value", "geometry"]
+        properties=[
+            "daily_id",
+            "monitoring_location_id",
+            "parameter_code",
+            "time",
+            "value",
+            "geometry",
+        ],
     )
     assert "daily_id" == df.columns[0]
     assert "geometry" == df.columns[-1]
     assert df.shape[1] == 6
     assert df.parameter_code.unique().tolist() == ["00060"]

+
 def test_get_daily_properties_id():
-    df,_ = get_daily(
+    df, _ = get_daily(
         monitoring_location_id="USGS-05427718",
         parameter_code="00060",
         time="2025-01-01/..",
-        properties=["monitoring_location_id", "id", "parameter_code", "time", "value", "geometry"]
+        properties=[
+            "monitoring_location_id",
+            "id",
+            "parameter_code",
+            "time",
+            "value",
+            "geometry",
+        ],
     )
     assert "daily_id" == df.columns[1]

+
 def test_get_daily_no_geometry():
-    df,_ = get_daily(
+    df, _ = get_daily(
         monitoring_location_id="USGS-05427718",
         parameter_code="00060",
         time="2025-01-01/..",
-        skip_geometry=True
+        skip_geometry=True,
     )
     assert "geometry" not in df.columns
     assert df.shape[1] == 11
     assert isinstance(df, DataFrame)

+
 def test_get_continuous():
-    df,_ = get_continuous(
+    df, _ = get_continuous(
         monitoring_location_id="USGS-06904500",
         parameter_code="00065",
-        time="2025-01-01/2025-12-31"
+        time="2025-01-01/2025-12-31",
     )
     assert isinstance(df, DataFrame)
     assert "geometry" not in df.columns
     assert df.shape[1] == 11
-    assert df['time'].dtype == 'datetime64[ns, UTC]'
+    assert df["time"].dtype == "datetime64[ns, UTC]"
     assert "continuous_id" in df.columns

+
 def test_get_monitoring_locations():
-    df, md = get_monitoring_locations(
-        state_name="Connecticut",
-        site_type_code="GW"
-    )
+    df, md = get_monitoring_locations(state_name="Connecticut", site_type_code="GW")
     assert df.site_type_code.unique().tolist() == ["GW"]
-    assert hasattr(md, 'url')
-    assert hasattr(md, 'query_time')
+    assert hasattr(md, "url")
+    assert hasattr(md, "query_time")

+
 def test_get_monitoring_locations_hucs():
-    df,_ = get_monitoring_locations(
+    df, _ = get_monitoring_locations(
         hydrologic_unit_code=["010802050102", "010802050103"]
     )
-    assert set(df.hydrologic_unit_code.unique().tolist()) == {"010802050102", "010802050103"}
+    assert set(df.hydrologic_unit_code.unique().tolist()) == {
+        "010802050102",
+        "010802050103",
+    }

+
 def test_get_latest_continuous():
     df, md = get_latest_continuous(
         monitoring_location_id=["USGS-05427718", "USGS-05427719"],
-        parameter_code=["00060", "00065"]
+        parameter_code=["00060", "00065"],
     )
     assert "latest_continuous_id" == df.columns[-1]
     assert df.shape[0] <= 4
     assert df.statistic_id.unique().tolist() == ["00011"]
-    assert hasattr(md, 'url')
-    assert hasattr(md, 'query_time')
-    assert df['time'].dtype == 'datetime64[ns, UTC]'
+    assert hasattr(md, "url")
+    assert hasattr(md, "query_time")
+    assert df["time"].dtype == "datetime64[ns, UTC]"

+
 def test_get_latest_daily():
     df, md = get_latest_daily(
         monitoring_location_id=["USGS-05427718", "USGS-05427719"],
-        parameter_code=["00060", "00065"]
+        parameter_code=["00060", "00065"],
     )
     assert "latest_daily_id" in df.columns
     assert df.shape[1] == 12
-    assert hasattr(md, 'url')
-    assert hasattr(md, 'query_time')
+    assert hasattr(md, "url")
+    assert hasattr(md, "query_time")

+
 def test_get_latest_daily_properties_geometry():
     df, md = get_latest_daily(
         monitoring_location_id=["USGS-05427718", "USGS-05427719"],
         parameter_code=["00060", "00065"],
-        properties=['monitoring_location_id', 'parameter_code', 'time', 'value', 'unit_of_measure']
+        properties=[
+            "monitoring_location_id",
+            "parameter_code",
+            "time",
+            "value",
+            "unit_of_measure",
+        ],
     )
     assert "geometry" in df.columns
     assert df.shape[1] == 6

+
 def test_get_field_measurements():
     df, md = get_field_measurements(
         monitoring_location_id="USGS-05427718",
         unit_of_measure="ft^3/s",
         time="2025-01-01/2025-10-01",
-        skip_geometry=True
+        skip_geometry=True,
     )
     assert "field_measurement_id" in df.columns
     assert "geometry" not in df.columns
     assert df.unit_of_measure.unique().tolist() == ["ft^3/s"]
-    assert hasattr(md, 'url')
-    assert hasattr(md, 'query_time')
+    assert hasattr(md, "url")
+    assert hasattr(md, "query_time")

+
 def test_get_time_series_metadata():
     df, md = get_time_series_metadata(
-        bbox=[-89.840355,42.853411,-88.818626,43.422598],
+        bbox=[-89.840355, 42.853411, -88.818626, 43.422598],
         parameter_code=["00060", "00065", "72019"],
-        skip_geometry=True
+        skip_geometry=True,
     )
-    assert set(df['parameter_name'].unique().tolist()) == {"Gage height", "Water level, depth LSD", "Discharge"}
-    assert hasattr(md, 'url')
-    assert hasattr(md, 'query_time')
+    assert set(df["parameter_name"].unique().tolist()) == {
+        "Gage height",
+        "Water level, depth LSD",
+        "Discharge",
+    }
+    assert hasattr(md, "url")
+    assert hasattr(md, "query_time")

+
 def test_get_reference_table():
     df, md = get_reference_table("agency-codes")
     assert "agency_code" in df.columns
     assert df.shape[0] > 0
-    assert hasattr(md, 'url')
-    assert hasattr(md, 'query_time')
+    assert hasattr(md, "url")
+    assert hasattr(md, "query_time")

+
 def test_get_reference_table_with_query():
     query = {"id": "AK001,AK008"}
     df, md = get_reference_table("agency-codes", query=query)
     assert "agency_code" in df.columns
     assert df.shape[0] == 2
-    assert hasattr(md, 'url')
-    assert hasattr(md, 'query_time')
+    assert hasattr(md, "url")
+    assert hasattr(md, "query_time")

+
 def test_get_reference_table_wrong_name():
     with pytest.raises(ValueError):
         get_reference_table("agency-cod")

+
 def test_get_stats_por():
-    df,_ = get_stats_por(
+    df, _ = get_stats_por(
         monitoring_location_id="USGS-12451000",
         parameter_code="00060",
         start_date="01-01",
-        end_date="01-01"
+        end_date="01-01",
+    )
+    assert (
+        df["computation"]
+        .isin(["median", "maximum", "minimum", "arithmetic_mean", "percentile"])
+        .all()
     )
-    assert df['computation'].isin(['median', 'maximum', 'minimum', 'arithmetic_mean', 'percentile']).all()
-    assert df['time_of_year'].isin(['01-01', '01']).all()
-    assert df.loc[df['computation'] == "minimum", "percentile"].unique().tolist() == [0.0]
-    assert df.loc[df['computation'] == "arithmetic_mean", "percentile"].isnull().all()
+    assert df["time_of_year"].isin(["01-01", "01"]).all()
+    assert df.loc[df["computation"] == "minimum", "percentile"].unique().tolist() == [
+        0.0
+    ]
+    assert df.loc[df["computation"] == "arithmetic_mean", "percentile"].isnull().all()

+
 def test_get_stats_por_expanded_false():
-    df,_ = get_stats_por(
+    df, _ = get_stats_por(
         monitoring_location_id="USGS-12451000",
         parameter_code="00060",
         start_date="01-01",
         end_date="01-01",
         expand_percentiles=False,
-        computation_type=["minimum", "percentile"]
+        computation_type=["minimum", "percentile"],
     )
     assert df.shape[0] == 4
-    assert df.shape[1] == 20 # if geopandas installed, 21 columns if not
+    assert df.shape[1] == 20  # 20 columns if geopandas is installed, 21 if not
     assert "percentile" not in df.columns
     assert "percentiles" in df.columns
-    assert type(df['percentiles'][2]) is list
-    assert df.loc[~df['percentiles'].isna(), "value"].isnull().all()
+    assert type(df["percentiles"][2]) is list
+    assert df.loc[~df["percentiles"].isna(), "value"].isnull().all()

+
 def test_get_stats_date_range():
-    df,_ = get_stats_date_range(
+    df, _ = get_stats_date_range(
         monitoring_location_id="USGS-12451000",
         parameter_code="00060",
         start_date="2025-01-01",
         end_date="2025-01-01",
-        computation_type="maximum"
+        computation_type="maximum",
     )
     assert df.shape[0] == 3
-    assert df.shape[1] == 20 # if geopandas installed, 21 columns if not
+    assert df.shape[1] == 20  # 20 columns if geopandas is installed, 21 if not
     assert "interval_type" in df.columns
     assert "percentile" in df.columns
-    assert df['interval_type'].isin(['month', 'calendar_year', 'water_year']).all()
+    assert df["interval_type"].isin(["month", "calendar_year", "water_year"]).all()

+
 def test_get_channel():
     df, _ = get_channel(monitoring_location_id="USGS-02238500")
diff --git a/tests/waterservices_test.py b/tests/waterservices_test.py
index 449650aa..d603d36c 100755
--- a/tests/waterservices_test.py
+++ b/tests/waterservices_test.py
@@ -86,7 +86,7 @@ def test_get_record_validation():


 def test_get_dv(requests_mock):
-    """Tests get_dv method correctly generates the request url and returns the result in a DataFrame"""
+    """Verify get_dv builds the expected request URL and returns a DataFrame."""
     format = "json"
     site = "01491000%2C01645000"
     request_url = (
@@ -129,7 +129,7 @@ def test_get_dv_site_value_types(requests_mock, site_input_type_list):


 def test_get_iv(requests_mock):
-    """Tests get_iv method correctly generates the request url and returns the result in a DataFrame"""
+    """Verify get_iv builds the expected request URL and returns a DataFrame."""
     format = "json"
     site = "01491000%2C01645000"
     request_url = (
@@ -173,7 +173,7 @@ def test_get_iv_site_value_types(requests_mock, site_input_type_list):

 def test_get_info(requests_mock):
     """
-    Tests get_info method correctly generates the request url and returns the result in a DataFrame.
+    Verify get_info builds the expected request URL and returns a DataFrame.

     Note that only sites and format are passed as query params
     """
     size = 24
@@ -203,7 +203,7 @@ def test_get_info(requests_mock):


 def test_get_gwlevels(requests_mock):
-    """Tests get_gwlevels method correctly generates the request url and returns the result in a DataFrame."""
+    """Verify get_gwlevels builds the expected request URL and returns a DataFrame."""
     format = "rdb"
     site = "434400121275801"
     request_url = (
@@ -242,7 +242,7 @@ def test_get_gwlevels_site_value_types(requests_mock, site_input_type_list):


 def test_get_discharge_peaks(requests_mock):
-    """Tests get_discharge_peaks method correctly generates the request url and returns the result in a DataFrame"""
+    """Verify get_discharge_peaks builds the expected URL and returns a DataFrame."""
     format = "rdb"
     site = "01594440"
     request_url = (
@@ -284,8 +284,10 @@ def test_get_discharge_peaks_sites_value_types(requests_mock, site_input_type_li


 def test_get_discharge_measurements(requests_mock):
-    """Tests get_discharge_measurements method correctly generates the request url and returns the result in a
-    DataFrame"""
+    """Verify get_discharge_measurements builds the expected URL.
+
+    Confirm it returns a DataFrame.
+    """
     format = "rdb"
     site = "01594440"
     request_url = (
@@ -308,7 +310,7 @@ def test_get_discharge_measurements(requests_mock):
 def test_get_discharge_measurements_sites_value_types(
     requests_mock, site_input_type_list
 ):
-    """Tests get_discharge_measurements method for valid input types for 'sites' parameter"""
+    """Verify get_discharge_measurements accepts valid sites input types."""
     format = "rdb"
     site = "01594440"
     request_url = (
@@ -330,8 +332,7 @@ def test_get_discharge_measurements_sites_value_types(


 def test_get_pmcodes(requests_mock):
-    """Tests get_pmcodes method correctly generates the request url and returns the result in a
-    DataFrame"""
+    """Verify get_pmcodes builds the expected request URL and returns a DataFrame."""
     format = "rdb"
     request_url = "https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?fmt=rdb&parm_nm_cd=%2500618%25"
     response_file_path = "tests/data/waterdata_pmcodes.txt"
@@ -365,8 +366,7 @@ def test_get_pmcodes_parameterCd_value_types(


 def test_get_water_use_national(requests_mock):
-    """Tests get_discharge_measurements method correctly generates the request url and returns the result in a
-    DataFrame"""
+    """Verify get_water_use builds the national request URL and returns a DataFrame."""
     format = "rdb"
     request_url = (
         "https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL"
@@ -425,10 +425,10 @@ def test_get_water_use_national_county_value_types(


 @pytest.mark.parametrize("category_input_type_list", [True, False])
-def test_get_water_use_national_county_value_types(
+def test_get_water_use_national_category_value_types(
     requests_mock, category_input_type_list
 ):
-    """Tests get_water_use method for valid input types for the 'categories' parameter"""
+    """Verify get_water_use accepts valid categories input types."""
     _format = "rdb"
     category = "ALL"
     request_url = (
@@ -448,8 +448,7 @@ def test_get_water_use_national_county_value_types(


 def test_get_water_use_allegheny(requests_mock):
-    """Tests get_discharge_measurements method correctly generates the request url and returns the result in a
-    DataFrame"""
+    """Verify get_water_use builds the county request URL and returns a DataFrame."""
     format = "rdb"
     request_url = (
         "https://nwis.waterdata.usgs.gov/PA/nwis/water_use?rdb_compression=value&format=rdb&wu_year=ALL"
@@ -465,7 +464,7 @@ def test_get_water_use_allegheny(requests_mock):


 def test_get_ratings_validation():
-    """Tests get_ratings method correctly generates the request url and returns the result in a DataFrame"""
+    """Verify get_ratings validates file_type values."""
     site = "01594440"
     with pytest.raises(ValueError) as value_error:
         get_ratings(site=site, file_type="BAD")
@@ -475,7 +474,7 @@ def test_get_ratings_validation():


 def test_get_ratings(requests_mock):
-    """Tests get_ratings method correctly generates the request url and returns the result in a DataFrame"""
+    """Verify get_ratings builds the expected URL and returns a DataFrame."""
    format = "rdb"
     site = "01594440"
     request_url = "https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_no={}&file_type=base".format(
@@ -492,7 +491,7 @@ def test_get_ratings(requests_mock):


 def test_what_sites(requests_mock):
-    """Tests what_sites method correctly generates the request url and returns the result in a DataFrame"""
+    """Verify what_sites builds the expected URL and returns a DataFrame."""
     size = 2472
     format = "rdb"
     parameter_cd = "00010%2C00060"
@@ -529,7 +528,7 @@ def test_what_sites(requests_mock):


 def test_get_stats(requests_mock):
-    """Tests get_stats method correctly generates the request url and returns the result in a DataFrame"""
+    """Verify get_stats builds the expected URL and returns a DataFrame."""
     format = "rdb"
     request_url = "https://waterservices.usgs.gov/nwis/stat?sites=01491000%2C01645000&format={}".format(
         format