diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 160c433..bd8856e 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -140,38 +140,15 @@ def preformat_peaks_response(df: pd.DataFrame) -> pd.DataFrame: return df -def get_qwdata( - sites: list[str] | str | None = None, - start: str | None = None, - end: str | None = None, - multi_index: bool = True, - wide_format: bool = True, - datetime_index: bool = True, - ssl_check: bool = True, - **kwargs, -) -> tuple[pd.DataFrame, BaseMetadata]: - """ - This function is defunct, use `get_samples()` - in the waterdata module. - - """ +def get_qwdata(**kwargs): + """Defunct: use ``waterdata.get_samples()``.""" raise NameError( "`nwis.get_qwdata` has been replaced with `waterdata.get_samples()`." ) -def get_discharge_measurements( - sites: list[str] | str | None = None, - start: str | None = None, - end: str | None = None, - ssl_check: bool = True, - **kwargs, -) -> tuple[pd.DataFrame, BaseMetadata]: - """ - This function is defunct, use `get_field_measurements()` - in the waterdata module. - - """ +def get_discharge_measurements(**kwargs): + """Defunct: use ``waterdata.get_field_measurements()``.""" raise NameError( "`nwis.get_discharge_measurements` has been replaced " "with `waterdata.get_field_measurements`." @@ -247,20 +224,8 @@ def get_discharge_peaks( ) -def get_gwlevels( - sites: list[str] | str | None = None, - start: str = "1851-01-01", - end: str | None = None, - multi_index: bool = True, - datetime_index: bool = True, - ssl_check: bool = True, - **kwargs, -) -> tuple[pd.DataFrame, BaseMetadata]: - """ - This function is defunct, use `get_field_measurements()` - in the waterdata module. - - """ +def get_gwlevels(**kwargs): + """Defunct: use ``waterdata.get_field_measurements()``.""" raise NameError( "`nwis.get_gwlevels` has been replaced " "with `waterdata.get_field_measurements()`." @@ -692,33 +657,16 @@ def get_iv( return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) -def get_pmcodes( - parameterCd: str | list[str] = "All", - partial: bool = True, - ssl_check: bool = True, -) -> tuple[pd.DataFrame, BaseMetadata]: - """ - This function is defunct, use - `get_reference_table(collection="parameter-codes")`. - - """ +def get_pmcodes(**kwargs): + """Defunct: use ``get_reference_table(collection='parameter-codes')``.""" raise NameError( "`nwis.get_pmcodes` has been replaced " "with `get_reference_table(collection='parameter-codes')`." ) -def get_water_use( - years: str | list[str] = "ALL", - state: str | None = None, - counties: str | list[str] = "ALL", - categories: str | list[str] = "ALL", - ssl_check: bool = True, -) -> tuple[pd.DataFrame, BaseMetadata]: - """ - This function is defunct and currently has no replacement. - - """ +def get_water_use(**kwargs): + """Defunct: no current replacement.""" raise NameError("`nwis.get_water_use` is defunct.") @@ -950,6 +898,17 @@ def get_record( df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs) return df + elif service == "peaks": + df, _ = get_discharge_peaks( + sites=sites, + start=start, + end=end, + multi_index=multi_index, + ssl_check=ssl_check, + **kwargs, + ) + return df + elif service == "ratings": df, _ = get_ratings(site=sites, ssl_check=ssl_check, **kwargs) return df @@ -979,7 +938,7 @@ def _read_json(json): A custom metadata object """ - merged_df = pd.DataFrame(columns=["site_no", "datetime"]) + all_site_dfs = [] site_list = [ ts["sourceInfo"]["siteCode"][0]["value"] for ts in json["value"]["timeSeries"] @@ -1008,14 +967,11 @@ def _read_json(json): # check whether min, max, mean record XXX option = timeseries["variable"]["options"]["option"][0].get("value") - # loop through each parameter in timeseries, then concat to the merged_df for parameter in timeseries["values"]: col_name = param_cd method = parameter["method"][0]["methodDescription"] - # if len(timeseries['values']) > 1 and method: if method: - # get method, format it, and append to column name method = method.strip("[]()").lower() col_name = f"{col_name}_{method}" @@ -1025,22 +981,15 @@ def _read_json(json): record_json = parameter["value"] if not record_json: - # no data in record continue - # should be able to avoid this by dumping - record_json = str(record_json).replace("'", '"') - - # read json, converting all values to float64 and all qualifiers - # Lists can't be hashed, thus we cannot df.merge on a list column - record_df = pd.read_json( - StringIO(record_json), - orient="records", - dtype={"value": "float64", "qualifiers": "unicode"}, - convert_dates=False, - ) + record_df = pd.DataFrame(record_json) + record_df["value"] = pd.to_numeric(record_df["value"], errors="coerce") record_df["qualifiers"] = ( - record_df["qualifiers"].str.strip("[]").str.replace("'", "") + record_df["qualifiers"] + .astype(str) + .str.strip("[]") + .str.replace("'", "") ) record_df.rename( @@ -1054,11 +1003,14 @@ def _read_json(json): site_df = site_df.merge(record_df, how="outer", on="datetime") - # end of site loop site_df["site_no"] = site_no - merged_df = pd.concat([merged_df, site_df]) + all_site_dfs.append(site_df) + + if not all_site_dfs: + return pd.DataFrame(columns=["site_no", "datetime"]) + + merged_df = pd.concat(all_site_dfs, ignore_index=True) - # convert to datetime, normalizing the timezone to UTC when doing so if "datetime" in merged_df.columns: merged_df["datetime"] = pd.to_datetime(merged_df["datetime"], utc=True) diff --git a/dataretrieval/samples.py b/dataretrieval/samples.py index 82e6942..2259969 100644 --- a/dataretrieval/samples.py +++ b/dataretrieval/samples.py @@ -7,246 +7,21 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING -from dataretrieval.utils import BaseMetadata -if TYPE_CHECKING: - from pandas import DataFrame - - from dataretrieval.waterdata import PROFILES, SERVICES - - -def get_usgs_samples( - ssl_check: bool = True, - service: SERVICES = "results", - profile: PROFILES = "fullphyschem", - activityMediaName: str | list[str] | None = None, - activityStartDateLower: str | None = None, - activityStartDateUpper: str | None = None, - activityTypeCode: str | list[str] | None = None, - characteristicGroup: str | list[str] | None = None, - characteristic: str | list[str] | None = None, - characteristicUserSupplied: str | list[str] | None = None, - boundingBox: list[float] | None = None, - countryFips: str | list[str] | None = None, - stateFips: str | list[str] | None = None, - countyFips: str | list[str] | None = None, - siteTypeCode: str | list[str] | None = None, - siteTypeName: str | list[str] | None = None, - usgsPCode: str | list[str] | None = None, - hydrologicUnit: str | list[str] | None = None, - monitoringLocationIdentifier: str | list[str] | None = None, - organizationIdentifier: str | list[str] | None = None, - pointLocationLatitude: float | None = None, - pointLocationLongitude: float | None = None, - pointLocationWithinMiles: float | None = None, - projectIdentifier: str | list[str] | None = None, - recordIdentifierUserSupplied: str | list[str] | None = None, -) -> tuple[DataFrame, BaseMetadata]: - """Search Samples database for USGS water quality data. - This is a wrapper function for the Samples database API. All potential - filters are provided as arguments to the function, but please do not - populate all possible filters; leave as many as feasible with their default - value (None). This is important because overcomplicated web service queries - can bog down the database's ability to return an applicable dataset before - it times out. - - The web GUI for the Samples database can be found here: - https://waterdata.usgs.gov/download-samples/#dataProfile=site - - If you would like more details on feasible query parameters (complete with - examples), please visit the Samples database swagger docs, here: - https://api.waterdata.usgs.gov/samples-data/docs#/ - - Parameters - ---------- - ssl_check : bool, optional - Check the SSL certificate. - service : string - One of the available Samples services: "results", "locations", "activities", - "projects", or "organizations". Defaults to "results". - profile : string - One of the available profiles associated with a service. Options for each - service are: - results - "fullphyschem", "basicphyschem", - "fullbio", "basicbio", "narrow", - "resultdetectionquantitationlimit", - "labsampleprep", "count" - locations - "site", "count" - activities - "sampact", "actmetric", - "actgroup", "count" - projects - "project", "projectmonitoringlocationweight" - organizations - "organization", "count" - activityMediaName : string or list of strings, optional - Name or code indicating environmental medium in which sample was taken. - Check the `activityMediaName_lookup()` function in this module for all - possible inputs. - Example: "Water". - activityStartDateLower : string, optional - The start date if using a date range. Takes the format YYYY-MM-DD. - The logic is inclusive, i.e. it will also return results that - match the date. If left as None, will pull all data on or before - activityStartDateUpper, if populated. - activityStartDateUpper : string, optional - The end date if using a date range. Takes the format YYYY-MM-DD. - The logic is inclusive, i.e. it will also return results that - match the date. If left as None, will pull all data after - activityStartDateLower up to the most recent available results. - activityTypeCode : string or list of strings, optional - Text code that describes type of field activity performed. - Example: "Sample-Routine, regular". - characteristicGroup : string or list of strings, optional - Characteristic group is a broad category of characteristics - describing one or more results. Check the `characteristicGroup_lookup()` - function in this module for all possible inputs. - Example: "Organics, PFAS" - characteristic : string or list of strings, optional - Characteristic is a specific category describing one or more results. - Check the `characteristic_lookup()` function in this module for all - possible inputs. - Example: "Suspended Sediment Discharge" - characteristicUserSupplied : string or list of strings, optional - A user supplied characteristic name describing one or more results. - boundingBox: list of four floats, optional - Filters on the the associated monitoring location's point location - by checking if it is located within the specified geographic area. - The logic is inclusive, i.e. it will include locations that overlap - with the edge of the bounding box. Values are separated by commas, - expressed in decimal degrees, NAD83, and longitudes west of Greenwich - are negative. - The format is a string consisting of: - - Western-most longitude - - Southern-most latitude - - Eastern-most longitude - - Northern-most longitude - Example: [-92.8,44.2,-88.9,46.0] - countryFips : string or list of strings, optional - Example: "US" (United States) - stateFips : string or list of strings, optional - Check the `stateFips_lookup()` function in this module for all - possible inputs. - Example: "US:15" (United States: Hawaii) - countyFips : string or list of strings, optional - Check the `countyFips_lookup()` function in this module for all - possible inputs. - Example: "US:15:001" (United States: Hawaii, Hawaii County) - siteTypeCode : string or list of strings, optional - An abbreviation for a certain site type. Check the `siteType_lookup()` - function in this module for all possible inputs. - Example: "GW" (Groundwater site) - siteTypeName : string or list of strings, optional - A full name for a certain site type. Check the `siteType_lookup()` - function in this module for all possible inputs. - Example: "Well" - usgsPCode : string or list of strings, optional - 5-digit number used in the US Geological Survey computerized - data system, National Water Information System (NWIS), to - uniquely identify a specific constituent. Check the - `characteristic_lookup()` function in this module for all possible - inputs. - Example: "00060" (Discharge, cubic feet per second) - hydrologicUnit : string or list of strings, optional - Max 12-digit number used to describe a hydrologic unit. - Example: "070900020502" - monitoringLocationIdentifier : string or list of strings, optional - A monitoring location identifier has two parts: the agency code - and the location number, separated by a dash (-). - Example: "USGS-040851385" - organizationIdentifier : string or list of strings, optional - Designator used to uniquely identify a specific organization. - Currently only accepting the organization "USGS". - pointLocationLatitude : float, optional - Latitude for a point/radius query (decimal degrees). Must be used - with pointLocationLongitude and pointLocationWithinMiles. - pointLocationLongitude : float, optional - Longitude for a point/radius query (decimal degrees). Must be used - with pointLocationLatitude and pointLocationWithinMiles. - pointLocationWithinMiles : float, optional - Radius for a point/radius query. Must be used with - pointLocationLatitude and pointLocationLongitude - projectIdentifier : string or list of strings, optional - Designator used to uniquely identify a data collection project. Project - identifiers are specific to an organization (e.g. USGS). - Example: "ZH003QW03" - recordIdentifierUserSupplied : string or list of strings, optional - Internal AQS record identifier that returns 1 entry. Only available - for the "results" service. - - Returns - ------- - df : ``pandas.DataFrame`` - Formatted data returned from the API query. - md : :obj:`dataretrieval.utils.Metadata` - Custom ``dataretrieval`` metadata object pertaining to the query. - - Examples - -------- - .. code:: - - >>> # Get PFAS results within a bounding box - >>> df, md = dataretrieval.samples.get_usgs_samples( - ... boundingBox=[-90.2, 42.6, -88.7, 43.2], - ... characteristicGroup="Organics, PFAS", - ... ) - - >>> # Get all activities for the Commonwealth of Virginia over a date range - >>> df, md = dataretrieval.samples.get_usgs_samples( - ... service="activities", - ... profile="sampact", - ... activityStartDateLower="2023-10-01", - ... activityStartDateUpper="2024-01-01", - ... stateFips="US:51", - ... ) - - >>> # Get all pH samples for two sites in Utah - >>> df, md = dataretrieval.samples.get_usgs_samples( - ... monitoringLocationIdentifier=[ - ... "USGS-393147111462301", - ... "USGS-393343111454101", - ... ], - ... usgsPCode="00400", - ... ) +def get_usgs_samples(**kwargs): + """Deprecated: use ``waterdata.get_samples()`` instead. + All keyword arguments are forwarded directly to + :func:`dataretrieval.waterdata.get_samples`. """ - warnings.warn( - ( - "`get_usgs_samples` is deprecated and will be removed. " - "Use `waterdata.get_samples` instead." - ), + "`get_usgs_samples` is deprecated and will be removed. " + "Use `waterdata.get_samples` instead.", DeprecationWarning, stacklevel=2, ) from dataretrieval.waterdata import get_samples - result = get_samples( - ssl_check=ssl_check, - service=service, - profile=profile, - activityMediaName=activityMediaName, - activityStartDateLower=activityStartDateLower, - activityStartDateUpper=activityStartDateUpper, - activityTypeCode=activityTypeCode, - characteristicGroup=characteristicGroup, - characteristic=characteristic, - characteristicUserSupplied=characteristicUserSupplied, - boundingBox=boundingBox, - countryFips=countryFips, - stateFips=stateFips, - countyFips=countyFips, - siteTypeCode=siteTypeCode, - siteTypeName=siteTypeName, - usgsPCode=usgsPCode, - hydrologicUnit=hydrologicUnit, - monitoringLocationIdentifier=monitoringLocationIdentifier, - organizationIdentifier=organizationIdentifier, - pointLocationLatitude=pointLocationLatitude, - pointLocationLongitude=pointLocationLongitude, - pointLocationWithinMiles=pointLocationWithinMiles, - projectIdentifier=projectIdentifier, - recordIdentifierUserSupplied=recordIdentifierUserSupplied, - ) - - return result + return get_samples(**kwargs) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index ecf99ba..c58148d 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -1041,7 +1041,7 @@ def get_stats_data( headers = dict(req.headers) body = resp.json() - dfs = _handle_stats_nesting(body, geopd=GEOPANDAS) + all_dfs = [_handle_stats_nesting(body, geopd=GEOPANDAS)] # Look for a next code in the response body next_token = body["next"] @@ -1057,8 +1057,7 @@ def get_stats_data( headers=headers, ) body = resp.json() - df1 = _handle_stats_nesting(body, geopd=False) - dfs = pd.concat([dfs, df1], ignore_index=True) + all_dfs.append(_handle_stats_nesting(body, geopd=False)) next_token = body["next"] except Exception: # noqa: BLE001 error_text = _error_body(resp) @@ -1068,6 +1067,8 @@ def get_stats_data( ) next_token = None + dfs = pd.concat(all_dfs, ignore_index=True) if len(all_dfs) > 1 else all_dfs[0] + # . If expand percentiles is True, make each percentile # its own row in the returned dataset. if expand_percentiles: diff --git a/demos/NWIS_demo_1.ipynb b/demos/NWIS_demo_1.ipynb index 6f72207..6edaa8a 100644 --- a/demos/NWIS_demo_1.ipynb +++ b/demos/NWIS_demo_1.ipynb @@ -272,8 +272,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" + "pygments_lexer": "ipython3" }, "vscode": { "interpreter": { diff --git a/demos/WaterData_demo.ipynb b/demos/WaterData_demo.ipynb index 47a10ff..f7d9e8d 100644 --- a/demos/WaterData_demo.ipynb +++ b/demos/WaterData_demo.ipynb @@ -640,8 +640,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.14" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb index 80f05af..8a045ba 100644 --- a/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_Peaks_Examples.ipynb @@ -42,7 +42,8 @@ "source": [ "from IPython.display import display\n", "\n", - "from dataretrieval import nwisfrom dataretrieval import waterdata\n", + "from dataretrieval import nwis\n", + "from dataretrieval import waterdata\n", "import dataretrieval.waterdata as waterdata\n" ] }, diff --git a/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb index 77bd221..0daeba4 100644 --- a/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_Ratings_Examples.ipynb @@ -42,7 +42,8 @@ "source": [ "from IPython.display import display\n", "\n", - "from dataretrieval import nwisfrom dataretrieval import waterdata\n", + "from dataretrieval import nwis\n", + "from dataretrieval import waterdata\n", "import dataretrieval.waterdata as waterdata\n" ] }, diff --git a/demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb index 513a02e..808d2f9 100644 --- a/demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_Statistics_Examples.ipynb @@ -43,7 +43,8 @@ "from IPython.display import display\n", "from matplotlib import ticker\n", "\n", - "from dataretrieval import nwisfrom dataretrieval import waterdata\n", + "from dataretrieval import nwis\n", + "from dataretrieval import waterdata\n", "import dataretrieval.waterdata as waterdata\n" ] }, diff --git a/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb b/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb index f0c60da..4d6eb92 100644 --- a/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb +++ b/demos/hydroshare/USGS_dataretrieval_WaterUse_Examples.ipynb @@ -42,7 +42,8 @@ "source": [ "from IPython.display import display\n", "\n", - "from dataretrieval import nwisfrom dataretrieval import waterdata\n", + "from dataretrieval import nwis\n", + "from dataretrieval import waterdata\n", "import dataretrieval.waterdata as waterdata\n" ] }, @@ -103,7 +104,7 @@ "metadata": {}, "outputs": [], "source": [ - "display(pennsylvania[0])" + "# [Defunct] display(pennsylvania[0])" ] }, { @@ -119,7 +120,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(pennsylvania[0].dtypes)" + "# [Defunct] print(pennsylvania[0].dtypes)" ] }, {