From 634c4506291531e0a9d6996c2f6edf9e8a4fa60f Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 13 Apr 2026 19:09:20 -0500 Subject: [PATCH] Use GET with comma-separated values for multi-value waterdata queries The OGC API now supports comma-separated values for fields like monitoring_location_id, parameter_code, and statistic_id, making POST+CQL2 unnecessary for most services. Update _construct_api_requests to join list params with commas and use GET for daily, continuous, latest-daily, latest-continuous, field-measurements, time-series-metadata, and channel-measurements. The monitoring-locations endpoint does not yet support comma-separated GET parameters (returns 400); it retains the POST+CQL2 path. Closes #210. Co-Authored-By: Claude Sonnet 4.6 --- dataretrieval/waterdata/utils.py | 70 ++++++++++++++++++-------------- tests/waterdata_test.py | 24 ++++++++++- 2 files changed, 62 insertions(+), 32 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index c58148d5..63d6eddc 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -349,9 +349,11 @@ def _construct_api_requests( """ Constructs an HTTP request object for the specified water data API service. - Depending on the input parameters (whether there's lists of multiple - argument values), the function determines whether to use a GET or POST - request, formats parameters appropriately, and sets required headers. + For most services, list parameters are comma-joined and sent as a single + GET request (e.g. ``parameter_code=["00060","00010"]`` becomes + ``parameter_code=00060,00010`` in the URL). For services that do not + support comma-separated values (currently only ``monitoring-locations``), + a POST request with CQL2 JSON is used instead. Parameters ---------- @@ -377,43 +379,49 @@ def _construct_api_requests( Notes ----- - Date/time parameters are automatically formatted to ISO8601. - - If multiple values are provided for non-single parameters, a POST request - is constructed. - - The function sets appropriate headers for GET and POST requests. """ service_url = f"{OGC_API_URL}/collections/{service}/items" - # Single parameters can only have one value + # The monitoring-locations endpoint does not support comma-separated values + # for multi-value GET parameters; CQL2 POST is required for that service. + _cql2_required_services = {"monitoring-locations"} single_params = {"datetime", "last_modified", "begin", "end", "time"} - # Identify which parameters should be included in the POST content body - post_params = { - k: v - for k, v in kwargs.items() - if k not in single_params and isinstance(v, (list, tuple)) and len(v) > 1 - } + if service in _cql2_required_services: + # Legacy path: POST with CQL2 for multi-value params + post_params = { + k: v + for k, v in kwargs.items() + if k not in single_params and isinstance(v, (list, tuple)) and len(v) > 1 + } + params = {k: v for k, v in kwargs.items() if k not in post_params} + else: + # Format date/time parameters to ISO8601 before comma-joining. + time_periods = {"last_modified", "datetime", "time", "begin", "end"} + for key in time_periods: + if key in kwargs: + kwargs[key] = _format_api_dates( + kwargs[key], + date=(service == "daily" and key != "last_modified"), + ) + post_params = {} + # Join list/tuple values with commas for multi-value GET parameters. + params = { + k: ",".join(str(x) for x in v) if isinstance(v, (list, tuple)) else v + for k, v in kwargs.items() + } - # Everything else goes into the params dictionary for the URL - params = {k: v for k, v in kwargs.items() if k not in post_params} - # Set skipGeometry parameter (API expects camelCase) params["skipGeometry"] = skip_geometry - - # If limit is none or greater than 50000, then set limit to max results. Otherwise, - # use the limit params["limit"] = 50000 if limit is None or limit > 50000 else limit - # Indicate if function needs to perform POST conversion - POST = bool(post_params) - - # Convert dates to ISO08601 format - time_periods = {"last_modified", "datetime", "time", "begin", "end"} - for i in time_periods: - if i in params: - dates = service == "daily" and i != "last_modified" - params[i] = _format_api_dates(params[i], date=dates) + # Convert dates to ISO8601 for the legacy (CQL2) path. + if service in _cql2_required_services: + time_periods = {"last_modified", "datetime", "time", "begin", "end"} + for i in time_periods: + if i in params: + dates = service == "daily" and i != "last_modified" + params[i] = _format_api_dates(params[i], date=dates) - # String together bbox elements from a list to a comma-separated string, - # and string together properties if provided if bbox: params["bbox"] = ",".join(map(str, bbox)) if properties: @@ -421,7 +429,7 @@ def _construct_api_requests( headers = _default_headers() - if POST: + if post_params: headers["Content-Type"] = "application/query-cql-json" request = requests.Request( method="POST", diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py index 195441e5..bcc374d5 100644 --- a/tests/waterdata_test.py +++ b/tests/waterdata_test.py @@ -21,7 +21,7 @@ get_stats_por, get_time_series_metadata, ) -from dataretrieval.waterdata.utils import _check_profiles +from dataretrieval.waterdata.utils import _check_profiles, _construct_api_requests def mock_request(requests_mock, request_url, file_path): @@ -65,6 +65,28 @@ def test_check_profiles(): _check_profiles(service="results", profile="foo") +def test_construct_api_requests_multivalue_get(): + """Multi-value params use GET with comma-separated values for daily service.""" + req = _construct_api_requests( + "daily", + monitoring_location_id=["USGS-05427718", "USGS-05427719"], + parameter_code=["00060", "00065"], + ) + assert req.method == "GET" + assert "monitoring_location_id=USGS-05427718%2CUSGS-05427719" in req.url + assert "parameter_code=00060%2C00065" in req.url + + +def test_construct_api_requests_monitoring_locations_post(): + """monitoring-locations uses POST+CQL2 for multi-value params (API limitation).""" + req = _construct_api_requests( + "monitoring-locations", + hydrologic_unit_code=["010802050102", "010802050103"], + ) + assert req.method == "POST" + assert req.body is not None + + def test_samples_results(): """Test results call for proper columns""" df, _ = get_samples(