From fd54ed05b58997830b9e1c2023fd7d62d7e068ef Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 13 Apr 2026 18:44:38 -0500 Subject: [PATCH] Add tsv mimeType support and raise clear error for xlsx in WQP _check_kwargs now accepts 'tsv' as a valid mimeType alongside 'csv', raises NotImplementedError for 'xlsx' (not yet supported), and gives a clearer ValueError message for any other invalid value. A new _read_wqp_response helper replaces the nine hardcoded pd.read_csv calls, using a tab delimiter when mimeType='tsv' and a comma otherwise. Closes #162. Co-Authored-By: Claude Sonnet 4.6 --- dataretrieval/wqp.py | 34 ++++++++++++++++++++++------------ tests/wqp_test.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 0b53e387..a942a327 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -154,7 +154,7 @@ def get_results( response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -208,7 +208,7 @@ def what_sites( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -263,7 +263,7 @@ def what_organizations( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -314,7 +314,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs): response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -378,7 +378,7 @@ def what_activities( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -440,7 +440,7 @@ def what_detection_limits( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -495,7 +495,7 @@ def what_habitat_metrics( response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -551,7 +551,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs): response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -607,7 +607,7 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=",") + df = _read_wqp_response(response.text, kwargs) return df, WQP_Metadata(response) @@ -697,14 +697,24 @@ def _check_kwargs(kwargs): mimetype = kwargs.get("mimeType") if mimetype == "geojson": raise NotImplementedError("GeoJSON not yet supported. Set 'mimeType=csv'.") - elif mimetype != "csv" and mimetype is not None: - raise ValueError("Invalid mimeType. Set 'mimeType=csv'.") - else: + elif mimetype == "xlsx": + raise NotImplementedError( + "Excel format not yet supported. Set 'mimeType=csv' or 'mimeType=tsv'." + ) + elif mimetype not in ("csv", "tsv", None): + raise ValueError("Invalid mimeType. Supported options: 'csv', 'tsv'.") + elif mimetype is None: kwargs["mimeType"] = "csv" return kwargs +def _read_wqp_response(text, kwargs): + """Parse a WQP response into a DataFrame, respecting the requested mimeType.""" + delimiter = "\t" if kwargs.get("mimeType") == "tsv" else "," + return pd.read_csv(StringIO(text), delimiter=delimiter, low_memory=False) + + def _warn_wqx3_use(): message = ( "Support for the WQX3.0 profiles is experimental. " diff --git a/tests/wqp_test.py b/tests/wqp_test.py index f36558bc..4dd8c43c 100644 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -211,8 +211,38 @@ def mock_request(requests_mock, request_url, file_path): def test_check_kwargs(): """Tests that correct errors are raised for invalid mimetypes.""" kwargs = {"mimeType": "geojson"} + with pytest.raises(NotImplementedError): + kwargs = _check_kwargs(kwargs) + kwargs = {"mimeType": "xlsx"} with pytest.raises(NotImplementedError): kwargs = _check_kwargs(kwargs) kwargs = {"mimeType": "foo"} with pytest.raises(ValueError): kwargs = _check_kwargs(kwargs) + # tsv and csv should both be accepted + kwargs = _check_kwargs({"mimeType": "tsv"}) + assert kwargs["mimeType"] == "tsv" + kwargs = _check_kwargs({"mimeType": "csv"}) + assert kwargs["mimeType"] == "csv" + # no mimeType defaults to csv + kwargs = _check_kwargs({}) + assert kwargs["mimeType"] == "csv" + + +def test_get_results_tsv(requests_mock): + """Tests that mimeType=tsv is accepted and parsed with a tab delimiter.""" + request_url = ( + "https://www.waterqualitydata.us/data/Result/Search?" + "siteid=WIDNR_WQX-10032762&mimeType=tsv" + ) + tsv_text = "col_a\tcol_b\nvalue_1\tvalue_2\n" + requests_mock.get(request_url, text=tsv_text, headers={"mock_header": "value"}) + df, md = get_results( + legacy=True, + siteid="WIDNR_WQX-10032762", + mimeType="tsv", + ) + assert type(df) is DataFrame + assert list(df.columns) == ["col_a", "col_b"] + assert df.shape == (1, 2) + assert md.url == request_url