Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions dataretrieval/wqp.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def get_results(

response = query(url, kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
df = _read_wqp_response(response.text, kwargs)
return df, WQP_Metadata(response)


Expand Down Expand Up @@ -208,7 +208,7 @@ def what_sites(

response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
df = _read_wqp_response(response.text, kwargs)

return df, WQP_Metadata(response)

Expand Down Expand Up @@ -263,7 +263,7 @@ def what_organizations(

response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
df = _read_wqp_response(response.text, kwargs)

return df, WQP_Metadata(response)

Expand Down Expand Up @@ -314,7 +314,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs):

response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
df = _read_wqp_response(response.text, kwargs)

return df, WQP_Metadata(response)

Expand Down Expand Up @@ -378,7 +378,7 @@ def what_activities(

response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
df = _read_wqp_response(response.text, kwargs)

return df, WQP_Metadata(response)

Expand Down Expand Up @@ -440,7 +440,7 @@ def what_detection_limits(

response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
df = _read_wqp_response(response.text, kwargs)

return df, WQP_Metadata(response)

Expand Down Expand Up @@ -495,7 +495,7 @@ def what_habitat_metrics(

response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
df = _read_wqp_response(response.text, kwargs)

return df, WQP_Metadata(response)

Expand Down Expand Up @@ -551,7 +551,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs):

response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
df = _read_wqp_response(response.text, kwargs)

return df, WQP_Metadata(response)

Expand Down Expand Up @@ -607,7 +607,7 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs):

response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)

df = pd.read_csv(StringIO(response.text), delimiter=",")
df = _read_wqp_response(response.text, kwargs)

return df, WQP_Metadata(response)

Expand Down Expand Up @@ -697,14 +697,24 @@ def _check_kwargs(kwargs):
mimetype = kwargs.get("mimeType")
if mimetype == "geojson":
raise NotImplementedError("GeoJSON not yet supported. Set 'mimeType=csv'.")
elif mimetype != "csv" and mimetype is not None:
raise ValueError("Invalid mimeType. Set 'mimeType=csv'.")
else:
elif mimetype == "xlsx":
raise NotImplementedError(
"Excel format not yet supported. Set 'mimeType=csv' or 'mimeType=tsv'."
)
elif mimetype not in ("csv", "tsv", None):
raise ValueError("Invalid mimeType. Supported options: 'csv', 'tsv'.")
elif mimetype is None:
kwargs["mimeType"] = "csv"

return kwargs


def _read_wqp_response(text, kwargs):
"""Parse a WQP response into a DataFrame, respecting the requested mimeType."""
delimiter = "\t" if kwargs.get("mimeType") == "tsv" else ","
return pd.read_csv(StringIO(text), delimiter=delimiter, low_memory=False)


def _warn_wqx3_use():
message = (
"Support for the WQX3.0 profiles is experimental. "
Expand Down
30 changes: 30 additions & 0 deletions tests/wqp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,38 @@ def mock_request(requests_mock, request_url, file_path):
def test_check_kwargs():
    """Tests mimeType validation: rejected, accepted, and default values."""
    # Recognized formats that are not supported yet raise NotImplementedError.
    # The return value is not bound here because the call raises first.
    with pytest.raises(NotImplementedError):
        _check_kwargs({"mimeType": "geojson"})
    with pytest.raises(NotImplementedError):
        _check_kwargs({"mimeType": "xlsx"})
    # Any other unrecognized value raises ValueError.
    with pytest.raises(ValueError):
        _check_kwargs({"mimeType": "foo"})
    # tsv and csv should both be accepted
    kwargs = _check_kwargs({"mimeType": "tsv"})
    assert kwargs["mimeType"] == "tsv"
    kwargs = _check_kwargs({"mimeType": "csv"})
    assert kwargs["mimeType"] == "csv"
    # no mimeType defaults to csv
    kwargs = _check_kwargs({})
    assert kwargs["mimeType"] == "csv"


def test_get_results_tsv(requests_mock):
    """Checks that a mimeType=tsv request is parsed as tab-separated text."""
    site = "WIDNR_WQX-10032762"
    expected_url = (
        "https://www.waterqualitydata.us/data/Result/Search?"
        "siteid=WIDNR_WQX-10032762&mimeType=tsv"
    )
    # Two-column, one-row payload separated by tabs.
    body = "col_a\tcol_b\nvalue_1\tvalue_2\n"
    requests_mock.get(expected_url, text=body, headers={"mock_header": "value"})

    frame, metadata = get_results(legacy=True, siteid=site, mimeType="tsv")

    assert type(frame) is DataFrame
    assert list(frame.columns) == ["col_a", "col_b"]
    assert frame.shape == (1, 2)
    assert metadata.url == expected_url
Loading