From 87caec1c9b96f2768c69fcad276bdb90a59c7330 Mon Sep 17 00:00:00 2001
From: thodson-usgs
Date: Mon, 13 Apr 2026 18:27:32 -0500
Subject: [PATCH] Fix mangled column names for NWIS dam sublocation timeseries

NWIS methodDescription values like "STAGE - TAILWATER, [Tailwater]"
produced column names with bracket qualifiers (e.g.
"00065_stage - tailwater, [tailwater"). Strip the ", [...]" suffix
before building the column name so get_iv returns clean names like
"00065_stage - tailwater".

Closes #177.

Co-Authored-By: Claude Sonnet 4.6
---
 dataretrieval/nwis.py |  4 ++-
 tests/nwis_test.py    | 62 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index 0bcb1d6..7943224 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -972,7 +972,9 @@ def _read_json(json):
                 method = parameter["method"][0]["methodDescription"]
 
                 if method:
-                    method = method.strip("[]()").lower()
+                    # Strip NWIS sublocation qualifier before using the description as a
+                    # column suffix (e.g. "HEADWATER, [Headwater]" → "headwater").
+                    method = method.split(", [")[0].strip("[]() ").lower()
                     col_name = f"{col_name}_{method}"
 
                 if option:
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index c52775a..bd206ee 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -9,6 +9,7 @@
 
 from dataretrieval.nwis import (
     NWIS_Metadata,
+    _read_json,
     _read_rdb,
     get_discharge_measurements,
     get_gwlevels,
@@ -365,3 +366,64 @@ def test_all_comments_returns_empty_dataframe(self):
         df = _read_rdb(rdb)
         assert isinstance(df, pd.DataFrame)
         assert df.empty
+
+
+def _make_iv_json(site_no, param_cd, method_description, values):
+    """Build a minimal NWIS IV JSON structure for use in _read_json tests."""
+    return {
+        "value": {
+            "timeSeries": [
+                {
+                    "sourceInfo": {"siteCode": [{"value": site_no}]},
+                    "variable": {
+                        "variableCode": [{"value": param_cd}],
+                        "options": {"option": [{"value": None}]},
+                    },
+                    "values": [
+                        {
+                            "method": [{"methodDescription": method_description}],
+                            "value": [
+                                {
+                                    "value": str(v),
+                                    "dateTime": f"2023-01-0{i + 1}T00:00:00.000-05:00",
+                                    "qualifiers": ["A"],
+                                }
+                                for i, v in enumerate(values)
+                            ],
+                        }
+                    ],
+                }
+            ]
+        }
+    }
+
+
+class TestReadJsonColumnNames:
+    """Tests that _read_json produces clean column names.
+
+    Regression tests for GitHub Issue #177: column names were mangled when
+    NWIS methodDescription contained a sublocation qualifier such as
+    "STAGE - TAILWATER, [Tailwater]".
+    """
+
+    def test_simple_method_description(self):
+        """A plain methodDescription like 'HEADWATER' becomes a clean column name."""
+        data = _make_iv_json("03399800", "00065", "HEADWATER", [13.0, 13.1])
+        df = _read_json(data)
+        assert "00065_headwater" in df.columns
+        assert df.shape[0] == 2
+
+    def test_sublocation_bracket_stripped(self):
+        """Bracket qualifier in methodDescription is stripped from the column name."""
+        data = _make_iv_json(
+            "03399800", "00065", "STAGE - TAILWATER, [Tailwater]", [12.0, 12.1]
+        )
+        df = _read_json(data)
+        assert "00065_stage - tailwater" in df.columns
+        assert "00065_stage - tailwater, [tailwater" not in df.columns
+
+    def test_no_method_description(self):
+        """An empty methodDescription leaves the column name as just the param code."""
+        data = _make_iv_json("01491000", "00060", "", [100.0, 101.0])
+        df = _read_json(data)
+        assert "00060" in df.columns