From 1bfaec9bc2603de0d0e2f5219872eb323199dffc Mon Sep 17 00:00:00 2001 From: Eric Novotny Date: Fri, 27 Mar 2026 15:14:22 -0700 Subject: [PATCH 1/3] fix issue with storing null values --- cwms/timeseries/timeseries.py | 7 +++-- tests/cda/timeseries/timeseries_CDA_test.py | 33 +++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index 6fdc407..368fb0d 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -438,8 +438,11 @@ def timeseries_df_to_json( pd.Timestamp.isoformat ) df = df.reindex(columns=["date-time", "value", "quality-code"]) - if df.isnull().values.any(): - raise ValueError("Null/NaN data must be removed from the dataframe") + + # Replace NaN/NA/NaT in value column with None so they serialize as JSON + # null rather than the invalid JSON literal NaN. + df["value"] = df["value"].astype(object).where(df["value"].notna(), other=None) + if version_date: version_date_iso = version_date.isoformat() else: diff --git a/tests/cda/timeseries/timeseries_CDA_test.py b/tests/cda/timeseries/timeseries_CDA_test.py index 2c31aba..f3ffa10 100644 --- a/tests/cda/timeseries/timeseries_CDA_test.py +++ b/tests/cda/timeseries/timeseries_CDA_test.py @@ -18,6 +18,7 @@ TEST_TSID_CHUNK_MULTI = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi-Chunk" TEST_TSID_COPY = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Copy" TEST_TSID_DELETE = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Delete" +TEST_TSID_CHUNK_NULLS = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi-Nulls" TS_ID_REV_TEST = TEST_TSID_MULTI.replace("Raw-Multi", "Raw-Rev-Test") # Generate 15-minute interval timestamps START_DATE_CHUNK_MULTI = datetime(2025, 7, 31, 0, 0, tzinfo=timezone.utc) @@ -31,6 +32,7 @@ TEST_TSID_STORE, TEST_TSID_CHUNK_MULTI, TEST_TSID_COPY, + TEST_TSID_CHUNK_NULLS, ] @@ -275,6 +277,37 @@ def test_store_timeseries_chunk_ts(): ), f"Data frames do not match: original = {DF_CHUNK_MULTI.describe()}, stored = {df.describe()}" +def test_store_timesereis_chunk_to_with_null_values(): + # Define parameters + ts_id = TEST_TSID_CHUNK_NULLS + office = TEST_OFFICE + units = "m" + + # Create a copy of the original DataFrame and introduce null values + df_with_nulls = DF_CHUNK_MULTI.copy() + # Set the 100 and 200 index value to null + df_with_nulls.loc[100, "value"] = None + df_with_nulls.loc[200, "value"] = None + + # Convert DataFrame to JSON format + ts_json = ts.timeseries_df_to_json(df_with_nulls, ts_id, units, office) + + ts.store_timeseries(ts_json, multithread=True) + + data_nulls = ts.get_timeseries( + ts_id=ts_id, + office_id=TEST_OFFICE, + begin=START_DATE_CHUNK_MULTI, + end=END_DATE_CHUNK_MULTI, + unit="SI", + ) + df_nulls = data_nulls.df + # make sure the dataframe matches stored dataframe with null values + pdt.assert_frame_equal( + df_nulls, df_with_nulls + ), f"Data frames do not match: original with nulls = {df_with_nulls.describe()}, stored = {df_nulls.describe()}" + + def test_copy_timeseries_chunk_json(): data_json = ts.get_timeseries( ts_id=TEST_TSID_CHUNK_MULTI, From 82febeb830bf5d48692384461fc35c931cf7f3b5 Mon Sep 17 00:00:00 2001 From: Eric Novotny Date: Fri, 27 Mar 2026 15:35:36 -0700 Subject: [PATCH 2/3] add data copy --- tests/cda/timeseries/timeseries_CDA_test.py | 47 +++++++++++++++++---- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/tests/cda/timeseries/timeseries_CDA_test.py b/tests/cda/timeseries/timeseries_CDA_test.py index f3ffa10..f4d056e 100644 --- a/tests/cda/timeseries/timeseries_CDA_test.py +++ b/tests/cda/timeseries/timeseries_CDA_test.py @@ -19,6 +19,7 @@ TEST_TSID_COPY = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Copy" TEST_TSID_DELETE = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Delete" TEST_TSID_CHUNK_NULLS = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi-Nulls" +TEST_TSID_COPY_NULLS = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Copy-Nulls" TS_ID_REV_TEST = TEST_TSID_MULTI.replace("Raw-Multi", "Raw-Rev-Test") # Generate 15-minute interval timestamps START_DATE_CHUNK_MULTI = datetime(2025, 7, 31, 0, 0, tzinfo=timezone.utc) @@ -33,6 +34,7 @@ TEST_TSID_CHUNK_MULTI, TEST_TSID_COPY, TEST_TSID_CHUNK_NULLS, + TEST_TSID_COPY_NULLS, ] @@ -50,6 +52,12 @@ "quality-code": [0] * len(DT_CHUNK_MULTI), } ) +# Create a copy of the original DataFrame and introduce null values +DF_WITH_NULLS = DF_CHUNK_MULTI.copy() +# Set the 100 and 200 index value to null +DF_WITH_NULLS.loc[100, "value"] = None +DF_WITH_NULLS.loc[200, "value"] = None + DF_MULTI_TIMESERIES1 = pd.DataFrame( { @@ -283,14 +291,8 @@ def test_store_timesereis_chunk_to_with_null_values(): office = TEST_OFFICE units = "m" - # Create a copy of the original DataFrame and introduce null values - df_with_nulls = DF_CHUNK_MULTI.copy() - # Set the 100 and 200 index value to null - df_with_nulls.loc[100, "value"] = None - df_with_nulls.loc[200, "value"] = None - # Convert DataFrame to JSON format - ts_json = ts.timeseries_df_to_json(df_with_nulls, ts_id, units, office) + ts_json = ts.timeseries_df_to_json(DF_WITH_NULLS, ts_id, units, office) ts.store_timeseries(ts_json, multithread=True) @@ -304,8 +306,35 @@ def test_store_timesereis_chunk_to_with_null_values(): df_nulls = data_nulls.df # make sure the dataframe matches stored dataframe with null values pdt.assert_frame_equal( - df_nulls, df_with_nulls - ), f"Data frames do not match: original with nulls = {df_with_nulls.describe()}, stored = {df_nulls.describe()}" + df_nulls, DF_WITH_NULLS + ), f"Data frames do not match: original with nulls = {DF_WITH_NULLS.describe()}, stored = {df_nulls.describe()}" + + +def test_copy_timeseries_chunk_json_with_nulls(): + data_json = ts.get_timeseries( + ts_id=TEST_TSID_CHUNK_NULLS, + office_id=TEST_OFFICE, + begin=START_DATE_CHUNK_MULTI, + end=END_DATE_CHUNK_MULTI, + max_days_per_chunk=14, + unit="SI", + ).json + data_json["name"] = TEST_TSID_COPY_NULLS + ts.store_timeseries(data_json) + + data_multithread = ts.get_timeseries( + ts_id=TEST_TSID_COPY_NULLS, + office_id=TEST_OFFICE, + begin=START_DATE_CHUNK_MULTI, + end=END_DATE_CHUNK_MULTI, + max_days_per_chunk=14, + unit="SI", + ) + df = data_multithread.df + # make sure the dataframe matches stored dataframe with null values + pdt.assert_frame_equal( + df, DF_WITH_NULLS + ), f"Data frames do not match: original with nulls = {DF_WITH_NULLS.describe()}, stored = {df.describe()}" def test_copy_timeseries_chunk_json(): From ecab5604ed5f8b354245fb2401c641a3bd7dc9cd Mon Sep 17 00:00:00 2001 From: Eric Novotny Date: Fri, 27 Mar 2026 15:47:05 -0700 Subject: [PATCH 3/3] fix null copying --- cwms/timeseries/timeseries.py | 8 ++++++++ pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py index 368fb0d..3be32b9 100644 --- a/cwms/timeseries/timeseries.py +++ b/cwms/timeseries/timeseries.py @@ -233,6 +233,14 @@ def combine_timeseries_results(results: List[Data]) -> Data: ) combined_df["date-time"] = combined_df["date-time"].astype("Int64") combined_df = combined_df.reindex(columns=["date-time", "value", "quality-code"]) + + # Replace NaN in value column with None so they serialize as JSON null + # rather than the invalid JSON literal NaN. + combined_df["value"] = ( + combined_df["value"] + .astype(object) + .where(combined_df["value"].notna(), other=None) + ) # Update the "values" key in the JSON to include the combined data combined_json["values"] = combined_df.values.tolist() diff --git a/pyproject.toml b/pyproject.toml index 38d38ba..609b41f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "cwms-python" repository = "https://github.com/HydrologicEngineeringCenter/cwms-python" -version = "1.0.4" +version = "1.0.6" packages = [