diff --git a/cwms/timeseries/timeseries.py b/cwms/timeseries/timeseries.py
index 6fdc407..3be32b9 100644
--- a/cwms/timeseries/timeseries.py
+++ b/cwms/timeseries/timeseries.py
@@ -233,6 +233,14 @@ def combine_timeseries_results(results: List[Data]) -> Data:
     )
     combined_df["date-time"] = combined_df["date-time"].astype("Int64")
     combined_df = combined_df.reindex(columns=["date-time", "value", "quality-code"])
+
+    # Replace NaN in value column with None so they serialize as JSON null
+    # rather than the invalid JSON literal NaN.
+    combined_df["value"] = (
+        combined_df["value"]
+        .astype(object)
+        .where(combined_df["value"].notna(), other=None)
+    )
 
     # Update the "values" key in the JSON to include the combined data
     combined_json["values"] = combined_df.values.tolist()
@@ -438,8 +446,18 @@ def timeseries_df_to_json(
         pd.Timestamp.isoformat
     )
     df = df.reindex(columns=["date-time", "value", "quality-code"])
-    if df.isnull().values.any():
-        raise ValueError("Null/NaN data must be removed from the dataframe")
+
+    # Only the value column is converted to JSON null below; nulls in the
+    # date-time or quality-code columns are still rejected, as they were before.
+    if df[["date-time", "quality-code"]].isnull().values.any():
+        raise ValueError(
+            "Null/NaN data must be removed from the date-time and quality-code columns"
+        )
+
+    # Replace NaN/NA/NaT in value column with None so they serialize as JSON
+    # null rather than the invalid JSON literal NaN.
+    df["value"] = df["value"].astype(object).where(df["value"].notna(), other=None)
+
     if version_date:
         version_date_iso = version_date.isoformat()
     else:
diff --git a/pyproject.toml b/pyproject.toml
index 64a8fbd..1e59469 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,8 +2,7 @@
 name = "cwms-python"
 repository = "https://github.com/HydrologicEngineeringCenter/cwms-python"
 
-version = "1.0.5"
-
+version = "1.0.6"
 
 packages = [
     { include = "cwms" },
diff --git a/tests/cda/timeseries/timeseries_CDA_test.py b/tests/cda/timeseries/timeseries_CDA_test.py
index 2c31aba..f4d056e 100644
--- a/tests/cda/timeseries/timeseries_CDA_test.py
+++ b/tests/cda/timeseries/timeseries_CDA_test.py
@@ -18,6 +18,8 @@
 TEST_TSID_CHUNK_MULTI = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi-Chunk"
 TEST_TSID_COPY = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Copy"
 TEST_TSID_DELETE = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Delete"
+TEST_TSID_CHUNK_NULLS = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Multi-Nulls"
+TEST_TSID_COPY_NULLS = f"{TEST_LOCATION_ID}.Stage.Inst.15Minutes.0.Raw-Copy-Nulls"
 TS_ID_REV_TEST = TEST_TSID_MULTI.replace("Raw-Multi", "Raw-Rev-Test")
 # Generate 15-minute interval timestamps
 START_DATE_CHUNK_MULTI = datetime(2025, 7, 31, 0, 0, tzinfo=timezone.utc)
@@ -31,6 +33,8 @@
     TEST_TSID_STORE,
     TEST_TSID_CHUNK_MULTI,
     TEST_TSID_COPY,
+    TEST_TSID_CHUNK_NULLS,
+    TEST_TSID_COPY_NULLS,
 ]
 
 
@@ -48,6 +52,12 @@
         "quality-code": [0] * len(DT_CHUNK_MULTI),
     }
 )
+# Create a copy of the original DataFrame and introduce null values
+DF_WITH_NULLS = DF_CHUNK_MULTI.copy()
+# Set the values at index 100 and 200 to null
+DF_WITH_NULLS.loc[100, "value"] = None
+DF_WITH_NULLS.loc[200, "value"] = None
+
 
 DF_MULTI_TIMESERIES1 = pd.DataFrame(
     {
@@ -275,6 +285,58 @@ def test_store_timeseries_chunk_ts():
     ), f"Data frames do not match: original = {DF_CHUNK_MULTI.describe()}, stored = {df.describe()}"
 
 
+def test_store_timesereis_chunk_to_with_null_values():
+    # Define parameters
+    ts_id = TEST_TSID_CHUNK_NULLS
+    office = TEST_OFFICE
+    units = "m"
+
+    # Convert DataFrame to JSON format
+    ts_json = ts.timeseries_df_to_json(DF_WITH_NULLS, ts_id, units, office)
+
+    ts.store_timeseries(ts_json, multithread=True)
+
+    data_nulls = ts.get_timeseries(
+        ts_id=ts_id,
+        office_id=TEST_OFFICE,
+        begin=START_DATE_CHUNK_MULTI,
+        end=END_DATE_CHUNK_MULTI,
+        unit="SI",
+    )
+    df_nulls = data_nulls.df
+    # make sure the dataframe matches stored dataframe with null values
+    pdt.assert_frame_equal(
+        df_nulls, DF_WITH_NULLS
+    ), f"Data frames do not match: original with nulls = {DF_WITH_NULLS.describe()}, stored = {df_nulls.describe()}"
+
+
+def test_copy_timeseries_chunk_json_with_nulls():
+    data_json = ts.get_timeseries(
+        ts_id=TEST_TSID_CHUNK_NULLS,
+        office_id=TEST_OFFICE,
+        begin=START_DATE_CHUNK_MULTI,
+        end=END_DATE_CHUNK_MULTI,
+        max_days_per_chunk=14,
+        unit="SI",
+    ).json
+    data_json["name"] = TEST_TSID_COPY_NULLS
+    ts.store_timeseries(data_json)
+
+    data_multithread = ts.get_timeseries(
+        ts_id=TEST_TSID_COPY_NULLS,
+        office_id=TEST_OFFICE,
+        begin=START_DATE_CHUNK_MULTI,
+        end=END_DATE_CHUNK_MULTI,
+        max_days_per_chunk=14,
+        unit="SI",
+    )
+    df = data_multithread.df
+    # make sure the dataframe matches stored dataframe with null values
+    pdt.assert_frame_equal(
+        df, DF_WITH_NULLS
+    ), f"Data frames do not match: original with nulls = {DF_WITH_NULLS.describe()}, stored = {df.describe()}"
+
+
 def test_copy_timeseries_chunk_json():
     data_json = ts.get_timeseries(
         ts_id=TEST_TSID_CHUNK_MULTI,