diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py index fc88f9d4..a6f14723 100644 --- a/dataretrieval/waterdata/api.py +++ b/dataretrieval/waterdata/api.py @@ -165,8 +165,7 @@ def get_daily( if your internet connection is spotty. The default (NA) will set the limit to the maximum allowable limit for the service. convert_type : boolean, optional - If True, the function will convert the data to dates and qualifier to - string vector + If True, converts columns to appropriate types. Returns ------- @@ -475,6 +474,8 @@ def get_monitoring_locations( The returning object will be a data frame with no spatial information. Note that the USGS Water Data APIs use camelCase "skipGeometry" in CQL2 queries. + convert_type : boolean, optional + If True, converts columns to appropriate types. Returns ------- @@ -666,8 +667,7 @@ def get_time_series_metadata( if your internet connection is spotty. The default (None) will set the limit to the maximum allowable limit for the service. convert_type : boolean, optional - If True, the function will convert the data to dates and qualifier to - string vector + If True, converts columns to appropriate types. Returns ------- @@ -842,8 +842,7 @@ def get_latest_continuous( if your internet connection is spotty. The default (None) will set the limit to the maximum allowable limit for the service. convert_type : boolean, optional - If True, the function will convert the data to dates and qualifier to - string vector + If True, converts columns to appropriate types. Returns ------- @@ -1017,8 +1016,7 @@ def get_latest_daily( if your internet connection is spotty. The default (None) will set the limit to the maximum allowable limit for the service. convert_type : boolean, optional - If True, the function will convert the data to dates and qualifier to - string vector + If True, converts columns to appropriate types. Returns ------- @@ -1183,8 +1181,7 @@ def get_field_measurements( if your internet connection is spotty. 
The default (None) will set the limit to the maximum allowable limit for the service. convert_type : boolean, optional - If True, the function will convert the data to dates and qualifier to - string vector + If True, converts columns to appropriate types. Returns ------- diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 68ae9e13..0f6e8bd9 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -668,32 +668,48 @@ def _arrange_cols( return df.rename(columns={"id": output_id}) -def _cleanup_cols(df: pd.DataFrame, service: str = "daily") -> pd.DataFrame: +def _type_cols(df: pd.DataFrame) -> pd.DataFrame: """ - Cleans and standardizes columns in a pandas DataFrame for water data endpoints. + Casts known date/time columns to datetimes and known numeric columns to numbers. Parameters ---------- df : pd.DataFrame The input DataFrame containing water data. - service : str, optional - The type of water data service (default is "daily"). Returns ------- pd.DataFrame - The cleaned DataFrame with standardized columns. + The input DataFrame, modified in place, with the recognized columns cast. - Notes - ----- - - If the 'time' column exists and service is "daily", it is converted to date objects. - - The 'value' and 'contributing_drainage_area' columns are coerced to numeric types. 
""" - if "time" in df.columns and service == "daily": - df["time"] = pd.to_datetime(df["time"]).dt.date - for col in ["value", "contributing_drainage_area"]: - if col in df.columns: - df[col] = pd.to_numeric(df[col], errors="coerce") + cols = set(df.columns) + numerical_cols = [ + "altitude", + "altitude_accuracy", + "contributing_drainage_area", + "drainage_area", + "hole_constructed_depth", + "value", + "well_constructed_depth", + ] + time_cols = [ + "begin", + "begin_utc", + "construction_date", + "end", + "end_utc", + "datetime", # unused + "last_modified", + "time", + ] + + for col in cols.intersection(time_cols): + df[col] = pd.to_datetime(df[col], errors="coerce") + + for col in cols.intersection(numerical_cols): + df[col] = pd.to_numeric(df[col], errors="coerce") + return df @@ -749,8 +765,10 @@ def get_ogc_data( ) # Manage some aspects of the returned dataset return_list = _deal_with_empty(return_list, properties, service) + if convert_type: - return_list = _cleanup_cols(return_list, service=service) + return_list = _type_cols(return_list) + return_list = _arrange_cols(return_list, properties, output_id) # Create metadata object from response metadata = BaseMetadata(response)