diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c435b3..2642f1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,42 @@ uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Changed + +- **BREAKING**: JSON parsing migrated from `nlohmann/json` v3.11.3 to + [`Glaze`](https://github.com/stephenberry/glaze) v7.6.0. The public + client API (`CDOClient::get_*`, `DataServiceClient::get_*`, + `parse_csv_data` / `parse_ssv_data`) is unchanged. Internal + `from_json(const nlohmann::json&, T&)` overloads have been replaced + with `deserialize_*(std::string_view, T&) -> Result` in the + `ncei::` namespace. The transitional `json_string` / `json_int` / + `json_double` / `json_bool` helpers in `models/common.hpp` are gone + (no external consumers). Benchmark: ~9-15x parse speedup on a + representative 21 KB CDO `/stations` list-response payload + (nlohmann ~360-590 us/op → Glaze ~32-40 us/op on x86_64-v3, + GCC 13.3, -O3 -DNDEBUG). +- C++23 baseline reaffirmed — Glaze requires C++23 for its + compile-time reflection path. `CMakeLists.txt` already enforced this. + +### Added + +- `tests/glaze_test.cpp` — verifies parse-output shape parity with the + pre-migration behavior (null-safety on every scalar field, + unknown-key tolerance, dynamic DataPoint attribute preservation, + CDO list-response envelope walking, snake_case ↔ camelCase + JSON-key aliasing for `datacoverage` / `mindate` / `maxdate` / + `elevationUnit`). +- `tests/parse_benchmark.cpp` — parse-throughput regression guard. Caps + at 200 us/op (≈5-6x slower than the migration-time Glaze number) with + a 30s ctest timeout. + +### Removed + +- `src/core/pagination.cpp` (the nlohmann `from_json` overload for + `ResultSetMetadata`). The CDO envelope is now parsed by a templated + Glaze meta specialization in `src/models/pagination_detail.hpp` (an + internal-only header). 
+ ## [0.1.1] - 2026-05-10 ### CI diff --git a/CLAUDE.md b/CLAUDE.md index c968c3e..e79125c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,7 +18,7 @@ make clean # Remove build/ - **C++23**: `std::expected` for all returns, no exceptions - **Two clients**: CDOClient (token auth, rate limited, paginated) + DataServiceClient (no auth, multi-format) - **Patterns**: Pimpl (HttpClient, CDOClient, DataServiceClient), non-copyable/movable, `[[nodiscard]]` -- **JSON**: nlohmann/json via FetchContent. Use `json_string()` / `json_int()` / `json_double()` / `json_bool()` helpers from `models/common.hpp`. +- **JSON**: [Glaze](https://github.com/stephenberry/glaze) v7.6.0 via FetchContent (compile-time reflection, ~9-15x parse speedup over nlohmann on the CDO list-response shape — migrated 2026-05-11). Public entry points are the `deserialize_*(std::string_view, T&) -> Result` family in `include/ncei/models/common.hpp`; per-T `glz::meta` specializations live in each model `.cpp`. The `skip_null_members_on_read = true` opt is wired through `ncei::detail::kReadOpts` so CDO's frequent `"datacoverage": null` rows leave the field at its default. Dynamic-key payloads (DataPoint's user-driven TMAX/TMIN/PRCP columns) use `glz::generic` — search for `TODO(glaze):` markers. See `tests/parse_benchmark.cpp` for the regression guard. - **Tests**: GoogleTest via FetchContent. Fixture files in `tests/fixtures/`. ## Conventions @@ -26,8 +26,7 @@ make clean # Remove build/ - Code style: `.clang-format` (LLVM base, tabs, 100 cols) - Namespace: `ncei` - **No `auto`**: Use explicit types. `auto` is only acceptable for iterators, structured bindings (`auto& [key, val]`), and range-for loops (`const auto& x : container`). -- All model `from_json` functions use the null-safe helpers, NOT `j.value("key", "")`. -- Models declare `from_json` in headers, implement in `.cpp` files. 
+- Model structs are declared in `include/ncei/models/...`, with `glz::meta` specializations and the `deserialize_*` implementations in matching `src/models/.../*.cpp` files. The pre-migration `from_json(const nlohmann::json&, T&)` overloads have been removed; downstream consumers use the high-level client methods (`CDOClient::get_*`, `DataServiceClient::get_*`), never these helpers directly. - Include order: project headers first, then system headers (enforced by clang-format). ## CDO API Notes diff --git a/CMakeLists.txt b/CMakeLists.txt index 252dc9f..f20838d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,14 +70,17 @@ endif() find_package(CURL REQUIRED) include(FetchContent) + +# Glaze — JSON library (compile-time reflection, ~9-15x parse speedup over +# nlohmann on the CDO list-response payload, which is the NCEI hot path). +# License: MIT. FetchContent_Declare( - json - GIT_REPOSITORY https://github.com/nlohmann/json.git - GIT_TAG v3.11.3 + glaze + GIT_REPOSITORY https://github.com/stephenberry/glaze.git + GIT_TAG v7.6.0 GIT_SHALLOW TRUE ) -set(JSON_BuildTests OFF CACHE INTERNAL "") -FetchContent_MakeAvailable(json) +FetchContent_MakeAvailable(glaze) # Include directories include_directories(${PROJECT_SOURCE_DIR}/include) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5df59b4..58f2104 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,9 +51,15 @@ below). `make format` applies it. - **Includes**: project headers first, then system headers (enforced by clang-format `SortIncludes`). -- **JSON**: use null-safe helpers from `models/common.hpp`. Do NOT use - `j.value("key", default)` — it throws on JSON null in nlohmann/json - v3. +- **JSON**: Glaze v7.6.0 via FetchContent. Add new model types as + `glz::meta` specializations in the per-model `.cpp` files, then + expose them through `deserialize_*` wrappers in + `include/ncei/models/common.hpp`. 
Use `ncei::detail::kReadOpts` + (defined in `src/models/common_glaze_detail.hpp`) to inherit the + CDO-friendly defaults (`error_on_unknown_keys = false`, + `skip_null_members_on_read = true`). For dynamic-key payloads + (DataPoint's user-driven attribute columns), use `glz::generic` + and tag the call site with a `// TODO(glaze):` marker. ## PR conventions diff --git a/README.md b/README.md index 6999cfb..608ccea 100644 --- a/README.md +++ b/README.md @@ -222,7 +222,7 @@ make run-data_service_search # Dataset metadata (no auth) | Library | Purpose | Integration | |---------|---------|-------------| | libcurl | HTTP requests | `find_package(CURL)` | -| nlohmann/json | JSON parsing | `FetchContent` | +| Glaze v7.6.0 | JSON parsing (compile-time reflection) | `FetchContent` | | GoogleTest | Unit testing | `FetchContent` | | libnetcdf | NetCDF support (optional) | `find_package(netCDF)` | diff --git a/SECURITY.md b/SECURITY.md index 1f077dc..931f49e 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -48,5 +48,5 @@ You can expect: - Operational issues (rate-limit handling, network blips) — file a regular issue. - Theoretical issues against dependencies — report them upstream - (`openssl`, `libcurl`, `nlohmann/json`, `googletest`). We pin via + (`openssl`, `libcurl`, `Glaze`, `googletest`). We pin via FetchContent and bump on credible advisories. 
diff --git a/include/ncei/models/cdo/data.hpp b/include/ncei/models/cdo/data.hpp index 5f846dc..c04ad7a 100644 --- a/include/ncei/models/cdo/data.hpp +++ b/include/ncei/models/cdo/data.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include namespace ncei { @@ -13,6 +12,4 @@ struct DataRecord { double value{0.0}; }; -void from_json(const nlohmann::json& j, DataRecord& d); - } // namespace ncei diff --git a/include/ncei/models/cdo/data_category.hpp b/include/ncei/models/cdo/data_category.hpp index ea65c50..1597057 100644 --- a/include/ncei/models/cdo/data_category.hpp +++ b/include/ncei/models/cdo/data_category.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include namespace ncei { @@ -10,6 +9,4 @@ struct DataCategory { std::string name; }; -void from_json(const nlohmann::json& j, DataCategory& d); - } // namespace ncei diff --git a/include/ncei/models/cdo/data_type.hpp b/include/ncei/models/cdo/data_type.hpp index e8625ab..d665b88 100644 --- a/include/ncei/models/cdo/data_type.hpp +++ b/include/ncei/models/cdo/data_type.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include namespace ncei { @@ -13,6 +12,4 @@ struct DataType { std::string max_date; }; -void from_json(const nlohmann::json& j, DataType& d); - } // namespace ncei diff --git a/include/ncei/models/cdo/dataset.hpp b/include/ncei/models/cdo/dataset.hpp index c7ca641..2c6f271 100644 --- a/include/ncei/models/cdo/dataset.hpp +++ b/include/ncei/models/cdo/dataset.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include namespace ncei { @@ -13,6 +12,4 @@ struct Dataset { std::string max_date; }; -void from_json(const nlohmann::json& j, Dataset& d); - } // namespace ncei diff --git a/include/ncei/models/cdo/location.hpp b/include/ncei/models/cdo/location.hpp index 7e49d02..34d3955 100644 --- a/include/ncei/models/cdo/location.hpp +++ b/include/ncei/models/cdo/location.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include namespace ncei { @@ -13,6 +12,4 @@ struct Location { std::string max_date; }; -void from_json(const 
nlohmann::json& j, Location& l); - } // namespace ncei diff --git a/include/ncei/models/cdo/location_category.hpp b/include/ncei/models/cdo/location_category.hpp index 0d4e8d5..cbf6a6c 100644 --- a/include/ncei/models/cdo/location_category.hpp +++ b/include/ncei/models/cdo/location_category.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include namespace ncei { @@ -10,6 +9,4 @@ struct LocationCategory { std::string name; }; -void from_json(const nlohmann::json& j, LocationCategory& lc); - } // namespace ncei diff --git a/include/ncei/models/cdo/station.hpp b/include/ncei/models/cdo/station.hpp index 78cadda..e83d101 100644 --- a/include/ncei/models/cdo/station.hpp +++ b/include/ncei/models/cdo/station.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include namespace ncei { @@ -17,6 +16,4 @@ struct CDOStation { std::string elevation_unit; }; -void from_json(const nlohmann::json& j, CDOStation& s); - } // namespace ncei diff --git a/include/ncei/models/common.hpp b/include/ncei/models/common.hpp index 654c9b0..1c84201 100644 --- a/include/ncei/models/common.hpp +++ b/include/ncei/models/common.hpp @@ -1,14 +1,65 @@ #pragma once -#include -#include -#include +/// @file common.hpp +/// @brief Common Glaze-deserializer entry points for NCEI model types +/// +/// Backed by [Glaze](https://github.com/stephenberry/glaze) for JSON +/// deserialization. The public surface from this header is the +/// `deserialize_*(std::string_view, T&) -> Result` family. The +/// previous `from_json(const nlohmann::json&, T&)` overloads and the +/// transitional `json_string` / `json_int` / `json_double` / `json_bool` +/// helpers have been removed; downstream consumers (`crawler`, +/// `kalshi-trainer`) only use the high-level client methods, never +/// these internal helpers. 
+ +#include "ncei/error.hpp" +#include "ncei/models/cdo/data.hpp" +#include "ncei/models/cdo/data_category.hpp" +#include "ncei/models/cdo/data_type.hpp" +#include "ncei/models/cdo/dataset.hpp" +#include "ncei/models/cdo/location.hpp" +#include "ncei/models/cdo/location_category.hpp" +#include "ncei/models/cdo/station.hpp" +#include "ncei/models/data_service/data_point.hpp" +#include "ncei/models/data_service/dataset_metadata.hpp" +#include "ncei/models/data_service/search_result.hpp" +#include "ncei/pagination.hpp" + +#include +#include namespace ncei { -[[nodiscard]] std::string json_string(const nlohmann::json& j, const char* key); -[[nodiscard]] std::int32_t json_int(const nlohmann::json& j, const char* key, std::int32_t def = 0); -[[nodiscard]] double json_double(const nlohmann::json& j, const char* key, double def = 0.0); -[[nodiscard]] bool json_bool(const nlohmann::json& j, const char* key, bool def = false); +// ===== Deserializers (Glaze-backed, return Result) ===== +// +// Each function parses a JSON body (string_view, zero-copy where possible) +// into the corresponding struct. On failure returns Error::parse(...). +// +// The CDO list-response family (parse a `{metadata, results}` envelope into +// ResultSetMetadata + a vector of records) is exposed via the +// `deserialize_cdo_list` template below; single-record deserializers come first. 
+ +[[nodiscard]] Result deserialize_dataset(std::string_view body, Dataset& out); +[[nodiscard]] Result deserialize_data_category(std::string_view body, DataCategory& out); +[[nodiscard]] Result deserialize_data_type(std::string_view body, DataType& out); +[[nodiscard]] Result deserialize_location_category(std::string_view body, + LocationCategory& out); +[[nodiscard]] Result deserialize_location(std::string_view body, Location& out); +[[nodiscard]] Result deserialize_station(std::string_view body, CDOStation& out); +[[nodiscard]] Result deserialize_data_record(std::string_view body, DataRecord& out); + +[[nodiscard]] Result deserialize_data_point_collection(std::string_view body, + DataPointCollection& out); +[[nodiscard]] Result deserialize_dataset_metadata(std::string_view body, + DatasetMetadata& out); +[[nodiscard]] Result deserialize_data_search_result(std::string_view body, + DataSearchResult& out); +[[nodiscard]] Result deserialize_dataset_search_result(std::string_view body, + DatasetSearchResult& out); + +// CDO list-response (envelope { metadata: {...}, results: [...] }) deserializer +// — declared here; explicitly specialized per model in src/models/cdo/*.cpp. 
+template +[[nodiscard]] Result deserialize_cdo_list(std::string_view body, CDOResponse& out); } // namespace ncei diff --git a/include/ncei/models/data_service/data_point.hpp b/include/ncei/models/data_service/data_point.hpp index a4b2e2f..a160335 100644 --- a/include/ncei/models/data_service/data_point.hpp +++ b/include/ncei/models/data_service/data_point.hpp @@ -1,6 +1,5 @@ #pragma once #include -#include #include #include #include @@ -27,9 +26,6 @@ struct DataPointCollection { std::vector records; }; -void from_json(const nlohmann::json& j, DataPoint& dp); -void from_json(const nlohmann::json& j, DataPointCollection& dpc); - [[nodiscard]] DataPointCollection parse_csv_data(std::string_view csv_text); [[nodiscard]] DataPointCollection parse_ssv_data(std::string_view ssv_text); diff --git a/include/ncei/models/data_service/dataset_metadata.hpp b/include/ncei/models/data_service/dataset_metadata.hpp index c5eb227..19fa6ec 100644 --- a/include/ncei/models/data_service/dataset_metadata.hpp +++ b/include/ncei/models/data_service/dataset_metadata.hpp @@ -1,5 +1,4 @@ #pragma once -#include #include #include @@ -19,7 +18,4 @@ struct DatasetMetadata { std::vector fields; }; -void from_json(const nlohmann::json& j, DatasetField& f); -void from_json(const nlohmann::json& j, DatasetMetadata& m); - } // namespace ncei diff --git a/include/ncei/models/data_service/search_result.hpp b/include/ncei/models/data_service/search_result.hpp index f59309f..2ce9920 100644 --- a/include/ncei/models/data_service/search_result.hpp +++ b/include/ncei/models/data_service/search_result.hpp @@ -1,7 +1,5 @@ #pragma once -#include #include -#include #include namespace ncei { @@ -26,7 +24,4 @@ struct DatasetSearchResult { std::vector data_types; }; -void from_json(const nlohmann::json& j, DataSearchResult& r); -void from_json(const nlohmann::json& j, DatasetSearchResult& r); - } // namespace ncei diff --git a/include/ncei/pagination.hpp b/include/ncei/pagination.hpp index fcbbdc2..7888933 
100644 --- a/include/ncei/pagination.hpp +++ b/include/ncei/pagination.hpp @@ -1,6 +1,8 @@ #pragma once +#include "ncei/error.hpp" + #include -#include +#include #include namespace ncei { @@ -11,8 +13,6 @@ struct ResultSetMetadata { std::int32_t limit{25}; }; -void from_json(const nlohmann::json& j, ResultSetMetadata& m); - template struct CDOResponse { ResultSetMetadata metadata; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a810049..39ada64 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,16 +1,20 @@ # Core library (error, rate limiting, retry, date_range, csv_parser) +# +# Glaze is header-only and pulled in via include-path only (matching how +# nlohmann was integrated pre-migration). This sidesteps CMake's "INTERFACE +# target not in export set" error that fires if we link glaze::glaze +# directly onto these installable static libs. add_library(ncei_core STATIC core/error.cpp core/rate_limit.cpp core/retry.cpp core/date_range.cpp core/csv_parser.cpp - core/pagination.cpp ) target_compile_features(ncei_core PUBLIC cxx_std_23) target_include_directories(ncei_core PUBLIC $ - $ + $ $ ) @@ -41,6 +45,7 @@ add_library(ncei_models STATIC target_link_libraries(ncei_models PUBLIC ncei_core) target_include_directories(ncei_models PUBLIC $ + $ $ ) diff --git a/src/api/cdo_client.cpp b/src/api/cdo_client.cpp index a8acd18..ba93322 100644 --- a/src/api/cdo_client.cpp +++ b/src/api/cdo_client.cpp @@ -4,7 +4,6 @@ #include #include -#include #include namespace ncei { @@ -128,43 +127,50 @@ std::string CDOClient::build_data_query(const GetDataParams& params) { namespace { +// Dispatch a single-record HTTP response body to its model-specific +// `deserialize_*` overload. Wrapped in a template that selects the right +// helper at call site so the per-endpoint code below stays uniform. 
+ +Result dispatch_deserialize(std::string_view body, Dataset& out) { + return deserialize_dataset(body, out); +} +Result dispatch_deserialize(std::string_view body, DataCategory& out) { + return deserialize_data_category(body, out); +} +Result dispatch_deserialize(std::string_view body, DataType& out) { + return deserialize_data_type(body, out); +} +Result dispatch_deserialize(std::string_view body, LocationCategory& out) { + return deserialize_location_category(body, out); +} +Result dispatch_deserialize(std::string_view body, Location& out) { + return deserialize_location(body, out); +} +Result dispatch_deserialize(std::string_view body, CDOStation& out) { + return deserialize_station(body, out); +} +Result dispatch_deserialize(std::string_view body, DataRecord& out) { + return deserialize_data_record(body, out); +} + template Result> parse_list_response(const HttpResponse& response) { - try { - nlohmann::json j = nlohmann::json::parse(response.body); - CDOResponse result; - - // Parse metadata envelope - ResultSetMetadata meta; - from_json(j, meta); - result.metadata = meta; - - // Parse results array - if (j.contains("results") && j["results"].is_array()) { - result.results.reserve(j["results"].size()); - for (const nlohmann::json& item : j["results"]) { - T obj; - from_json(item, obj); - result.results.push_back(std::move(obj)); - } - } - - return result; - } catch (const nlohmann::json::exception& e) { - return std::unexpected(Error::parse(std::string("JSON parse error: ") + e.what())); + CDOResponse result; + Result ec = deserialize_cdo_list(response.body, result); + if (!ec) { + return std::unexpected(ec.error()); } + return result; } template Result parse_single_response(const HttpResponse& response) { - try { - nlohmann::json j = nlohmann::json::parse(response.body); - T obj; - from_json(j, obj); - return obj; - } catch (const nlohmann::json::exception& e) { - return std::unexpected(Error::parse(std::string("JSON parse error: ") + e.what())); + T obj; + 
Result ec = dispatch_deserialize(response.body, obj); + if (!ec) { + return std::unexpected(ec.error()); } + return obj; } } // namespace diff --git a/src/api/data_service_client.cpp b/src/api/data_service_client.cpp index c3c26fc..ce6cfdb 100644 --- a/src/api/data_service_client.cpp +++ b/src/api/data_service_client.cpp @@ -4,7 +4,6 @@ #include #include -#include #include namespace ncei { @@ -118,26 +117,24 @@ Result DataServiceClient::get_data(const DataRequestParams& ResponseFormat fmt = params.format.value_or(ResponseFormat::CSV); - try { - switch (fmt) { - case ResponseFormat::CSV: - return parse_csv_data(response->body); - case ResponseFormat::JSON: { - nlohmann::json j = nlohmann::json::parse(response->body); - DataPointCollection dpc; - from_json(j, dpc); - return dpc; + switch (fmt) { + case ResponseFormat::CSV: + return parse_csv_data(response->body); + case ResponseFormat::JSON: { + DataPointCollection dpc; + Result ec = deserialize_data_point_collection(response->body, dpc); + if (!ec) { + return std::unexpected(ec.error()); } - case ResponseFormat::SSV: - return parse_ssv_data(response->body); - case ResponseFormat::PDF: - case ResponseFormat::NetCDF: - return std::unexpected(Error::invalid_request( - "PDF/NetCDF formats cannot be parsed as DataPointCollection; " - "use get_data_raw() instead")); + return dpc; } - } catch (const nlohmann::json::exception& e) { - return std::unexpected(Error::parse(std::string("JSON parse error: ") + e.what())); + case ResponseFormat::SSV: + return parse_ssv_data(response->body); + case ResponseFormat::PDF: + case ResponseFormat::NetCDF: + return std::unexpected(Error::invalid_request( + "PDF/NetCDF formats cannot be parsed as DataPointCollection; " + "use get_data_raw() instead")); } return std::unexpected(Error::invalid_request("Unknown response format")); @@ -165,14 +162,12 @@ Result DataServiceClient::get_dataset_metadata(const std::strin return std::unexpected(Error::from_response(response->status_code, 
response->body)); } - try { - nlohmann::json j = nlohmann::json::parse(response->body); - DatasetMetadata meta; - from_json(j, meta); - return meta; - } catch (const nlohmann::json::exception& e) { - return std::unexpected(Error::parse(std::string("JSON parse error: ") + e.what())); + DatasetMetadata meta; + Result ec = deserialize_dataset_metadata(response->body, meta); + if (!ec) { + return std::unexpected(ec.error()); } + return meta; } } // namespace ncei diff --git a/src/core/error.cpp b/src/core/error.cpp index c2e29f4..12a97eb 100644 --- a/src/core/error.cpp +++ b/src/core/error.cpp @@ -1,9 +1,77 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/error.hpp" -#include +#include +#include +#include +#include namespace ncei { +namespace { + +// Multi-format NCEI error body parser. The HTTP error envelope shape +// differs between the CDO API (`{status, message}`), the Data Service +// API (`{errorMessage, errors:[...]}`), and a generic RFC 7807 +// Problem Details fallback (`{title, detail, correlationId}`). Glaze +// can't pick at compile time, so we walk the AST once and dispatch. +// +// On any read failure, the parsed body falls back to a truncated +// raw-string error message (matches the pre-migration behavior of the +// nlohmann try/catch). 
+ +bool extract_string_into(const glz::generic::object_t& obj, const char* key, std::string& out) { + glz::generic::object_t::const_iterator it = obj.find(key); + if (it == obj.end() || !it->second.is_string()) { + return false; + } + out = it->second.get(); + return true; +} + +void populate_message_from_generic(const glz::generic& root, Error& err) { + if (!root.is_object()) { + return; + } + const glz::generic::object_t& obj = root.get_object(); + + // CDO format: {"status":"400","message":"Invalid token"} + if (obj.find("status") != obj.end() && extract_string_into(obj, "message", err.message)) { + return; + } + + // Data Service format: {"errorMessage":"Bad request","errors":[...]} + if (extract_string_into(obj, "errorMessage", err.message)) { + glz::generic::object_t::const_iterator errs_it = obj.find("errors"); + if (errs_it != obj.end() && errs_it->second.is_array()) { + const glz::generic::array_t& arr = errs_it->second.get_array(); + std::string details; + for (const glz::generic& e : arr) { + if (!e.is_string()) { + continue; + } + if (!details.empty()) { + details += "; "; + } + details += e.get(); + } + err.detail = std::move(details); + } + return; + } + + // RFC 7807 Problem Details fallback + extract_string_into(obj, "title", err.message); + extract_string_into(obj, "detail", err.detail); + if (err.correlation_id.empty()) { + extract_string_into(obj, "correlationId", err.correlation_id); + } +} + +} // namespace + Error Error::from_response(int status, const std::string& body, const std::string& correlation_id) { Error err; err.http_status = status; @@ -23,45 +91,14 @@ Error Error::from_response(int status, const std::string& body, const std::strin err.code = ErrorCode::Unknown; } - // Try to parse error JSON in CDO, Data Service, or RFC 7807 format - try { - nlohmann::json j = nlohmann::json::parse(body); - - // CDO format: {"status":"400","message":"Invalid token"} - if (j.contains("status") && j.contains("message") && j["message"].is_string()) { 
- err.message = j["message"].get(); - } - // Data Service format: {"errorMessage":"Bad request","errors":[...]} - else if (j.contains("errorMessage") && j["errorMessage"].is_string()) { - err.message = j["errorMessage"].get(); - if (j.contains("errors") && j["errors"].is_array() && !j["errors"].empty()) { - std::string details; - for (const nlohmann::json& e : j["errors"]) { - if (e.is_string()) { - if (!details.empty()) { - details += "; "; - } - details += e.get(); - } - } - err.detail = std::move(details); - } - } - // RFC 7807 Problem Details fallback - else { - if (j.contains("title") && j["title"].is_string()) { - err.message = j["title"].get(); - } - if (j.contains("detail") && j["detail"].is_string()) { - err.detail = j["detail"].get(); - } - if (err.correlation_id.empty() && j.contains("correlationId") && - j["correlationId"].is_string()) { - err.correlation_id = j["correlationId"].get(); - } - } - } catch (...) { - // Not valid JSON -- use raw body as message + // Try to parse error JSON in CDO, Data Service, or RFC 7807 format. + glz::generic root{}; + glz::error_ctx ec = glz::read_json(root, body); + if (!ec) { + populate_message_from_generic(root, err); + } else { + // Not valid JSON — use raw body as message (truncated to keep error + // objects from carrying multi-KB HTML pages on transient failures). 
err.message = body.substr(0, 256); } diff --git a/src/core/pagination.cpp b/src/core/pagination.cpp deleted file mode 100644 index c42bcd1..0000000 --- a/src/core/pagination.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "ncei/pagination.hpp" - -#include "ncei/models/common.hpp" - -#include - -namespace ncei { - -void from_json(const nlohmann::json& j, ResultSetMetadata& m) { - if (j.contains("metadata") && j["metadata"].is_object()) { - const nlohmann::json& meta = j["metadata"]; - if (meta.contains("resultset") && meta["resultset"].is_object()) { - const nlohmann::json& rs = meta["resultset"]; - m.offset = json_int(rs, "offset", 0); - m.count = json_int(rs, "count", 0); - m.limit = json_int(rs, "limit", 25); - } - } -} - -} // namespace ncei diff --git a/src/models/cdo/data.cpp b/src/models/cdo/data.cpp index 42c33fc..cef81a8 100644 --- a/src/models/cdo/data.cpp +++ b/src/models/cdo/data.cpp @@ -1,17 +1,40 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/cdo/data.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +#include "../pagination_detail.hpp" -#include +namespace ncei {} // namespace ncei + +template <> +struct glz::meta { + using T = ncei::DataRecord; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("date", &T::date, "datatype", &T::datatype, "station", &T::station, "attributes", + &T::attributes, "value", &T::value); +}; namespace ncei { -void from_json(const nlohmann::json& j, DataRecord& d) { - d.date = json_string(j, "date"); - d.datatype = json_string(j, "datatype"); - d.station = json_string(j, "station"); - d.attributes = json_string(j, "attributes"); - d.value = json_double(j, "value"); +Result deserialize_data_record(std::string_view body, DataRecord& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } 
+ return {}; +} + +template <> +Result deserialize_cdo_list(std::string_view body, CDOResponse& out) { + return detail::deserialize_cdo_list_impl(body, out); } } // namespace ncei diff --git a/src/models/cdo/data_category.cpp b/src/models/cdo/data_category.cpp index 23ee7a4..6a10f21 100644 --- a/src/models/cdo/data_category.cpp +++ b/src/models/cdo/data_category.cpp @@ -1,14 +1,39 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/cdo/data_category.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +#include "../pagination_detail.hpp" -#include +namespace ncei {} // namespace ncei + +template <> +struct glz::meta { + using T = ncei::DataCategory; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::id, "name", &T::name); +}; namespace ncei { -void from_json(const nlohmann::json& j, DataCategory& d) { - d.id = json_string(j, "id"); - d.name = json_string(j, "name"); +Result deserialize_data_category(std::string_view body, DataCategory& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } + return {}; +} + +template <> +Result deserialize_cdo_list(std::string_view body, CDOResponse& out) { + return detail::deserialize_cdo_list_impl(body, out); } } // namespace ncei diff --git a/src/models/cdo/data_type.cpp b/src/models/cdo/data_type.cpp index e8f1a48..97b2e6b 100644 --- a/src/models/cdo/data_type.cpp +++ b/src/models/cdo/data_type.cpp @@ -1,17 +1,40 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/cdo/data_type.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +#include "../pagination_detail.hpp" -#include +namespace ncei {} // namespace ncei + +template <> +struct glz::meta { + using T = 
ncei::DataType; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::id, "name", &T::name, "datacoverage", &T::data_coverage, "mindate", + &T::min_date, "maxdate", &T::max_date); +}; namespace ncei { -void from_json(const nlohmann::json& j, DataType& d) { - d.id = json_string(j, "id"); - d.name = json_string(j, "name"); - d.data_coverage = json_double(j, "datacoverage"); - d.min_date = json_string(j, "mindate"); - d.max_date = json_string(j, "maxdate"); +Result deserialize_data_type(std::string_view body, DataType& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } + return {}; +} + +template <> +Result deserialize_cdo_list(std::string_view body, CDOResponse& out) { + return detail::deserialize_cdo_list_impl(body, out); } } // namespace ncei diff --git a/src/models/cdo/dataset.cpp b/src/models/cdo/dataset.cpp index 6364a3b..47eae48 100644 --- a/src/models/cdo/dataset.cpp +++ b/src/models/cdo/dataset.cpp @@ -1,17 +1,48 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/cdo/dataset.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +#include "../pagination_detail.hpp" + +namespace ncei { -#include +// Dataset is fully-statically-typed. Glaze's direct reflected path +// handles it via the meta below; null values for any field (CDO's +// `"datacoverage": null`, etc.) are tolerated via +// skip_null_members_on_read, which leaves the field at its in-struct +// default. 
+ +} // namespace ncei + +template <> +struct glz::meta { + using T = ncei::Dataset; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::id, "name", &T::name, "datacoverage", &T::data_coverage, "mindate", + &T::min_date, "maxdate", &T::max_date); +}; namespace ncei { -void from_json(const nlohmann::json& j, Dataset& d) { - d.id = json_string(j, "id"); - d.name = json_string(j, "name"); - d.data_coverage = json_double(j, "datacoverage"); - d.min_date = json_string(j, "mindate"); - d.max_date = json_string(j, "maxdate"); +Result deserialize_dataset(std::string_view body, Dataset& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } + return {}; +} + +template <> +Result deserialize_cdo_list(std::string_view body, CDOResponse& out) { + return detail::deserialize_cdo_list_impl(body, out); } } // namespace ncei diff --git a/src/models/cdo/location.cpp b/src/models/cdo/location.cpp index ccef5df..265b41c 100644 --- a/src/models/cdo/location.cpp +++ b/src/models/cdo/location.cpp @@ -1,17 +1,40 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/cdo/location.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +#include "../pagination_detail.hpp" -#include +namespace ncei {} // namespace ncei + +template <> +struct glz::meta { + using T = ncei::Location; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::id, "name", &T::name, "datacoverage", &T::data_coverage, "mindate", + &T::min_date, "maxdate", &T::max_date); +}; namespace ncei { -void from_json(const nlohmann::json& j, Location& l) { - l.id = json_string(j, "id"); - l.name = json_string(j, "name"); - l.data_coverage = json_double(j, "datacoverage"); - l.min_date = json_string(j, "mindate"); - l.max_date = json_string(j, 
"maxdate"); +Result deserialize_location(std::string_view body, Location& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } + return {}; +} + +template <> +Result deserialize_cdo_list(std::string_view body, CDOResponse& out) { + return detail::deserialize_cdo_list_impl(body, out); } } // namespace ncei diff --git a/src/models/cdo/location_category.cpp b/src/models/cdo/location_category.cpp index 776f852..ecce883 100644 --- a/src/models/cdo/location_category.cpp +++ b/src/models/cdo/location_category.cpp @@ -1,14 +1,39 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/cdo/location_category.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +#include "../pagination_detail.hpp" -#include +namespace ncei {} // namespace ncei + +template <> +struct glz::meta { + using T = ncei::LocationCategory; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::id, "name", &T::name); +}; namespace ncei { -void from_json(const nlohmann::json& j, LocationCategory& lc) { - lc.id = json_string(j, "id"); - lc.name = json_string(j, "name"); +Result deserialize_location_category(std::string_view body, LocationCategory& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } + return {}; +} + +template <> +Result deserialize_cdo_list(std::string_view body, CDOResponse& out) { + return detail::deserialize_cdo_list_impl(body, out); } } // namespace ncei diff --git a/src/models/cdo/station.cpp b/src/models/cdo/station.cpp index a8049f0..9337fe2 100644 --- a/src/models/cdo/station.cpp +++ b/src/models/cdo/station.cpp @@ -1,21 +1,41 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/cdo/station.hpp" +#include 
"ncei/error.hpp" #include "ncei/models/common.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +#include "../pagination_detail.hpp" -#include +namespace ncei {} // namespace ncei + +template <> +struct glz::meta { + using T = ncei::CDOStation; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::id, "name", &T::name, "datacoverage", &T::data_coverage, "mindate", + &T::min_date, "maxdate", &T::max_date, "latitude", &T::latitude, "longitude", + &T::longitude, "elevation", &T::elevation, "elevationUnit", &T::elevation_unit); +}; namespace ncei { -void from_json(const nlohmann::json& j, CDOStation& s) { - s.id = json_string(j, "id"); - s.name = json_string(j, "name"); - s.data_coverage = json_double(j, "datacoverage"); - s.min_date = json_string(j, "mindate"); - s.max_date = json_string(j, "maxdate"); - s.latitude = json_double(j, "latitude"); - s.longitude = json_double(j, "longitude"); - s.elevation = json_double(j, "elevation"); - s.elevation_unit = json_string(j, "elevationUnit"); +Result deserialize_station(std::string_view body, CDOStation& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } + return {}; +} + +template <> +Result deserialize_cdo_list(std::string_view body, CDOResponse& out) { + return detail::deserialize_cdo_list_impl(body, out); } } // namespace ncei diff --git a/src/models/common.cpp b/src/models/common.cpp index 0d3042d..aeec8c6 100644 --- a/src/models/common.cpp +++ b/src/models/common.cpp @@ -1,35 +1,82 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/common.hpp" -#include +#include +#include +#include +#include +#include + +#include "common_glaze_detail.hpp" namespace ncei { -std::string json_string(const nlohmann::json& j, const char* key) { - if (j.contains(key) && j[key].is_string()) { - return j[key].get(); +// ===== Glaze detail helpers 
(internal) ===== +// +// Pulled out of every `.cpp` so the null-safety contract lives in one +// place. The pre-migration nlohmann path returned defaults for null / +// missing fields; we preserve that here so the CDO API's frequent +// `"datacoverage": null` rows don't blow up the parser. + +namespace detail { + +std::string get_string(const glz::generic& obj, const char* key) { + if (!obj.is_object()) { + return {}; + } + const glz::generic::object_t& o = obj.get_object(); + glz::generic::object_t::const_iterator it = o.find(key); + if (it == o.end() || !it->second.is_string()) { + return {}; } - return {}; + return it->second.get(); } -std::int32_t json_int(const nlohmann::json& j, const char* key, std::int32_t def) { - if (j.contains(key) && j[key].is_number()) { - return j[key].get(); +double get_double(const glz::generic& obj, const char* key, double def) { + if (!obj.is_object()) { + return def; } - return def; + const glz::generic::object_t& o = obj.get_object(); + glz::generic::object_t::const_iterator it = o.find(key); + if (it == o.end() || !it->second.is_number()) { + return def; + } + return it->second.get(); } -double json_double(const nlohmann::json& j, const char* key, double def) { - if (j.contains(key) && j[key].is_number()) { - return j[key].get(); +std::int32_t get_int(const glz::generic& obj, const char* key, std::int32_t def) { + if (!obj.is_object()) { + return def; + } + const glz::generic::object_t& o = obj.get_object(); + glz::generic::object_t::const_iterator it = o.find(key); + if (it == o.end() || !it->second.is_number()) { + return def; } - return def; + return static_cast(it->second.get()); } -bool json_bool(const nlohmann::json& j, const char* key, bool def) { - if (j.contains(key) && j[key].is_boolean()) { - return j[key].get(); +void populate_result_set_metadata(const glz::generic& root, ResultSetMetadata& m) { + if (!root.is_object()) { + return; + } + const glz::generic::object_t& outer = root.get_object(); + 
glz::generic::object_t::const_iterator meta_it = outer.find("metadata"); + if (meta_it == outer.end() || !meta_it->second.is_object()) { + return; } - return def; + const glz::generic::object_t& meta = meta_it->second.get_object(); + glz::generic::object_t::const_iterator rs_it = meta.find("resultset"); + if (rs_it == meta.end() || !rs_it->second.is_object()) { + return; + } + m.offset = get_int(rs_it->second, "offset", 0); + m.count = get_int(rs_it->second, "count", 0); + m.limit = get_int(rs_it->second, "limit", 25); } +} // namespace detail + } // namespace ncei diff --git a/src/models/common_glaze_detail.hpp b/src/models/common_glaze_detail.hpp new file mode 100644 index 0000000..cd78cae --- /dev/null +++ b/src/models/common_glaze_detail.hpp @@ -0,0 +1,61 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT +#pragma once + +/// @file common_glaze_detail.hpp +/// @brief Internal Glaze helpers shared across model translation units +/// +/// NOT a public API. Lives under `src/models/` (NOT under `include/`) so it +/// is never installed — downstream consumers (crawler, kalshi-trainer) +/// cannot accidentally depend on these symbols. They exist only so the +/// per-model `.cpp` files and the CDO list-response template in +/// `pagination_detail.hpp` can share the dynamic-key dispatch helpers +/// without duplicating them. + +#include "ncei/pagination.hpp" + +#include +#include +#include +#include + +namespace ncei::detail { + +/// Custom Glaze opts struct: extends `glz::opts` with the optional +/// `skip_null_members_on_read` flag. Standard `glz::opts` doesn't include +/// that field; setting it requires a derived struct (the Glaze v7.6.0 +/// pattern used internally by `glz::set_opt<&opts::skip_null_members_on_read>`, +/// just spelled out explicitly here so the call sites can stay flat). 
+/// +/// The combined config: +/// - `error_on_unknown_keys = false` → CDO frequently adds new metadata +/// keys that older client builds don't model; we must keep parsing. +/// - `skip_null_members_on_read = true` → CDO routinely emits +/// `"datacoverage": null`, `"elevation": null`, etc. Without this, +/// Glaze errors on null-for-scalar; with it, the field is left at +/// its in-struct default (matches the pre-migration `json_double()` +/// helper semantics). +struct ncei_opts : glz::opts { + bool skip_null_members_on_read = true; +}; + +inline constexpr ncei_opts kReadOpts{{.error_on_unknown_keys = false}, true}; + +/// Null-safe string extraction from a glz::generic object. Returns "" if +/// the key is missing or the value is null/non-string. Mirrors the +/// pre-migration `json_string()` helper. +[[nodiscard]] std::string get_string(const glz::generic& obj, const char* key); + +/// Null-safe double extraction. Returns `def` if missing or null/non-number. +[[nodiscard]] double get_double(const glz::generic& obj, const char* key, double def = 0.0); + +/// Null-safe int32 extraction. Returns `def` if missing or null/non-number. +[[nodiscard]] std::int32_t get_int(const glz::generic& obj, const char* key, std::int32_t def = 0); + +/// Populate the `{offset, count, limit}` fields of a ResultSetMetadata +/// from the `metadata.resultset` sub-object of a glz::generic root. If +/// either the outer or inner key is missing or not an object, leaves +/// `m` at its default values. 
+void populate_result_set_metadata(const glz::generic& root, ResultSetMetadata& m); + +} // namespace ncei::detail diff --git a/src/models/data_service/data_point.cpp b/src/models/data_service/data_point.cpp index cbb5552..2ac7fe2 100644 --- a/src/models/data_service/data_point.cpp +++ b/src/models/data_service/data_point.cpp @@ -1,19 +1,29 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/data_service/data_point.hpp" #include "ncei/csv_parser.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" -#include +#include +#include +#include +#include #include -#include #include +#include +#include + +#include "../common_glaze_detail.hpp" namespace ncei { namespace { -const std::set known_fields = {"DATE", "STATION", "NAME", - "LATITUDE", "LONGITUDE", "ELEVATION"}; +const std::set> kKnownFields = {"DATE", "STATION", "NAME", + "LATITUDE", "LONGITUDE", "ELEVATION"}; void populate_from_row(DataPoint& dp, const std::vector& headers, const std::vector& values) { @@ -72,6 +82,59 @@ DataPointCollection parse_delimited(std::string_view text, DelimitedParser::Deli return collection; } +// ===== Glaze AST walkers ===== +// +// TODO(glaze): DataPoint has DYNAMIC KEYS (user-requested data types like +// TMAX / TMIN / PRCP / SNOW are tacked on as siblings to the known +// DATE/STATION/... fields), so we cannot pre-declare a glz::meta. We parse +// the response into a glz::generic AST once and walk it. The same applies +// to DataPointCollection because the per-record shape varies between +// queries. 
+ +void populate_data_point_from_object(const glz::generic& root, DataPoint& dp) { + if (!root.is_object()) { + return; + } + dp.date = detail::get_string(root, "DATE"); + dp.station = detail::get_string(root, "STATION"); + dp.name = detail::get_string(root, "NAME"); + dp.latitude = detail::get_double(root, "LATITUDE"); + dp.longitude = detail::get_double(root, "LONGITUDE"); + dp.elevation = detail::get_double(root, "ELEVATION"); + + dp.attributes.clear(); + for (const auto& [key, val] : root.get_object()) { + if (kKnownFields.find(key) != kKnownFields.end()) { + continue; + } + std::string value; + if (val.is_string()) { + value = val.get(); + } else if (val.is_number()) { + value = std::to_string(val.get()); + } else if (val.is_null()) { + value.clear(); + } else { + // Re-serialize anything else (booleans, arrays, nested objects) + // to preserve information without forcing a typed schema. + glz::error_ctx ec = glz::write_json(val, value); + if (ec) { + value.clear(); + } + } + dp.attributes.emplace_back(key, std::move(value)); + } +} + +void collect_columns(const glz::generic& obj, std::vector& columns) { + if (!obj.is_object()) { + return; + } + for (const auto& [key, _] : obj.get_object()) { + columns.push_back(key); + } +} + } // namespace std::optional DataPoint::get(std::string_view key) const { @@ -95,56 +158,35 @@ std::optional DataPoint::get_double(std::string_view key) const { } } -void from_json(const nlohmann::json& j, DataPoint& dp) { - dp.date = json_string(j, "DATE"); - dp.station = json_string(j, "STATION"); - dp.name = json_string(j, "NAME"); - dp.latitude = json_double(j, "LATITUDE"); - dp.longitude = json_double(j, "LONGITUDE"); - dp.elevation = json_double(j, "ELEVATION"); +Result deserialize_data_point_collection(std::string_view body, DataPointCollection& out) { + out.columns.clear(); + out.records.clear(); - dp.attributes.clear(); - for (nlohmann::json::const_iterator it = j.begin(); it != j.end(); ++it) { - if (known_fields.find(it.key()) 
== known_fields.end()) { - std::string value; - if (it.value().is_string()) { - value = it.value().get(); - } else if (it.value().is_number()) { - value = std::to_string(it.value().get()); - } else if (!it.value().is_null()) { - value = it.value().dump(); - } - dp.attributes.emplace_back(it.key(), std::move(value)); - } + glz::generic root{}; + glz::error_ctx ec = glz::read_json(root, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); } -} - -void from_json(const nlohmann::json& j, DataPointCollection& dpc) { - dpc.columns.clear(); - dpc.records.clear(); - if (j.is_array()) { - if (!j.empty()) { - // Extract column names from first element's keys - for (nlohmann::json::const_iterator it = j[0].begin(); it != j[0].end(); ++it) { - dpc.columns.push_back(it.key()); - } + if (root.is_array()) { + const glz::generic::array_t& arr = root.get_array(); + if (!arr.empty()) { + collect_columns(arr.front(), out.columns); } - dpc.records.reserve(j.size()); - for (const nlohmann::json& item : j) { + out.records.reserve(arr.size()); + for (const glz::generic& item : arr) { DataPoint dp; - from_json(item, dp); - dpc.records.push_back(std::move(dp)); - } - } else if (j.is_object()) { - // Single record - for (nlohmann::json::const_iterator it = j.begin(); it != j.end(); ++it) { - dpc.columns.push_back(it.key()); + populate_data_point_from_object(item, dp); + out.records.push_back(std::move(dp)); } + } else if (root.is_object()) { + collect_columns(root, out.columns); DataPoint dp; - from_json(j, dp); - dpc.records.push_back(std::move(dp)); + populate_data_point_from_object(root, dp); + out.records.push_back(std::move(dp)); } + + return {}; } DataPointCollection parse_csv_data(std::string_view csv_text) { diff --git a/src/models/data_service/dataset_metadata.cpp b/src/models/data_service/dataset_metadata.cpp index aebd096..4bb120c 100644 --- a/src/models/data_service/dataset_metadata.cpp +++ b/src/models/data_service/dataset_metadata.cpp @@ 
-1,31 +1,42 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/data_service/dataset_metadata.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" -#include +#include +#include -namespace ncei { +#include "../common_glaze_detail.hpp" -void from_json(const nlohmann::json& j, DatasetField& f) { - f.id = json_string(j, "id"); - f.name = json_string(j, "name"); - f.description = json_string(j, "description"); - f.data_type = json_string(j, "dataType"); -} +namespace ncei {} // namespace ncei + +template <> +struct glz::meta { + using T = ncei::DatasetField; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::id, "name", &T::name, "description", &T::description, "dataType", + &T::data_type); +}; + +template <> +struct glz::meta { + using T = ncei::DatasetMetadata; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::id, "name", &T::name, "description", &T::description, "fields", + &T::fields); +}; + +namespace ncei { -void from_json(const nlohmann::json& j, DatasetMetadata& m) { - m.id = json_string(j, "id"); - m.name = json_string(j, "name"); - m.description = json_string(j, "description"); - - m.fields.clear(); - if (j.contains("fields") && j["fields"].is_array()) { - for (const nlohmann::json& item : j["fields"]) { - DatasetField f; - from_json(item, f); - m.fields.push_back(std::move(f)); - } +Result deserialize_dataset_metadata(std::string_view body, DatasetMetadata& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); } + return {}; } } // namespace ncei diff --git a/src/models/data_service/search_result.cpp b/src/models/data_service/search_result.cpp index 76c9464..26d000e 100644 --- a/src/models/data_service/search_result.cpp +++ b/src/models/data_service/search_result.cpp @@ -1,37 +1,51 @@ +// Copyright (c) 2026 
PredictionMarketsAI +// SPDX-License-Identifier: MIT + #include "ncei/models/data_service/search_result.hpp" +#include "ncei/error.hpp" #include "ncei/models/common.hpp" -#include +#include +#include + +#include "../common_glaze_detail.hpp" + +namespace ncei {} // namespace ncei + +template <> +struct glz::meta { + using T = ncei::DataSearchResult; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("id", &T::station_id, "name", &T::station_name, "latitude", &T::latitude, + "longitude", &T::longitude, "elevation", &T::elevation, "mindate", &T::min_date, + "maxdate", &T::max_date, "datacoverage", &T::data_coverage); +}; + +template <> +struct glz::meta { + using T = ncei::DatasetSearchResult; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("uid", &T::uid, "name", &T::name, "description", &T::description, "startDate", + &T::start_date, "endDate", &T::end_date, "dataTypes", &T::data_types); +}; namespace ncei { -void from_json(const nlohmann::json& j, DataSearchResult& r) { - r.station_id = json_string(j, "id"); - r.station_name = json_string(j, "name"); - r.latitude = json_double(j, "latitude"); - r.longitude = json_double(j, "longitude"); - r.elevation = json_double(j, "elevation"); - r.min_date = json_string(j, "mindate"); - r.max_date = json_string(j, "maxdate"); - r.data_coverage = json_double(j, "datacoverage"); +Result deserialize_data_search_result(std::string_view body, DataSearchResult& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } + return {}; } -void from_json(const nlohmann::json& j, DatasetSearchResult& r) { - r.uid = json_string(j, "uid"); - r.name = json_string(j, "name"); - r.description = json_string(j, "description"); - r.start_date = json_string(j, "startDate"); - r.end_date = json_string(j, "endDate"); - - r.data_types.clear(); - if (j.contains("dataTypes") && 
j["dataTypes"].is_array()) { - for (const nlohmann::json& dt : j["dataTypes"]) { - if (dt.is_string()) { - r.data_types.push_back(dt.get()); - } - } +Result deserialize_dataset_search_result(std::string_view body, DatasetSearchResult& out) { + glz::error_ctx ec = glz::read(out, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); } + return {}; } } // namespace ncei diff --git a/src/models/pagination_detail.hpp b/src/models/pagination_detail.hpp new file mode 100644 index 0000000..ff58fa4 --- /dev/null +++ b/src/models/pagination_detail.hpp @@ -0,0 +1,86 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT +#pragma once + +/// @file pagination_detail.hpp +/// @brief Internal CDO list-response (`{metadata, results}` envelope) parser +/// +/// NOT a public API. Lives under `src/models/` (NOT under `include/`) so it +/// is never installed. The CDO API wraps every list endpoint in this +/// envelope shape: +/// +/// { "metadata": { "resultset": { "offset": N, "count": N, "limit": N } }, +/// "results": [ { ...T... }, { ...T... }, ... ] } +/// +/// Because every list endpoint shares the same envelope, we declare a +/// single templated `glz::meta<CDOResponseEnvelope<T>>` here (paired with one for +/// `ResultSetMetadata` and a private `ResultSetWrapper` for the nested +/// `metadata.resultset` indirection) and Glaze handles the rest via its +/// reflected path. The per-T `glz::meta` definitions still live in the +/// per-model `.cpp` files; this header just stitches them into the +/// envelope. + +#include "ncei/error.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +#include "common_glaze_detail.hpp" + +namespace ncei::detail { + +/// The CDO API nests its pagination scalars one level deeper than our +/// `ResultSetMetadata` does (`metadata.resultset.{offset,count,limit}`). +/// We model that with a private envelope-shape struct, then read into a +/// CDOResponseEnvelope that includes this wrapper as its "metadata" field.
+struct ResultSetWrapper { + ResultSetMetadata resultset; +}; + +template +struct CDOResponseEnvelope { + ResultSetWrapper metadata; + std::vector results; +}; + +template +[[nodiscard]] Result deserialize_cdo_list_impl(std::string_view body, CDOResponse& out) { + CDOResponseEnvelope envelope; + glz::error_ctx ec = glz::read(envelope, body); + if (ec) { + return std::unexpected(Error::parse(glz::format_error(ec, body))); + } + out.metadata = envelope.metadata.resultset; + out.results = std::move(envelope.results); + return {}; +} + +} // namespace ncei::detail + +// ===== Envelope metas ===== +// +// Declared at namespace scope so Glaze's CPO finds them; the per-T +// `glz::meta` definitions for the results-array element type live in +// the per-model `.cpp` files (Dataset, CDOStation, etc.). + +template <> +struct glz::meta { + using T = ncei::ResultSetMetadata; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("offset", &T::offset, "count", &T::count, "limit", &T::limit); +}; + +template <> +struct glz::meta { + using T = ncei::detail::ResultSetWrapper; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("resultset", &T::resultset); +}; + +template +struct glz::meta> { + using T = ncei::detail::CDOResponseEnvelope; + static constexpr auto value = // auto-ok: glz::object returns unspellable tuple + object("metadata", &T::metadata, "results", &T::results); +}; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b1ee27b..b4f2104 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -16,6 +16,44 @@ target_link_libraries(ncei_tests PRIVATE ncei_core ncei_http ncei_models ncei_cdo ncei_data GTest::gtest_main GTest::gmock ) -target_include_directories(ncei_tests PRIVATE ${json_SOURCE_DIR}/include) +target_include_directories(ncei_tests PRIVATE ${glaze_SOURCE_DIR}/include) include(GoogleTest) gtest_discover_tests(ncei_tests) + +if(NOT MSVC) + 
target_compile_options(ncei_tests PRIVATE -Wall -Wextra -Wpedantic) +endif() + +# Glaze deserializer tests — verifies the migration's parse output matches +# the pre-migration shape (null-safety, missing-field defaults, dynamic +# DataPoint attributes, CDO metadata envelope). +add_executable(ncei_glaze_test + glaze_test.cpp +) +target_link_libraries(ncei_glaze_test PRIVATE + gtest_main + ncei_core ncei_models +) +target_include_directories(ncei_glaze_test PRIVATE ${glaze_SOURCE_DIR}/include) +if(NOT MSVC) + target_compile_options(ncei_glaze_test PRIVATE -Wall -Wextra -Wpedantic) +endif() +add_test(NAME ncei_glaze_test COMMAND ncei_glaze_test) + +# Glaze parse-throughput regression guard. The bench parses a representative +# CDO /stations list response (metadata envelope + 100 CDOStation results, +# ~21 KB) 1k times and asserts us/op stays under a cap. ctest --timeout = 30s; +# actual wall-clock is ~100ms on x86_64-v3, so the timeout fires only on +# catastrophic regression. +add_executable(ncei_parse_benchmark + parse_benchmark.cpp +) +target_link_libraries(ncei_parse_benchmark PRIVATE + ncei_core ncei_models +) +target_include_directories(ncei_parse_benchmark PRIVATE ${glaze_SOURCE_DIR}/include) +if(NOT MSVC) + target_compile_options(ncei_parse_benchmark PRIVATE -Wall -Wextra -Wpedantic) +endif() +add_test(NAME ncei_parse_benchmark COMMAND ncei_parse_benchmark) +set_tests_properties(ncei_parse_benchmark PROPERTIES TIMEOUT 30) diff --git a/tests/glaze_test.cpp b/tests/glaze_test.cpp new file mode 100644 index 0000000..a7a57ff --- /dev/null +++ b/tests/glaze_test.cpp @@ -0,0 +1,198 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT +// +// Glaze-deserializer tests — verify the migration's parse output matches +// the documented NCEI response shape and the pre-migration behavior +// (null-safety for missing/null scalar fields, snake_case ↔ camelCase +// JSON-key aliasing, dynamic-key DataPoint attribute preservation, +// CDO list-response 
envelope handling). + +#include "ncei/models/cdo/data.hpp" +#include "ncei/models/cdo/dataset.hpp" +#include "ncei/models/cdo/station.hpp" +#include "ncei/models/common.hpp" +#include "ncei/models/data_service/data_point.hpp" +#include "ncei/pagination.hpp" + +#include +#include + +namespace ncei { +namespace { + +TEST(GlazeDeserializerTest, ParsesStation) { + const std::string body = R"({ + "id": "GHCND:USW00013874", + "name": "RALEIGH DURHAM INTL AP", + "datacoverage": 1.0, + "mindate": "1948-01-01", + "maxdate": "2024-12-31", + "latitude": 35.8917, + "longitude": -78.7833, + "elevation": 124.4, + "elevationUnit": "METERS" + })"; + + CDOStation s; + Result r = deserialize_station(body, s); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); + EXPECT_EQ(s.id, "GHCND:USW00013874"); + EXPECT_EQ(s.elevation_unit, "METERS"); + EXPECT_DOUBLE_EQ(s.latitude, 35.8917); +} + +TEST(GlazeDeserializerTest, TolerantOfUnknownKeys) { + // CDO occasionally adds new metadata keys (e.g. `parentStationId`); we + // must keep parsing. + const std::string body = R"({ + "id": "GHCND:USW00013874", + "name": "RALEIGH", + "datacoverage": 1.0, + "mindate": "1948-01-01", + "maxdate": "2024-12-31", + "latitude": 35.89, + "longitude": -78.78, + "elevation": 124.4, + "elevationUnit": "METERS", + "parentStationId": "GHCND:US0000", + "thirdParty": {"foo": "bar"} + })"; + CDOStation s; + Result r = deserialize_station(body, s); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); + EXPECT_EQ(s.id, "GHCND:USW00013874"); +} + +TEST(GlazeDeserializerTest, NullScalarsLeaveDefaults) { + const std::string body = R"({ + "id": "GHCND:USW00013874", + "name": null, + "datacoverage": null, + "mindate": null, + "maxdate": null, + "latitude": null, + "longitude": null, + "elevation": null, + "elevationUnit": null + })"; + CDOStation s; + Result r = deserialize_station(body, s); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); + EXPECT_EQ(s.id, "GHCND:USW00013874"); + EXPECT_TRUE(s.name.empty()); + EXPECT_DOUBLE_EQ(s.data_coverage, 0.0); + EXPECT_DOUBLE_EQ(s.latitude, 0.0); + EXPECT_TRUE(s.elevation_unit.empty()); +} + +TEST(GlazeDeserializerTest, ParsesCdoListEnvelopeWithDatasets) { + const std::string body = R"({ + "metadata": {"resultset": {"offset": 0, "count": 2, "limit": 25}}, + "results": [ + {"id": "GHCND", "name": "Daily Summaries", "datacoverage": 1.0, + "mindate": "1763-01-01", "maxdate": "2024-12-31"}, + {"id": "GSOM", "name": "Monthly Summaries", "datacoverage": 1.0, + "mindate": "1763-01-01", "maxdate": "2024-11-01"} + ] + })"; + + CDOResponse resp; + Result r = deserialize_cdo_list(body, resp); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); + EXPECT_EQ(resp.metadata.offset, 0); + EXPECT_EQ(resp.metadata.count, 2); + EXPECT_EQ(resp.metadata.limit, 25); + ASSERT_EQ(resp.results.size(), 2u); + EXPECT_EQ(resp.results[0].id, "GHCND"); + EXPECT_EQ(resp.results[1].id, "GSOM"); +} + +TEST(GlazeDeserializerTest, ParsesDataPointCollectionWithDynamicKeys) { + // DataPoint has user-driven keys (TMAX/TMIN/PRCP/SNOW from the query + // string). Verify they land in attributes, and the known scalars in + // their typed slots. + const std::string body = R"([ + { + "DATE": "2024-01-15", + "STATION": "USW00013874", + "NAME": "RALEIGH", + "LATITUDE": 35.89, + "LONGITUDE": -78.78, + "ELEVATION": 124.0, + "TMAX": "122", + "TMIN": "44", + "PRCP": "0.0" + } + ])"; + DataPointCollection dpc; + Result r = deserialize_data_point_collection(body, dpc); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); + ASSERT_EQ(dpc.records.size(), 1u); + const DataPoint& dp = dpc.records[0]; + EXPECT_EQ(dp.date, "2024-01-15"); + EXPECT_EQ(dp.station, "USW00013874"); + EXPECT_DOUBLE_EQ(dp.latitude, 35.89); + + // Dynamic keys land in attributes, NOT in the typed scalar slots + std::optional tmax = dp.get("TMAX"); + ASSERT_TRUE(tmax.has_value()); + EXPECT_EQ(*tmax, "122"); + std::optional prcp = dp.get("PRCP"); + ASSERT_TRUE(prcp.has_value()); + EXPECT_EQ(*prcp, "0.0"); +} + +TEST(GlazeDeserializerTest, DataPointCollectionPreservesNumericAttributesAsStrings) { + // CDO occasionally emits numeric (not string) values for measurements; + // we should stringify them so the attribute map is uniform. + const std::string body = R"([ + {"DATE": "2024-01-15", "STATION": "X", "NAME": "Y", + "LATITUDE": 0.0, "LONGITUDE": 0.0, "ELEVATION": 0.0, + "TMAX": 122} + ])"; + DataPointCollection dpc; + Result r = deserialize_data_point_collection(body, dpc); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); + ASSERT_EQ(dpc.records.size(), 1u); + std::optional tmax = dpc.records[0].get("TMAX"); + ASSERT_TRUE(tmax.has_value()); + EXPECT_FALSE(tmax->empty()); +} + +TEST(GlazeDeserializerTest, ParsesDataRecord) { + const std::string body = R"({ + "date": "2024-01-15T00:00:00", + "datatype": "TMAX", + "station": "GHCND:USW00013874", + "attributes": ",,N,2400", + "value": 122 + })"; + DataRecord d; + Result r = deserialize_data_record(body, d); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); + EXPECT_EQ(d.date, "2024-01-15T00:00:00"); + EXPECT_EQ(d.datatype, "TMAX"); + EXPECT_DOUBLE_EQ(d.value, 122.0); +} + +TEST(GlazeDeserializerTest, RejectsMalformedJson) { + const std::string body = R"({"id": "GHCND")"; // unterminated + Dataset d; + Result r = deserialize_dataset(body, d); + EXPECT_FALSE(r.has_value()); +} + +TEST(GlazeDeserializerTest, CdoListEnvelopeHandlesEmptyResults) { + const std::string body = R"({ + "metadata": {"resultset": {"offset": 0, "count": 0, "limit": 25}}, + "results": [] + })"; + CDOResponse resp; + Result r = deserialize_cdo_list(body, resp); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); + EXPECT_TRUE(resp.results.empty()); + EXPECT_EQ(resp.metadata.count, 0); +} + +} // namespace +} // namespace ncei diff --git a/tests/parse_benchmark.cpp b/tests/parse_benchmark.cpp new file mode 100644 index 0000000..309498d --- /dev/null +++ b/tests/parse_benchmark.cpp @@ -0,0 +1,108 @@ +// Copyright (c) 2026 PredictionMarketsAI +// SPDX-License-Identifier: MIT +// +// Microbenchmark: parse a representative NCEI CDO `/stations` list response +// 1k times and report wall-clock. Used as a parse-throughput regression +// guard with `ctest --timeout` and an absolute upper bound on us/op. +// +// Historical baseline (recorded at migration time on x86_64-v3, GCC 13.3, +// -O3 -DNDEBUG, payload=~21KB, iters=1000): +// +// nlohmann/json v3.11.3 : ~360-590 us/op (pre-migration baseline) +// glaze v7.6.0 : ~32-40 us/op (post-migration) +// speedup : 9-15x +// +// The CDO list-response shape is allocation-heavy on the nlohmann path +// (every key gets its own std::string + ordered-map slot), which is where +// most of the gap comes from. The pre-migration nlohmann bench lived in +// tests/parse_benchmark.cpp at the time of the Glaze migration commit; it +// has since been removed along with the nlohmann FetchContent dep. +// Re-introduce a side-by-side bench only if a future regression suspicion +// warrants it. 
+ +#include "ncei/models/cdo/station.hpp" +#include "ncei/models/common.hpp" +#include "ncei/pagination.hpp" + +#include +#include +#include + +namespace { + +// 100 CDOStation records wrapped in the standard CDO `{metadata, results}` +// envelope. ~21 KB of JSON — close to what a real `/stations?limit=100` +// call returns from the CDO API. +std::string make_payload() { + std::string json; + json.reserve(32 * 1024); + json += R"({"metadata":{"resultset":{"offset":0,"count":100,"limit":100}},"results":[)"; + constexpr int kRecords = 100; + for (int i = 0; i < kRecords; ++i) { + if (i != 0) { + json += ','; + } + char buf[512]; + std::snprintf(buf, sizeof(buf), + R"({"id":"GHCND:USW%011d","name":"STATION NAME %d","datacoverage":%f,)" + R"("mindate":"1948-01-01","maxdate":"2024-12-31","latitude":%f,)" + R"("longitude":%f,"elevation":%f,"elevationUnit":"METERS"})", + i, i, 0.9 + (i % 10) * 0.01, 25.0 + i * 0.1, -120.0 + i * 0.5, + 10.0 + i * 1.5); + json += buf; + } + json += "]}"; + return json; +} + +} // namespace + +int main() { + const std::string payload = make_payload(); + constexpr int kIterations = 1000; + + // Warmup — let the allocator and CPU settle. 
+ for (int i = 0; i < 50; ++i) { + ncei::CDOResponse warm; + (void)ncei::deserialize_cdo_list(payload, warm); + } + + std::chrono::nanoseconds glaze_total{0}; + std::size_t glaze_checksum = 0; + for (int i = 0; i < kIterations; ++i) { + ncei::CDOResponse resp; + std::chrono::steady_clock::time_point t0 = std::chrono::steady_clock::now(); + ncei::Result r = ncei::deserialize_cdo_list(payload, resp); + std::chrono::steady_clock::time_point t1 = std::chrono::steady_clock::now(); + if (!r) { + std::fprintf(stderr, "glaze parse failed: %s\n", r.error().message.c_str()); + return 1; + } + glaze_total += (t1 - t0); + glaze_checksum += resp.results.size(); + } + + if (glaze_checksum != static_cast(100) * kIterations) { + std::fprintf(stderr, "checksum mismatch: glaze=%zu (expected %d)\n", glaze_checksum, + 100 * kIterations); + return 1; + } + + const double glaze_ms = glaze_total.count() / 1e6; + const double us_per_op = (glaze_total.count() / 1e3) / kIterations; + + std::printf("parse_benchmark: payload=%zuB iters=%d\n", payload.size(), kIterations); + std::printf(" glaze: %8.3f ms total (%8.3f us/op)\n", glaze_ms, us_per_op); + + // Regression guard: at migration time, Glaze parsed this payload at + // ~32-40 us/op on x86_64-v3. Set the cap at 200 us/op — that's still + // faster than the nlohmann baseline (~360-590 us/op) and leaves a + // healthy slack window for slower CI runners and Debug builds. 
+ constexpr double kMaxUsPerOp = 200.0; + if (us_per_op > kMaxUsPerOp) { + std::fprintf(stderr, "REGRESSION: %.3f us/op exceeds cap of %.0f us/op\n", us_per_op, + kMaxUsPerOp); + return 1; + } + return 0; +} diff --git a/tests/test_cdo_models.cpp b/tests/test_cdo_models.cpp index ae49758..20d9e6c 100644 --- a/tests/test_cdo_models.cpp +++ b/tests/test_cdo_models.cpp @@ -5,10 +5,11 @@ #include "ncei/models/cdo/location.hpp" #include "ncei/models/cdo/location_category.hpp" #include "ncei/models/cdo/station.hpp" +#include "ncei/models/common.hpp" #include "ncei/pagination.hpp" #include -#include +#include namespace ncei { namespace { @@ -16,15 +17,16 @@ namespace { // --- Dataset --- TEST(DatasetTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": "GHCND", "name": "Daily Summaries", "datacoverage": 1.0, "mindate": "1763-01-01", "maxdate": "2024-12-31" - })"); + })"; Dataset d; - from_json(j, d); + Result r = deserialize_dataset(body, d); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_EQ(d.id, "GHCND"); EXPECT_EQ(d.name, "Daily Summaries"); EXPECT_DOUBLE_EQ(d.data_coverage, 1.0); @@ -42,15 +44,16 @@ TEST(DatasetTest, DefaultConstruction) { } TEST(DatasetTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": null, "name": null, "datacoverage": null, "mindate": null, "maxdate": null - })"); + })"; Dataset d; - from_json(j, d); + Result r = deserialize_dataset(body, d); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); EXPECT_TRUE(d.id.empty()); EXPECT_TRUE(d.name.empty()); EXPECT_DOUBLE_EQ(d.data_coverage, 0.0); @@ -59,20 +62,22 @@ TEST(DatasetTest, NullFields) { // --- DataCategory --- TEST(DataCategoryTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": "TEMP", "name": "Air Temperature" - })"); + })"; DataCategory dc; - from_json(j, dc); + Result r = deserialize_data_category(body, dc); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_EQ(dc.id, "TEMP"); EXPECT_EQ(dc.name, "Air Temperature"); } TEST(DataCategoryTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({"id": null, "name": null})"); + const std::string body = R"({"id": null, "name": null})"; DataCategory dc; - from_json(j, dc); + Result r = deserialize_data_category(body, dc); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_TRUE(dc.id.empty()); EXPECT_TRUE(dc.name.empty()); } @@ -80,15 +85,16 @@ TEST(DataCategoryTest, NullFields) { // --- DataType --- TEST(DataTypeTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": "TMAX", "name": "Maximum temperature", "datacoverage": 0.95, "mindate": "1900-01-01", "maxdate": "2024-12-31" - })"); + })"; DataType dt; - from_json(j, dt); + Result r = deserialize_data_type(body, dt); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_EQ(dt.id, "TMAX"); EXPECT_EQ(dt.name, "Maximum temperature"); EXPECT_DOUBLE_EQ(dt.data_coverage, 0.95); @@ -97,12 +103,13 @@ TEST(DataTypeTest, FromJson) { } TEST(DataTypeTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": null, "name": null, "datacoverage": null, "mindate": null, "maxdate": null - })"); + })"; DataType dt; - from_json(j, dt); + Result r = deserialize_data_type(body, dt); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); EXPECT_TRUE(dt.id.empty()); EXPECT_DOUBLE_EQ(dt.data_coverage, 0.0); } @@ -110,20 +117,22 @@ TEST(DataTypeTest, NullFields) { // --- LocationCategory --- TEST(LocationCategoryTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": "ST", "name": "State" - })"); + })"; LocationCategory lc; - from_json(j, lc); + Result r = deserialize_location_category(body, lc); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_EQ(lc.id, "ST"); EXPECT_EQ(lc.name, "State"); } TEST(LocationCategoryTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({"id": null, "name": null})"); + const std::string body = R"({"id": null, "name": null})"; LocationCategory lc; - from_json(j, lc); + Result r = deserialize_location_category(body, lc); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_TRUE(lc.id.empty()); EXPECT_TRUE(lc.name.empty()); } @@ -131,15 +140,16 @@ TEST(LocationCategoryTest, NullFields) { // --- Location --- TEST(LocationTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": "FIPS:37", "name": "North Carolina", "datacoverage": 1.0, "mindate": "1869-03-01", "maxdate": "2024-12-31" - })"); + })"; Location l; - from_json(j, l); + Result r = deserialize_location(body, l); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_EQ(l.id, "FIPS:37"); EXPECT_EQ(l.name, "North Carolina"); EXPECT_DOUBLE_EQ(l.data_coverage, 1.0); @@ -148,12 +158,13 @@ TEST(LocationTest, FromJson) { } TEST(LocationTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": null, "name": null, "datacoverage": null, "mindate": null, "maxdate": null - })"); + })"; Location l; - from_json(j, l); + Result r = deserialize_location(body, l); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); EXPECT_TRUE(l.id.empty()); EXPECT_DOUBLE_EQ(l.data_coverage, 0.0); } @@ -161,7 +172,7 @@ TEST(LocationTest, NullFields) { // --- CDOStation --- TEST(CDOStationTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": "GHCND:USW00013874", "name": "RALEIGH DURHAM INTERNATIONAL AIRPORT, NC US", "datacoverage": 1.0, @@ -171,9 +182,10 @@ TEST(CDOStationTest, FromJson) { "longitude": -78.7833, "elevation": 124.4, "elevationUnit": "METERS" - })"); + })"; CDOStation s; - from_json(j, s); + Result r = deserialize_station(body, s); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_EQ(s.id, "GHCND:USW00013874"); EXPECT_EQ(s.name, "RALEIGH DURHAM INTERNATIONAL AIRPORT, NC US"); EXPECT_DOUBLE_EQ(s.data_coverage, 1.0); @@ -186,14 +198,15 @@ TEST(CDOStationTest, FromJson) { } TEST(CDOStationTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": null, "name": null, "datacoverage": null, "mindate": null, "maxdate": null, "latitude": null, "longitude": null, "elevation": null, "elevationUnit": null - })"); + })"; CDOStation s; - from_json(j, s); + Result r = deserialize_station(body, s); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_TRUE(s.id.empty()); EXPECT_DOUBLE_EQ(s.latitude, 0.0); EXPECT_DOUBLE_EQ(s.longitude, 0.0); @@ -204,15 +217,16 @@ TEST(CDOStationTest, NullFields) { // --- DataRecord --- TEST(DataRecordTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "date": "2024-01-15T00:00:00", "datatype": "TMAX", "station": "GHCND:USW00013874", "attributes": ",,N,2400", "value": 122 - })"); + })"; DataRecord d; - from_json(j, d); + Result r = deserialize_data_record(body, d); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); EXPECT_EQ(d.date, "2024-01-15T00:00:00"); EXPECT_EQ(d.datatype, "TMAX"); EXPECT_EQ(d.station, "GHCND:USW00013874"); @@ -221,21 +235,22 @@ TEST(DataRecordTest, FromJson) { } TEST(DataRecordTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "date": null, "datatype": null, "station": null, "attributes": null, "value": null - })"); + })"; DataRecord d; - from_json(j, d); + Result r = deserialize_data_record(body, d); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_TRUE(d.date.empty()); EXPECT_TRUE(d.datatype.empty()); EXPECT_DOUBLE_EQ(d.value, 0.0); } -// --- CDOResponse --- +// --- CDOResponse envelope --- TEST(CDOResponseDatasetTest, FromJsonWithMetadata) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "metadata": { "resultset": { "offset": 0, @@ -259,29 +274,21 @@ TEST(CDOResponseDatasetTest, FromJsonWithMetadata) { "maxdate": "2024-11-01" } ] - })"); + })"; CDOResponse resp; - from_json(j, resp.metadata); + Result r = deserialize_cdo_list(body, resp); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); EXPECT_EQ(resp.metadata.offset, 0); EXPECT_EQ(resp.metadata.count, 2); EXPECT_EQ(resp.metadata.limit, 25); - - if (j.contains("results") && j["results"].is_array()) { - for (const nlohmann::json& item : j["results"]) { - Dataset ds; - from_json(item, ds); - resp.results.push_back(std::move(ds)); - } - } - ASSERT_EQ(resp.results.size(), 2u); EXPECT_EQ(resp.results[0].id, "GHCND"); EXPECT_EQ(resp.results[1].id, "GSOM"); } -TEST(CDOResponseTest, HasMoreTrue) { +TEST(CDOResponseDatasetTest, HasMoreTrue) { CDOResponse resp; resp.metadata.offset = 0; resp.metadata.limit = 25; @@ -289,7 +296,7 @@ TEST(CDOResponseTest, HasMoreTrue) { EXPECT_TRUE(resp.has_more()); } -TEST(CDOResponseTest, HasMoreFalseAtEnd) { +TEST(CDOResponseDatasetTest, HasMoreFalseAtEnd) { CDOResponse resp; resp.metadata.offset = 75; resp.metadata.limit = 25; @@ -300,9 +307,10 @@ TEST(CDOResponseTest, HasMoreFalseAtEnd) { // --- Missing fields --- TEST(DatasetTest, MissingFields) { - nlohmann::json j = nlohmann::json::parse(R"({"id": "GHCND"})"); + const std::string body = R"({"id": "GHCND"})"; Dataset d; - from_json(j, d); + Result r = deserialize_dataset(body, d); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); EXPECT_EQ(d.id, "GHCND"); EXPECT_TRUE(d.name.empty()); EXPECT_DOUBLE_EQ(d.data_coverage, 0.0); @@ -310,5 +318,14 @@ TEST(DatasetTest, MissingFields) { EXPECT_TRUE(d.max_date.empty()); } +// --- Invalid JSON --- + +TEST(DatasetTest, RejectsInvalidJson) { + const std::string body = R"({"id": "GHCND")"; // missing closing brace + Dataset d; + Result r = deserialize_dataset(body, d); + EXPECT_FALSE(r.has_value()); +} + } // namespace } // namespace ncei diff --git a/tests/test_data_service_models.cpp b/tests/test_data_service_models.cpp index ae8e2ba..c29178a 100644 --- a/tests/test_data_service_models.cpp +++ b/tests/test_data_service_models.cpp @@ -1,9 +1,10 @@ +#include "ncei/models/common.hpp" #include "ncei/models/data_service/data_point.hpp" #include "ncei/models/data_service/dataset_metadata.hpp" #include "ncei/models/data_service/search_result.hpp" #include -#include +#include namespace ncei { namespace { @@ -11,7 +12,7 @@ namespace { // --- DataPoint from JSON --- TEST(DataPointTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"([{ "DATE": "2024-01-15", "STATION": "USW00013874", "NAME": "RALEIGH DURHAM INTL AP, NC US", @@ -20,9 +21,12 @@ TEST(DataPointTest, FromJson) { "ELEVATION": 124.4, "TMAX": "122", "TMIN": "44" - })"); - DataPoint dp; - from_json(j, dp); + }])"; + DataPointCollection dpc; + Result r = deserialize_data_point_collection(body, dpc); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); + ASSERT_EQ(dpc.records.size(), 1u); + const DataPoint& dp = dpc.records[0]; EXPECT_EQ(dp.date, "2024-01-15"); EXPECT_EQ(dp.station, "USW00013874"); EXPECT_EQ(dp.name, "RALEIGH DURHAM INTL AP, NC US"); @@ -80,7 +84,7 @@ TEST(DataPointTest, GetDoubleMissing) { // --- DataPointCollection from JSON array --- TEST(DataPointCollectionTest, FromJsonArray) { - nlohmann::json j = nlohmann::json::parse(R"([ + const std::string body = R"([ { "DATE": "2024-01-15", "STATION": "USW00013874", @@ -99,9 +103,10 @@ TEST(DataPointCollectionTest, FromJsonArray) { "ELEVATION": 124.0, "TMAX": "130" } - ])"); + ])"; DataPointCollection dpc; - from_json(j, dpc); + Result r = deserialize_data_point_collection(body, dpc); + ASSERT_TRUE(r.has_value()) << (r ? "" : r.error().message); EXPECT_FALSE(dpc.columns.empty()); ASSERT_EQ(dpc.records.size(), 2u); EXPECT_EQ(dpc.records[0].date, "2024-01-15"); @@ -172,7 +177,7 @@ TEST(ParseSsvDataTest, BasicSsv) { // --- DataSearchResult --- TEST(DataSearchResultTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": "USW00013874", "name": "RALEIGH DURHAM INTL AP", "latitude": 35.8917, @@ -181,9 +186,10 @@ TEST(DataSearchResultTest, FromJson) { "mindate": "1948-01-01", "maxdate": "2024-12-31", "datacoverage": 1.0 - })"); + })"; DataSearchResult r; - from_json(j, r); + Result result = deserialize_data_search_result(body, r); + ASSERT_TRUE(result.has_value()) << (result ? 
"" : result.error().message); EXPECT_EQ(r.station_id, "USW00013874"); EXPECT_EQ(r.station_name, "RALEIGH DURHAM INTL AP"); EXPECT_DOUBLE_EQ(r.latitude, 35.8917); @@ -197,16 +203,17 @@ TEST(DataSearchResultTest, FromJson) { // --- DatasetSearchResult --- TEST(DatasetSearchResultTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "uid": "gov.noaa.ncdc:C00861", "name": "Daily Summaries", "description": "Global daily weather data", "startDate": "1763-01-01", "endDate": "2024-12-31", "dataTypes": ["TMAX", "TMIN", "PRCP"] - })"); + })"; DatasetSearchResult r; - from_json(j, r); + Result result = deserialize_dataset_search_result(body, r); + ASSERT_TRUE(result.has_value()) << (result ? "" : result.error().message); EXPECT_EQ(r.uid, "gov.noaa.ncdc:C00861"); EXPECT_EQ(r.name, "Daily Summaries"); EXPECT_EQ(r.description, "Global daily weather data"); @@ -216,27 +223,10 @@ TEST(DatasetSearchResultTest, FromJson) { EXPECT_EQ(r.data_types[0], "TMAX"); } -// --- DatasetField --- - -TEST(DatasetFieldTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ - "id": "TMAX", - "name": "Maximum Temperature", - "description": "Maximum temperature for the day", - "dataType": "number" - })"); - DatasetField f; - from_json(j, f); - EXPECT_EQ(f.id, "TMAX"); - EXPECT_EQ(f.name, "Maximum Temperature"); - EXPECT_EQ(f.description, "Maximum temperature for the day"); - EXPECT_EQ(f.data_type, "number"); -} - // --- DatasetMetadata --- TEST(DatasetMetadataTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": "daily-summaries", "name": "Daily Summaries", "description": "Global daily weather observations", @@ -254,9 +244,10 @@ TEST(DatasetMetadataTest, FromJson) { "dataType": "number" } ] - })"); + })"; DatasetMetadata m; - from_json(j, m); + Result r = deserialize_dataset_metadata(body, m); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); EXPECT_EQ(m.id, "daily-summaries"); EXPECT_EQ(m.name, "Daily Summaries"); EXPECT_EQ(m.description, "Global daily weather observations"); @@ -268,12 +259,13 @@ TEST(DatasetMetadataTest, FromJson) { // --- Null safety --- TEST(DataSearchResultTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": null, "name": null, "latitude": null, "longitude": null, "elevation": null, "mindate": null, "maxdate": null, "datacoverage": null - })"); + })"; DataSearchResult r; - from_json(j, r); + Result result = deserialize_data_search_result(body, r); + ASSERT_TRUE(result.has_value()) << (result ? "" : result.error().message); EXPECT_TRUE(r.station_id.empty()); EXPECT_TRUE(r.station_name.empty()); EXPECT_DOUBLE_EQ(r.latitude, 0.0); @@ -281,11 +273,12 @@ TEST(DataSearchResultTest, NullFields) { } TEST(DatasetMetadataTest, NullFields) { - nlohmann::json j = nlohmann::json::parse(R"({ + const std::string body = R"({ "id": null, "name": null, "description": null - })"); + })"; DatasetMetadata m; - from_json(j, m); + Result r = deserialize_dataset_metadata(body, m); + ASSERT_TRUE(r.has_value()) << (r ? 
"" : r.error().message); EXPECT_TRUE(m.id.empty()); EXPECT_TRUE(m.name.empty()); EXPECT_TRUE(m.fields.empty()); diff --git a/tests/test_pagination.cpp b/tests/test_pagination.cpp index 3b2d817..e0772a8 100644 --- a/tests/test_pagination.cpp +++ b/tests/test_pagination.cpp @@ -1,7 +1,6 @@ #include "ncei/pagination.hpp" #include -#include namespace ncei { namespace { @@ -13,40 +12,6 @@ TEST(ResultSetMetadataTest, DefaultConstruction) { EXPECT_EQ(m.limit, 25); } -TEST(ResultSetMetadataTest, FromJson) { - nlohmann::json j = nlohmann::json::parse(R"({ - "metadata": { - "resultset": { - "offset": 50, - "count": 200, - "limit": 25 - } - } - })"); - ResultSetMetadata m; - from_json(j, m); - EXPECT_EQ(m.offset, 50); - EXPECT_EQ(m.count, 200); - EXPECT_EQ(m.limit, 25); -} - -TEST(ResultSetMetadataTest, FromJsonNestedStructure) { - nlohmann::json j = nlohmann::json::parse(R"({ - "metadata": { - "resultset": { - "offset": 0, - "count": 100, - "limit": 25 - } - } - })"); - ResultSetMetadata m; - from_json(j, m); - EXPECT_EQ(m.offset, 0); - EXPECT_EQ(m.count, 100); - EXPECT_EQ(m.limit, 25); -} - TEST(CDOResponseTest, HasMoreTrue) { CDOResponse resp; resp.metadata.offset = 0;