Skip to content

Commit fa45dcb

Browse files
authored
Introduce data classes to structure the responses from the API (#204)
* Create base DCResponse * NodeResponse * ObservationResponse * ResolveResponse * SparqlResponse * fix return * consistent naming * Update response.py * tests for response utilities * Move response scripts and tests * Improve endpoint docstrings
1 parent 63a78f9 commit fa45dcb

File tree

2 files changed

+836
-0
lines changed

2 files changed

+836
-0
lines changed
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
from dataclasses import asdict, dataclass, field
2+
from typing import Any, Dict, List
3+
4+
from datacommons_client.models.node import Arcs, NextToken, NodeDCID, Properties
5+
from datacommons_client.models.observation import (
6+
Facet,
7+
Variable,
8+
facetID,
9+
variableDCID,
10+
)
11+
from datacommons_client.models.resolve import Entity
12+
from datacommons_client.models.sparql import Row
13+
14+
15+
@dataclass
class DCResponse:
    """Raw, structured container for a Data Commons API response.

    Attributes:
        json: The decoded JSON payload; defaults to an empty dictionary.
    """

    json: Dict[str, Any] = field(default_factory=dict)

    def __repr__(self) -> str:
        """Return a fixed placeholder label instead of dumping the payload."""
        return "<Raw Data Commons API response>"

    @property
    def next_token(self):
        """Return the payload's "nextToken" value, or None if it is absent."""
        token = self.json.get("nextToken")
        return token
28+
29+
30+
@dataclass
class NodeResponse:
    """Structured view of a Node endpoint response.

    Attributes:
        data: Maps each node DCID to the Arcs or Properties object parsed
            from that node's entry.
        nextToken: Pagination token taken from the response, or None.
    """

    data: Dict[NodeDCID, Arcs | Properties] = field(default_factory=dict)
    nextToken: NextToken = None

    @classmethod
    def from_json(cls, json_data: Dict[str, Any]) -> "NodeResponse":
        """Build a NodeResponse from the raw JSON of an API response.

        Args:
            json_data: The raw JSON data from the API response. Its "data"
                entry (if any) is read as a mapping of DCID to node payload.

        Returns:
            A NodeResponse instance.
        """
        nodes = {}
        for dcid, payload in json_data.get("data", {}).items():
            # An "arcs" key selects the Arcs parser; anything else is
            # handed to the Properties parser whole.
            if "arcs" in payload:
                nodes[dcid] = Arcs.from_json(payload["arcs"])
            else:
                nodes[dcid] = Properties.from_json(payload)

        return cls(data=nodes, nextToken=json_data.get("nextToken"))

    def get_properties(self) -> Dict:
        """Return the node data flattened by `flatten_properties`."""
        flattened = flatten_properties(self.data)
        return flattened

    @property
    def json(self):
        """Return this response converted to a dictionary via `asdict`."""
        return asdict(self)
70+
71+
72+
def flatten_properties(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Collapse a mapping of node responses into one simplified value per node.

    For every node, the unpacked arcs are preferred; when the node has no
    `arcs` attribute (or unpacking yields None) its `properties` attribute
    is used instead. Nodes offering neither are left out of the result.

    Args:
        data (Dict[str, Any]):
            Maps node identifiers to objects that may expose an `arcs`
            and/or a `properties` attribute.

    Returns:
        Dict[str, Any]:
            Maps node identifiers to their simplified arcs or properties.
    """
    flattened = {}

    for dcid, payload in data.items():
        # Arcs take precedence whenever they unpack to something.
        unpacked = (
            _unpack_arcs(payload.arcs) if hasattr(payload, "arcs") else None
        )

        if unpacked is not None:
            flattened[dcid] = unpacked
        elif hasattr(payload, "properties"):
            flattened[dcid] = payload.properties

    return flattened
106+
107+
108+
def _unpack_arcs(arcs: Dict[str, Any]) -> Any:
109+
"""Simplify the 'arcs' structure."""
110+
if len(arcs) > 1:
111+
# Multiple arcs: return dictionary of property nodes
112+
return {prop: arc_data["nodes"] for prop, arc_data in arcs.items()}
113+
114+
# Single arc: extract first node's data
115+
for property_data in arcs.values():
116+
nodes = property_data.nodes
117+
if nodes is not None:
118+
return nodes if len(nodes) > 1 else nodes[0]
119+
120+
121+
@dataclass
class ObservationResponse:
    """Structured view of an Observation endpoint response.

    Attributes:
        byVariable: Maps variable DCIDs to the data parsed for each variable.
        facets: Maps facet IDs to the data parsed for each facet.
    """

    byVariable: Dict[variableDCID, Any] = field(default_factory=dict)
    facets: Dict[facetID, Any] = field(default_factory=dict)

    @classmethod
    def from_json(cls, json_data: Dict[str, Any]) -> "ObservationResponse":
        """Build an ObservationResponse from the raw API response JSON.

        The "byVariable" entries are parsed with `Variable.from_json` and the
        "facets" entries with `Facet.from_json`; a missing section yields an
        empty dictionary.
        """
        variables = {}
        for dcid, payload in json_data.get("byVariable", {}).items():
            variables[dcid] = Variable.from_json(payload)

        facet_lookup = {}
        for facet_id, payload in json_data.get("facets", {}).items():
            facet_lookup[facet_id] = Facet.from_json(payload)

        return cls(byVariable=variables, facets=facet_lookup)

    @property
    def json(self):
        """Return this response converted to a dictionary via `asdict`."""
        return asdict(self)

    def get_data_by_entity(self) -> Dict:
        """Collect the per-entity data of every variable.

        Returns:
            Dict: Maps each variable to the `byEntity` attribute of its data.
        """
        by_entity = {}
        for dcid, variable_data in self.byVariable.items():
            by_entity[dcid] = variable_data.byEntity
        return by_entity

    def get_observations_as_records(self) -> List[Dict[str, Any]]:
        """Flatten the observation data into a list of records.

        Returns:
            List[Dict[str, Any]]: A flattened list of observation records.
        """
        per_entity = self.get_data_by_entity()
        return observations_as_records(data=per_entity, facets=self.facets)
171+
172+
173+
def extract_observations(
    variable: str, entity: str, entity_data: dict, facet_metadata: dict
) -> list[dict]:
    """
    Extracts observations for a given variable, entity, and its data.

    Args:
        variable (str): The variable name, copied into every record.
        entity (str): The entity name, copied into every record.
        entity_data (dict): Data for the entity. Its "orderedFacets" entry
            (if any) is iterated; each facet must expose `facetId` and
            `observations`, and each observation `date` and `value`.
        facet_metadata (dict): Maps facet IDs to dataclass instances whose
            fields are merged into the records of that facet. Facets with
            no entry simply add no metadata fields.

    Returns:
        list[dict]: One record per observation with the keys "date",
        "entity", "variable", "value", "facetId" plus the facet's
        metadata fields.
    """
    # Store observation records
    records = []

    for facet in entity_data.get("orderedFacets", []):
        facet_id = facet.facetId
        metadata = facet_metadata.get(facet_id)
        # asdict() only accepts dataclass instances, so a facet without
        # metadata must not be passed through it (it used to raise
        # TypeError). Converting once per facet also avoids repeating the
        # conversion for every observation.
        metadata_fields = asdict(metadata) if metadata is not None else {}

        records.extend(
            {
                "date": observation.date,
                "entity": entity,
                "variable": variable,
                "value": observation.value,
                "facetId": facet_id,
                **metadata_fields,
            }
            for observation in facet.observations
        )
    return records
207+
208+
209+
def observations_as_records(data: dict, facets: dict) -> list[dict]:
    """
    Flatten nested observation data into a single list of records.

    Args:
        data (dict): Maps each variable to a mapping of entity -> entity data.
        facets (dict): Facet metadata, forwarded to `extract_observations`.

    Returns:
        list[dict]: The records of every (variable, entity) pair, in
        iteration order.
    """
    flattened = []

    for variable, entities in data.items():
        for entity, entity_data in entities.items():
            flattened.extend(
                extract_observations(
                    variable=variable,
                    entity=entity,
                    entity_data=entity_data,
                    facet_metadata=facets,
                )
            )

    return flattened
231+
232+
233+
@dataclass
class ResolveResponse:
    """Structured view of a Resolve endpoint response.

    Attributes:
        entities (List[Entity]): The entities parsed from the response's
            "entities" list.
    """

    entities: List[Entity] = field(default_factory=list)

    @classmethod
    def from_json(cls, json_data: Dict[str, Any]) -> "ResolveResponse":
        """Build a ResolveResponse from raw JSON data.

        Args:
            json_data (Dict[str, Any]): The API response data. Its "entities"
                entry (if any) is parsed item by item with `Entity.from_json`.

        Returns:
            ResolveResponse: A populated instance of the ResolveResponse class.
        """
        parsed = []
        for raw_entity in json_data.get("entities", []):
            parsed.append(Entity.from_json(raw_entity))
        return cls(entities=parsed)

    @property
    def json(self):
        """Return the dictionary form of this response.

        Returns:
            Dict[str, Any]: The result of `asdict` applied to this instance.
        """
        as_dictionary = asdict(self)
        return as_dictionary
273+
274+
275+
@dataclass
class SparqlResponse:
    """Structured view of a SPARQL endpoint response.

    Attributes:
        header (List[str]): The response's "header" list.
        rows (List[Row]): The rows parsed from the response's "rows" list.
    """

    header: List[str] = field(default_factory=list)
    rows: List[Row] = field(default_factory=list)

    @classmethod
    def from_json(cls, json_data: Dict[str, Any]) -> "SparqlResponse":
        """Build a SparqlResponse from raw JSON data.

        Args:
            json_data (Dict[str, Any]): The SPARQL query response data. The
                "header" entry is used as-is and each "rows" item is parsed
                with `Row.from_json`; missing entries become empty lists.

        Returns:
            SparqlResponse: A populated instance of the SparqlResponse class.
        """
        header = json_data.get("header", [])
        parsed_rows = []
        for raw_row in json_data.get("rows", []):
            parsed_rows.append(Row.from_json(raw_row))
        return cls(header=header, rows=parsed_rows)

    @property
    def json(self) -> Dict[str, Any]:
        """Return the dictionary form of this response.

        Returns:
            Dict[str, Any]: The result of `asdict` applied to this instance.
        """
        as_dictionary = asdict(self)
        return as_dictionary

0 commit comments

Comments
 (0)