-
Notifications
You must be signed in to change notification settings - Fork 322
feat: support timestamp_precision in table schema #2333
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2e1daa8
253ac1f
dc3c498
97f0251
234a3fd
b20159b
a1bc2cb
bc6dcda
518a12c
a8d5f5c
0567adf
1268c45
8603973
cb9f818
696dfff
d24df7d
873bff6
6a93c26
9a4f72f
c146e39
fc08533
0b743f3
2a81ef9
e131b6d
7693537
5d2fbf0
c7c2b47
f87b618
c0e4595
04c5f59
255b87a
657dd84
4cd3df4
e6a3f8b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -196,6 +196,14 @@ class SchemaField(object): | |||
|
|
||||
| Only valid for top-level schema fields (not nested fields). | ||||
| If the type is FOREIGN, this field is required. | ||||
|
|
||||
| timestamp_precision: Optional[enums.TimestampPrecision] | ||||
| Precision (maximum number of total digits in base 10) for seconds | ||||
| of TIMESTAMP type. | ||||
|
|
||||
| Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for | ||||
| microsecond precision. Use `enums.TimestampPrecision.PICOSECOND` | ||||
| (`12`) for picosecond precision. | ||||
| """ | ||||
|
|
||||
| def __init__( | ||||
|
|
@@ -213,6 +221,7 @@ def __init__( | |||
| range_element_type: Union[FieldElementType, str, None] = None, | ||||
| rounding_mode: Union[enums.RoundingMode, str, None] = None, | ||||
| foreign_type_definition: Optional[str] = None, | ||||
| timestamp_precision: Optional[enums.TimestampPrecision] = None, | ||||
| ): | ||||
| self._properties: Dict[str, Any] = { | ||||
| "name": name, | ||||
|
|
@@ -237,6 +246,13 @@ def __init__( | |||
| if isinstance(policy_tags, PolicyTagList) | ||||
| else None | ||||
| ) | ||||
| if isinstance(timestamp_precision, enums.TimestampPrecision): | ||||
| self._properties["timestampPrecision"] = timestamp_precision.value | ||||
| elif timestamp_precision is not None: | ||||
| raise ValueError( | ||||
| "timestamp_precision must be class enums.TimestampPrecision " | ||||
| f"or None, got {type(timestamp_precision)} instead." | ||||
| ) | ||||
| if isinstance(range_element_type, str): | ||||
| self._properties["rangeElementType"] = {"type": range_element_type} | ||||
| if isinstance(range_element_type, FieldElementType): | ||||
|
|
@@ -254,15 +270,22 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": | |||
| """Return a ``SchemaField`` object deserialized from a dictionary. | ||||
|
|
||||
| Args: | ||||
| api_repr (Mapping[str, str]): The serialized representation | ||||
| of the SchemaField, such as what is output by | ||||
| :meth:`to_api_repr`. | ||||
| api_repr (dict): The serialized representation of the SchemaField, | ||||
| such as what is output by :meth:`to_api_repr`. | ||||
|
|
||||
| Returns: | ||||
| google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. | ||||
| """ | ||||
| placeholder = cls("this_will_be_replaced", "PLACEHOLDER") | ||||
|
|
||||
| # The API returns this value as a string even though we send it as an | ||||
| # integer. Convert it back to an integer so that a schema received from | ||||
| # the API can be resent unchanged. | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems a bit surprising. The backend returns
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the backend expects an integer but returns a string. I will open a bug with the BigQuery team.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On second thought, this has been a persistent issue with other integer fields as well, hence the helper function would force a type conversion here:
|
||||
| try: | ||||
| api_repr["timestampPrecision"] = int(api_repr["timestampPrecision"]) | ||||
| except (TypeError, KeyError): | ||||
| pass | ||||
|
|
||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was going to point out there was an error here, because the docstrings say Still, I think this could be a bit cleaner as a try/catch
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch, I have updated the docstring and the code. |
||||
| # Note: we don't make a copy of api_repr because this can cause | ||||
| # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD | ||||
| # fields. See https://github.com/googleapis/python-bigquery/issues/6 | ||||
|
|
@@ -374,6 +397,16 @@ def policy_tags(self): | |||
| resource = self._properties.get("policyTags") | ||||
| return PolicyTagList.from_api_repr(resource) if resource is not None else None | ||||
|
|
||||
| @property | ||||
| def timestamp_precision(self) -> enums.TimestampPrecision: | ||||
| """Precision (maximum number of total digits in base 10) for seconds of | ||||
| TIMESTAMP type. | ||||
|
|
||||
| Returns: | ||||
| enums.TimestampPrecision: value of TimestampPrecision. | ||||
| """ | ||||
| return enums.TimestampPrecision(self._properties.get("timestampPrecision")) | ||||
|
|
||||
| def to_api_repr(self) -> dict: | ||||
| """Return a dictionary representing this schema field. | ||||
|
|
||||
|
|
@@ -408,6 +441,8 @@ def _key(self): | |||
| None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) | ||||
| ) | ||||
|
|
||||
| timestamp_precision = self._properties.get("timestampPrecision") | ||||
|
|
||||
| return ( | ||||
| self.name, | ||||
| field_type, | ||||
|
|
@@ -417,6 +452,7 @@ def _key(self): | |||
| self.description, | ||||
| self.fields, | ||||
| policy_tags, | ||||
| timestamp_precision, | ||||
| ) | ||||
|
|
||||
| def to_standard_sql(self) -> standard_sql.StandardSqlField: | ||||
|
|
@@ -467,10 +503,9 @@ def __hash__(self): | |||
| return hash(self._key()) | ||||
|
|
||||
| def __repr__(self): | ||||
| key = self._key() | ||||
| policy_tags = key[-1] | ||||
| *initial_tags, policy_tags, timestamp_precision_tag = self._key() | ||||
| policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags) | ||||
| adjusted_key = key[:-1] + (policy_tags_inst,) | ||||
| adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag) | ||||
| return f"{self.__class__.__name__}{adjusted_key}" | ||||
|
|
||||
|
|
||||
|
|
@@ -530,9 +565,11 @@ def _to_schema_fields(schema): | |||
| if isinstance(schema, Sequence): | ||||
| # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields | ||||
| return [ | ||||
| field | ||||
| if isinstance(field, SchemaField) | ||||
| else SchemaField.from_api_repr(field) | ||||
| ( | ||||
| field | ||||
| if isinstance(field, SchemaField) | ||||
| else SchemaField.from_api_repr(field) | ||||
| ) | ||||
| for field in schema | ||||
| ] | ||||
|
|
||||
|
|
||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,6 +74,16 @@ | |
| bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), | ||
| bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), | ||
| ] | ||
| SCHEMA_PICOSECOND = [ | ||
| bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), | ||
| bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), | ||
| bigquery.SchemaField( | ||
| "time_pico", | ||
| "TIMESTAMP", | ||
| mode="REQUIRED", | ||
| timestamp_precision=enums.TimestampPrecision.PICOSECOND, | ||
| ), | ||
| ] | ||
| CLUSTERING_SCHEMA = [ | ||
| bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), | ||
| bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), | ||
|
|
@@ -631,6 +641,19 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): | |
| self.assertEqual(time_partitioning.field, "transaction_time") | ||
| self.assertEqual(table.clustering_fields, ["user_email", "store_code"]) | ||
|
|
||
| def test_create_table_w_picosecond_timestamp(self): | ||
| dataset = self.temp_dataset(_make_dataset_id("create_table")) | ||
| table_id = "test_table" | ||
| table_arg = Table(dataset.table(table_id), schema=SCHEMA_PICOSECOND) | ||
| self.assertFalse(_table_exists(table_arg)) | ||
|
|
||
| table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) | ||
| self.to_delete.insert(0, table) | ||
|
|
||
| self.assertTrue(_table_exists(table)) | ||
| self.assertEqual(table.table_id, table_id) | ||
| self.assertEqual(table.schema, SCHEMA_PICOSECOND) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we have a test that reads back a timestamp and makes sure it's in the expected range? Or am I misunderstanding?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This PR only involves creating and reading a table schema that has a picosecond timestamp. I think we can add those tests in the PR that adds support for writing to and reading from the table. |
||
|
|
||
| def test_delete_dataset_with_string(self): | ||
| dataset_id = _make_dataset_id("delete_table_true_with_string") | ||
| project = Config.CLIENT.project | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.