From 8a0d0014df510664220a9a6417e9d3a9ecabf0d0 Mon Sep 17 00:00:00 2001 From: Edgar Ochoa Date: Wed, 18 Mar 2026 23:00:23 +0100 Subject: [PATCH] feat: folder creation and extra measures generation (Issue #22, #18) and fix tests --- .gitignore | 1 + dataform2looker/database_mappers.py | 28 +++++++++++++++++++++++---- dataform2looker/dataform2looker.py | 23 +++++++++++++++++++--- dataform2looker/lookml.py | 10 +++++++++- tests/test_column.py | 2 +- tests/test_generictable.py | 12 ++++++++++++ tests/test_lookml.py | 30 +++++++++++++++++++++++++++++ 7 files changed, 97 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 50c9ae6..8f66820 100644 --- a/.gitignore +++ b/.gitignore @@ -171,3 +171,4 @@ result.json .vscode/ .DS_Store +.envrc diff --git a/dataform2looker/database_mappers.py b/dataform2looker/database_mappers.py index cf4aff0..f457cfd 100644 --- a/dataform2looker/database_mappers.py +++ b/dataform2looker/database_mappers.py @@ -90,12 +90,18 @@ class GenericTable: UnsupportedDatabaseTypeError: If an unsupported `db_type` is provided. """ # noqa: E501 - def __init__(self, table_id: str, db_type: str = "bigquery") -> None: + def __init__( + self, + table_id: str, + db_type: str = "bigquery", + gen_extra_measures: bool = False, + ) -> None: """Initializes the `GenericTable` object based on the database type. Args: table_id: The full ID of the table in the database. db_type: The type of the database ("bigquery" currently supported). + gen_extra_measures: Whether to generate extra measures (e.g., sums, count_distinct). Raises: UnsupportedDatabaseTypeError: If an unsupported `db_type` is provided. @@ -127,9 +133,23 @@ def __init__(self, table_id: str, db_type: str = "bigquery") -> None: f"Dimensions Group for table {self.table_name}: {self.dimension_group}" ) self.measures = [{"type": "count", "name": "count"}] - # TODO it should be possible to include other measures by passing an argument - # Include measures if needed such as sums of all number dimensions - # include count_distinct + if gen_extra_measures: + for column in self.__table.columns: + if ( + column.dimension_type == "dimension" + and column.field_type == "number" + ): + self.measures.append({ + "type": "sum", + "name": f"total_{column.name}", + "sql": f"${{TABLE}}.{column.name}", + }) + if column.dimension_type == "dimension": + self.measures.append({ + "type": "count_distinct", + "name": f"count_distinct_{column.name}", + "sql": f"${{TABLE}}.{column.name}", + }) self.table_dictionary = { "view": { diff --git a/dataform2looker/dataform2looker.py b/dataform2looker/dataform2looker.py index 3250683..b91fc1c 100644 --- a/dataform2looker/dataform2looker.py +++ b/dataform2looker/dataform2looker.py @@ -10,20 +10,28 @@ from dataform2looker.lookml import LookML -def _generate_view(path_to_json_file: str, target_dir: str, tags: set[str]) -> int: +def _generate_view( + path_to_json_file: str, target_dir: str, tags: set[str], gen_extra_measures: bool +) -> int: """Generates LookML view files from a Dataform model. Args: path_to_json_file (str): Path to the JSON file from compiled Dataform project. target_dir (str): Target directory for Looker views. tags (set[str]): Filter to dataform models using this tag. + gen_extra_measures (bool): Whether to generate extra measures. Returns: int: 0 if the view generation was successful, 1 otherwise. """ logging.info(f" Generating views from: {path_to_json_file}") try: - lookml_object = LookML(path_to_json_file, target_dir, tags=tags) + lookml_object = LookML( + path_to_json_file, + target_dir, + tags=tags, + gen_extra_measures=gen_extra_measures, + ) lookml_object.save_lookml_views() return 0 except subprocess.CalledProcessError as e: @@ -70,17 +78,26 @@ def main(argv: Sequence[str] | None = None) -> int: required=False, ) + parser.add_argument( + "--gen-extra-measures", + action="store_true", + help="Generate extra measures like sum and count distinct.", + ) + args = parser.parse_args(argv) source_file = args.source_file_path target_dir = args.target_dir verbose = args.verbose tags = args.tags + gen_extra_measures = args.gen_extra_measures logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) if source_file.is_file(): logging.info(f" Processing file: {source_file}") - return _generate_view(str(source_file), str(target_dir), set(tags)) + return _generate_view( + str(source_file), str(target_dir), set(tags), gen_extra_measures + ) logging.error("The provided path is not taking to a JSON file") sys.exit(1) diff --git a/dataform2looker/lookml.py b/dataform2looker/lookml.py index 3df32e9..f654701 100644 --- a/dataform2looker/lookml.py +++ b/dataform2looker/lookml.py @@ -2,6 +2,7 @@ import json import logging +import os import lkml @@ -25,6 +26,7 @@ def __init__( target_folder_path: str, db_type: str = "bigquery", tags: list[str] = None, + gen_extra_measures: bool = False, ) -> None: """Initializes the `LookML` object. @@ -33,10 +35,12 @@ def __init__( target_folder_path: The target folder for LookML view files. db_type: The type of the database ("bigquery" currently supported). tags: A list of tags to filter tables (not yet implemented). + gen_extra_measures: Whether to generate extra measures. """ # noqa: E501 self.source_json_path = source_json_path self.db_type = db_type self.tags = set(tags or []) + self.gen_extra_measures = gen_extra_measures self.__tables_ids = self.__get_list_of_table_ids() self.__tables_list = self.__initialize_tables(self.__tables_ids) self.lookml_templates = self.__generate_lookml_templates(self.__tables_list) @@ -44,6 +48,7 @@ def __init__( def save_lookml_views(self) -> None: """Generates and saves LookML view files for each table.""" # noqa: E501 + os.makedirs(self.target_folder_path, exist_ok=True) for table_name, table_template in self.lookml_templates.items(): file_path = f"{self.target_folder_path}/" f"{table_name}.view.lkml" logging.debug(f"Creating file {file_path}") @@ -84,7 +89,10 @@ def __initialize_tables( Returns: A list of `GenericTable` objects representing the tables. """ # noqa: E501 - tables_list = [GenericTable(table_id, self.db_type) for table_id in tables_ids] + tables_list = [ + GenericTable(table_id, self.db_type, self.gen_extra_measures) + for table_id in tables_ids + ] return tables_list def __get_list_of_table_ids(self) -> list[str]: diff --git a/tests/test_column.py b/tests/test_column.py index 9026d1a..a5f3bba 100644 --- a/tests/test_column.py +++ b/tests/test_column.py @@ -45,7 +45,7 @@ def test_dimension_dictionar(self, my_column: Column) -> None: "name": my_column_name, "type": my_field_type, "description": my_column_description, - "sql": f"{{TABLE}}.{my_column_name}", + "sql": f"${{TABLE}}.{my_column_name}", } assert my_column.column_dictionary == column_dictionary diff --git a/tests/test_generictable.py b/tests/test_generictable.py index e58e16c..47f247a 100644 --- a/tests/test_generictable.py +++ b/tests/test_generictable.py @@ -63,5 +63,17 @@ def test_table_dictionary_measures(self, my_generic_table: GenericTable) -> None assert "view" in table_dictionary assert "measures" in table_dictionary["view"] + def test_gen_extra_measures(self, bq_table_id: str) -> None: + """Tests that extra measures are generated when `gen_extra_measures` is True.""" + table = GenericTable(bq_table_id, gen_extra_measures=True) + measures = table.table_dictionary["view"]["measures"] + + # Should have count + sum for number columns + count_distinct for all dimensions + # In the mock data, let's see what we have. + # From conftest.py or dataform_result.json + assert any(m["type"] == "sum" for m in measures) + assert any(m["type"] == "count_distinct" for m in measures) + assert any(m["name"] == "count" for m in measures) + # TODO add tests to check the table_dictionary diff --git a/tests/test_lookml.py b/tests/test_lookml.py index 82fd1ba..f175ca5 100644 --- a/tests/test_lookml.py +++ b/tests/test_lookml.py @@ -1,5 +1,8 @@ """This module contains unit tests for the `LookML` class from the `dataform2looker.lookml` module.""" # noqa: E501 +import os +import shutil + import pytest from dataform2looker.lookml import LookML @@ -58,5 +61,32 @@ def test_measure_generation(self, my_lookml: LookML) -> None: in view.strip() ) + def test_extra_measure_generation( + self, source_json_path: str, target_folder_path: str + ) -> None: + """Tests that extra measures are generated in LookML templates.""" + my_lookml = LookML( + source_json_path, target_folder_path, gen_extra_measures=True + ) + for view in my_lookml.lookml_templates.values(): + assert "type: sum" in view + assert "type: count_distinct" in view + + def test_save_lookml_views_creates_folder( + self, source_json_path: str, tmp_path: str + ) -> None: + """Tests folder creation in `save_lookml_views`.""" + target_folder = os.path.join(tmp_path, "new_views_folder") + if os.path.exists(target_folder): + shutil.rmtree(target_folder) + + lookml = LookML(source_json_path, target_folder) + lookml.save_lookml_views() + + assert os.path.exists(target_folder) + assert os.path.isdir(target_folder) + # Check if files were created + assert len(os.listdir(target_folder)) == 2 + # TODO include a test for the generated template