From 3818b994e6091074972175c2ecc7715f10534b5e Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 21 May 2025 15:29:30 +0100 Subject: [PATCH 1/5] Add GCP auth --- .github/workflows/code_changes.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/code_changes.yaml b/.github/workflows/code_changes.yaml index 7c7714ee..58f4a2c7 100644 --- a/.github/workflows/code_changes.yaml +++ b/.github/workflows/code_changes.yaml @@ -21,6 +21,9 @@ jobs: args: ". -l 79 --check" Test: runs-on: ubuntu-latest + permissions: + contents: read + id-token: write steps: - name: Checkout repo uses: actions/checkout@v2 @@ -31,6 +34,10 @@ jobs: uses: actions/setup-python@v2 with: python-version: '3.11' + - uses: "google-github-actions/auth@v2" + with: + workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" + service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" - name: Install package run: uv pip install .[dev] --system From 83066b252eca8425534b020a2dee2fa6bae8014b Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 21 May 2025 15:35:27 +0100 Subject: [PATCH 2/5] Default to GCP --- policyengine/utils/data_download.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/policyengine/utils/data_download.py b/policyengine/utils/data_download.py index c7722173..bf5565f6 100644 --- a/policyengine/utils/data_download.py +++ b/policyengine/utils/data_download.py @@ -27,18 +27,6 @@ def download( ) logging.info = print - # NOTE: tests will break on build if you don't default to huggingface. - if data_file.huggingface_repo is not None: - logging.info("Using Hugging Face for download.") - try: - return download_from_hf( - repo=data_file.huggingface_org - + "/" - + data_file.huggingface_repo, - repo_filename=data_file.filepath, - ) - except: - logging.info("Failed to download from Hugging Face.") if Path(filepath).exists(): logging.info(f"File {filepath} already exists. Skipping download.") @@ -53,6 +41,20 @@ def download( ) return filepath + + # NOTE: tests will break on build if you don't default to huggingface. + elif data_file.huggingface_repo is not None: + logging.info("Using Hugging Face for download.") + try: + return download_from_hf( + repo=data_file.huggingface_org + + "/" + + data_file.huggingface_repo, + repo_filename=data_file.filepath, + ) + except: + logging.info("Failed to download from Hugging Face.") + raise ValueError( "No valid download method specified. Please provide either a Hugging Face repo or a Google Cloud Storage bucket." ) From 7ad26ef430a73ac1ef9e57c1bb36d8ec22453582 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 21 May 2025 15:42:18 +0100 Subject: [PATCH 3/5] Test removing hf token --- .github/workflows/code_changes.yaml | 4 +--- policyengine/constants.py | 7 ------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/code_changes.yaml b/.github/workflows/code_changes.yaml index 58f4a2c7..261fd216 100644 --- a/.github/workflows/code_changes.yaml +++ b/.github/workflows/code_changes.yaml @@ -43,6 +43,4 @@ jobs: run: uv pip install .[dev] --system - name: Run tests - run: make test - env: - HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} \ No newline at end of file + run: make test \ No newline at end of file diff --git a/policyengine/constants.py b/policyengine/constants.py index 43351271..b3e969ea 100644 --- a/policyengine/constants.py +++ b/policyengine/constants.py @@ -3,13 +3,6 @@ from policyengine_core.data import Dataset from policyengine.utils.data_download import download -# Datasets -ENHANCED_FRS = "hf://policyengine/policyengine-uk-data/enhanced_frs_2022_23.h5" -FRS = "hf://policyengine/policyengine-uk-data/frs_2022_23.h5" -ENHANCED_CPS = "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5" -CPS = "hf://policyengine/policyengine-us-data/cps_2023.h5" -POOLED_CPS = "hf://policyengine/policyengine-us-data/pooled_3_year_cps_2023.h5" - def get_default_dataset(country: str, region: str): if country == "uk": From 51f096e7fed7e5a46010721aa867f30dc21ec84f Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 22 May 2025 11:01:30 +0100 Subject: [PATCH 4/5] Use GCP by default for all storage locations Fixes #144 --- .github/workflows/any_changes.yaml | 4 ++++ changelog_entry.yaml | 4 ++++ policyengine/utils/data_download.py | 1 - 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/any_changes.yaml b/.github/workflows/any_changes.yaml index 91d39f36..a0b8de86 100644 --- a/.github/workflows/any_changes.yaml +++ b/.github/workflows/any_changes.yaml @@ -23,6 +23,10 @@ jobs: - name: Install package run: uv pip install .[dev] --system + - uses: "google-github-actions/auth@v2" + with: + workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" + service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" - name: Test documentation builds run: make documentation diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..acba3149 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Default storage location is GCP. diff --git a/policyengine/utils/data_download.py b/policyengine/utils/data_download.py index bf5565f6..6ea212d0 100644 --- a/policyengine/utils/data_download.py +++ b/policyengine/utils/data_download.py @@ -41,7 +41,6 @@ def download( ) return filepath - # NOTE: tests will break on build if you don't default to huggingface. elif data_file.huggingface_repo is not None: logging.info("Using Hugging Face for download.") From 3137f7c1fc47e7e2244102b560a14897f4b1526e Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Thu, 22 May 2025 11:03:03 +0100 Subject: [PATCH 5/5] Add perms --- .github/workflows/any_changes.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/any_changes.yaml b/.github/workflows/any_changes.yaml index a0b8de86..d51a3a4b 100644 --- a/.github/workflows/any_changes.yaml +++ b/.github/workflows/any_changes.yaml @@ -10,6 +10,9 @@ jobs: docs: name: Test documentation builds runs-on: ubuntu-latest + permissions: + contents: read + id-token: write steps: - name: Checkout repo uses: actions/checkout@v2