From cd7245f9fe6be33492a27f95402fca0631a1175e Mon Sep 17 00:00:00 2001 From: nlathia Date: Sat, 21 Feb 2026 17:27:43 +0000 Subject: [PATCH] Update aws & backblaze --- modelstore/storage/aws.py | 32 ++++++++++++------------ modelstore/storage/backblaze.py | 44 +++++++++++++-------------------- tests/storage/test_aws.py | 3 +-- tests/storage/test_backblaze.py | 14 +++++------ 4 files changed, 40 insertions(+), 53 deletions(-) diff --git a/modelstore/storage/aws.py b/modelstore/storage/aws.py index f34e61ad..522f2cc0 100644 --- a/modelstore/storage/aws.py +++ b/modelstore/storage/aws.py @@ -81,8 +81,7 @@ def client(self): def validate(self) -> bool: logger.debug("Querying for buckets with prefix=%s...", self.bucket_name) try: - resource = boto3.resource("s3") - resource.meta.client.head_bucket(Bucket=self.bucket_name) + self.client.head_bucket(Bucket=self.bucket_name) return True except ClientError: logger.error("Unable to access bucket: %s", self.bucket_name) @@ -137,20 +136,21 @@ def _get_storage_location(self, meta_data: metadata.Storage) -> str: def _read_json_objects(self, prefix: str) -> list: logger.debug("Listing files in: %s/%s", self.bucket_name, prefix) results = [] - objects = self.client.list_objects_v2(Bucket=self.bucket_name, Prefix=prefix) - for version in objects.get("Contents", []): - object_path = version["Key"] - if not object_path.endswith(".json"): - logger.debug("Skipping non-json file: %s", object_path) - continue - if os.path.split(object_path)[0] != prefix: - # We don't want to read files in a sub-prefix - logger.debug("Skipping file in sub-prefix: %s", object_path) - continue - - obj = self._read_json_object(object_path) - if obj is not None: - results.append(obj) + paginator = self.client.get_paginator("list_objects_v2") + for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix): + for version in page.get("Contents", []): + object_path = version["Key"] + if not object_path.endswith(".json"): + logger.debug("Skipping non-json file: %s", object_path) + continue + if os.path.split(object_path)[0] != prefix: + # We don't want to read files in a sub-prefix + logger.debug("Skipping file in sub-prefix: %s", object_path) + continue + + obj = self._read_json_object(object_path) + if obj is not None: + results.append(obj) return sorted_by_created(results) def _read_json_object(self, prefix: str) -> dict: diff --git a/modelstore/storage/backblaze.py b/modelstore/storage/backblaze.py index 093da740..eb19fdbd 100644 --- a/modelstore/storage/backblaze.py +++ b/modelstore/storage/backblaze.py @@ -65,6 +65,7 @@ def __init__( endpoint: Optional[str] = None, region: Optional[str] = None, root_prefix: Optional[str] = None, + _client=None, ): super().__init__(["boto3"], root_prefix, "MODEL_STORE_B2_ROOT_PREFIX") self.bucket_name = environment.get_value( @@ -86,7 +87,7 @@ def __init__( ) if self.endpoint is None: self.endpoint = f"https://s3.{self.region}.backblazeb2.com" - self.__client = None + self.__client = _client @staticmethod def _boto_config(): @@ -115,22 +116,10 @@ def client(self): logger.error("Unable to create B2 s3 client!") raise - def _get_resource(self): - """Returns a boto s3 resource configured for B2""" - return boto3.resource( - "s3", - region_name=self.region, - endpoint_url=self.endpoint, - aws_access_key_id=self.key_id, - aws_secret_access_key=self.application_key, - config=self._boto_config(), - ) - def validate(self) -> bool: logger.debug("Querying for buckets with prefix=%s...", self.bucket_name) try: - resource = self._get_resource() - resource.meta.client.head_bucket(Bucket=self.bucket_name) + self.client.head_bucket(Bucket=self.bucket_name) return True except ClientError: logger.error("Unable to access bucket: %s", self.bucket_name) @@ -184,19 +173,20 @@ def _get_storage_location(self, meta_data: metadata.Storage) -> str: def _read_json_objects(self, prefix: str) -> list: logger.debug("Listing files in: %s/%s", self.bucket_name, prefix) results = [] - objects = self.client.list_objects_v2(Bucket=self.bucket_name, Prefix=prefix) - for version in objects.get("Contents", []): - object_path = version["Key"] - if not object_path.endswith(".json"): - logger.debug("Skipping non-json file: %s", object_path) - continue - if os.path.split(object_path)[0] != prefix: - logger.debug("Skipping file in sub-prefix: %s", object_path) - continue - - obj = self._read_json_object(object_path) - if obj is not None: - results.append(obj) + paginator = self.client.get_paginator("list_objects_v2") + for page in paginator.paginate(Bucket=self.bucket_name, Prefix=prefix): + for version in page.get("Contents", []): + object_path = version["Key"] + if not object_path.endswith(".json"): + logger.debug("Skipping non-json file: %s", object_path) + continue + if os.path.split(object_path)[0] != prefix: + logger.debug("Skipping file in sub-prefix: %s", object_path) + continue + + obj = self._read_json_object(object_path) + if obj is not None: + results.append(obj) return sorted_by_created(results) def _read_json_object(self, prefix: str) -> dict: diff --git a/tests/storage/test_aws.py b/tests/storage/test_aws.py index fed8d59a..d9f34937 100644 --- a/tests/storage/test_aws.py +++ b/tests/storage/test_aws.py @@ -55,10 +55,9 @@ def get_file_contents(moto_boto, prefix): def test_create_from_environment_variables(monkeypatch): # Does not fail when environment variables exist monkeypatch.setenv("MODEL_STORE_AWS_BUCKET", _MOCK_BUCKET_NAME) - # pylint: disable=bare-except try: _ = AWSStorage() - except: + except Exception: pytest.fail("Failed to initialise storage from env variables") diff --git a/tests/storage/test_backblaze.py b/tests/storage/test_backblaze.py index 205141cb..040d6204 100644 --- a/tests/storage/test_backblaze.py +++ b/tests/storage/test_backblaze.py @@ -60,26 +60,23 @@ def get_file_contents(moto_boto, prefix): def _create_storage(): - storage = BackblazeStorage( + client = boto3.client("s3", region_name="us-east-1") + return BackblazeStorage( bucket_name=_MOCK_BUCKET_NAME, key_id="testing", application_key="testing", region="us-east-1", + _client=client, ) - # Override endpoint so boto3 uses the default AWS endpoint - # that moto intercepts - storage.endpoint = None - return storage def test_create_from_environment_variables(monkeypatch): monkeypatch.setenv("MODEL_STORE_B2_BUCKET", _MOCK_BUCKET_NAME) monkeypatch.setenv("B2_APPLICATION_KEY_ID", "testing") monkeypatch.setenv("B2_APPLICATION_KEY", "testing") - # pylint: disable=bare-except try: _ = BackblazeStorage() - except: + except Exception: pytest.fail("Failed to initialise storage from env variables") @@ -104,13 +101,14 @@ def test_create_fails_with_missing_environment_variables(monkeypatch): ], ) def test_validate(bucket_name, validate_should_pass): + client = boto3.client("s3", region_name="us-east-1") storage = BackblazeStorage( bucket_name=bucket_name, key_id="testing", application_key="testing", region="us-east-1", + _client=client, ) - storage.endpoint = None assert storage.validate() == validate_should_pass