From 1f63992d082f293de1681011493bbe217790a4b1 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 2 Feb 2026 14:56:15 -0500 Subject: [PATCH 1/2] fix: re-enable CI linting and unit tests for FTS branch Update the SDK to work with the new alpha API model structure that uses schema + deployment instead of the legacy spec-based approach. Key changes: - Update request_factory.py to translate legacy API calls to new format - Fix model imports (BackupModelSchema -> Schema) - Update attribute access patterns for IndexModel dynamic attributes - Fix gRPC utils for new namespace schema structure - Update all unit test fixtures to use alpha API structure - Add proper skip conditions for asyncio tests - Remove fts branch skip conditions from on-pr.yaml Resolves: SDK-116 Co-authored-by: Cursor --- .github/workflows/on-pr.yaml | 9 +- mypy.ini | 24 ++ pinecone/admin/eraser/resources/index.py | 3 +- pinecone/db_control/models/backup_model.py | 6 +- pinecone/db_control/models/index_list.py | 2 +- pinecone/db_control/request_factory.py | 338 ++++++------------ .../db_control/resources/asyncio/index.py | 13 +- pinecone/db_control/resources/sync/index.py | 17 +- pinecone/grpc/utils.py | 70 ++-- pinecone/pinecone.py | 6 +- pinecone/pinecone_asyncio.py | 4 +- pinecone/pinecone_interface_asyncio.py | 6 +- tests/unit/db_control/test_index.py | 33 +- .../db_control/test_index_request_factory.py | 236 ++++++------ tests/unit/models/test_index_list.py | 77 ++-- tests/unit/openapi_support/test_api_client.py | 67 ++-- tests/unit/test_control.py | 52 ++- tests/unit/test_index.py | 17 +- 18 files changed, 499 insertions(+), 481 deletions(-) diff --git a/.github/workflows/on-pr.yaml b/.github/workflows/on-pr.yaml index c5888d624..86d3e7f36 100644 --- a/.github/workflows/on-pr.yaml +++ b/.github/workflows/on-pr.yaml @@ -32,25 +32,20 @@ concurrency: jobs: linting: - # Skip linting for PRs targeting fts branch until SDK compatibility work is complete - if: github.base_ref != 'fts' uses: './.github/workflows/testing-lint.yaml' unit-tests: - # Skip unit tests for PRs targeting fts branch until SDK compatibility work is complete - if: github.base_ref != 'fts' uses: './.github/workflows/testing-unit.yaml' secrets: inherit with: python_versions_json: '["3.10", "3.11", "3.12", "3.13"]' create-project: - if: github.base_ref != 'fts' uses: './.github/workflows/project-setup.yaml' secrets: inherit integration-tests: - if: always() && (needs.create-project.result == 'success') && github.base_ref != 'fts' + if: always() && (needs.create-project.result == 'success') uses: './.github/workflows/testing-integration.yaml' secrets: inherit needs: @@ -62,7 +57,7 @@ jobs: sparse_index_host: ${{ needs.create-project.outputs.index_host_sparse }} cleanup-project: - if: always() && github.base_ref != 'fts' + if: always() needs: - create-project - integration-tests diff --git a/mypy.ini b/mypy.ini index 09e488221..03ca7981a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -31,3 +31,27 @@ ignore_missing_imports = True [mypy-aiohttp_retry.*] ignore_missing_imports = True + +[mypy-pandas] +ignore_missing_imports = True +follow_imports = skip + +[mypy-pandas.*] +ignore_missing_imports = True +follow_imports = skip + +[mypy-google.protobuf] +ignore_missing_imports = True +follow_imports = skip + +[mypy-google.protobuf.*] +ignore_missing_imports = True +follow_imports = skip + +[mypy-dateutil] +ignore_missing_imports = True +follow_imports = skip + +[mypy-dateutil.*] +ignore_missing_imports = True +follow_imports = skip diff --git 
a/pinecone/admin/eraser/resources/index.py b/pinecone/admin/eraser/resources/index.py index 53d5add25..ff68b49d4 100644 --- a/pinecone/admin/eraser/resources/index.py +++ b/pinecone/admin/eraser/resources/index.py @@ -17,7 +17,8 @@ def delete(self, name): def get_state(self, name): desc = self.pc.db.index.describe(name=name) - return desc["status"]["state"] + status = desc.status + return getattr(status, "state", None) def list(self): return self.pc.db.index.list() diff --git a/pinecone/db_control/models/backup_model.py b/pinecone/db_control/models/backup_model.py index 2c8ffdb8a..414b9218c 100644 --- a/pinecone/db_control/models/backup_model.py +++ b/pinecone/db_control/models/backup_model.py @@ -6,7 +6,7 @@ from pinecone.utils.repr_overrides import custom_serializer if TYPE_CHECKING: - from pinecone.core.openapi.db_control.model.backup_model_schema import BackupModelSchema + from pinecone.core.openapi.db_control.model.schema import Schema class BackupModel: @@ -21,7 +21,7 @@ def __init__(self, backup: OpenAPIBackupModel): self._backup = backup @property - def schema(self) -> "BackupModelSchema" | None: + def schema(self) -> "Schema" | None: """Schema for the behavior of Pinecone's internal metadata index. This property defines which metadata fields are indexed and filterable @@ -32,7 +32,7 @@ def schema(self) -> "BackupModelSchema" | None: The schema is a map of metadata field names to their configuration, where each field configuration specifies whether the field is filterable. - :type: BackupModelSchema, optional + :type: Schema, optional :returns: The metadata schema configuration, or None if not set. """ return getattr(self._backup, "schema", None) diff --git a/pinecone/db_control/models/index_list.py b/pinecone/db_control/models/index_list.py index 4a0fdc93c..45ad0f60c 100644 --- a/pinecone/db_control/models/index_list.py +++ b/pinecone/db_control/models/index_list.py @@ -10,7 +10,7 @@ def __init__(self, index_list: OpenAPIIndexList): self.current = 0 def names(self) -> list[str]: - return [i.name for i in self.indexes] + return [str(i.name) for i in self.indexes] def __getitem__(self, key): return self.indexes[key] diff --git a/pinecone/db_control/request_factory.py b/pinecone/db_control/request_factory.py index c6738215a..d1b2145e1 100644 --- a/pinecone/db_control/request_factory.py +++ b/pinecone/db_control/request_factory.py @@ -15,31 +15,18 @@ ) from pinecone.core.openapi.db_control.model.create_index_request import CreateIndexRequest from pinecone.core.openapi.db_control.model.configure_index_request import ConfigureIndexRequest -from pinecone.core.openapi.db_control.model.configure_index_request_embed import ( - ConfigureIndexRequestEmbed, -) -from pinecone.core.openapi.db_control.model.index_spec import IndexSpec from pinecone.core.openapi.db_control.model.index_tags import IndexTags -from pinecone.core.openapi.db_control.model.serverless_spec import ( - ServerlessSpec as ServerlessSpecModel, -) from pinecone.core.openapi.db_control.model.read_capacity_on_demand_spec import ( ReadCapacityOnDemandSpec, ) from pinecone.core.openapi.db_control.model.read_capacity_dedicated_spec import ( ReadCapacityDedicatedSpec, ) -from pinecone.core.openapi.db_control.model.read_capacity_dedicated_config import ( - ReadCapacityDedicatedConfig, +from pinecone.core.openapi.db_control.model.read_capacity_dedicated_spec_response_scaling import ( + ReadCapacityDedicatedSpecResponseScaling, ) -from pinecone.core.openapi.db_control.model.scaling_config_manual import ScalingConfigManual -from 
pinecone.core.openapi.db_control.model.backup_model_schema import BackupModelSchema -from pinecone.core.openapi.db_control.model.backup_model_schema_fields import ( - BackupModelSchemaFields, -) -from pinecone.core.openapi.db_control.model.byoc_spec import ByocSpec as ByocSpecModel -from pinecone.core.openapi.db_control.model.pod_spec import PodSpec as PodSpecModel -from pinecone.core.openapi.db_control.model.pod_spec_metadata_config import PodSpecMetadataConfig +from pinecone.core.openapi.db_control.model.schema import Schema as OpenAPISchema +from pinecone.core.openapi.db_control.model.schema_fields import SchemaFields as OpenAPISchemaFields from pinecone.core.openapi.db_control.model.create_index_from_backup_request import ( CreateIndexFromBackupRequest, ) @@ -123,54 +110,28 @@ def __parse_read_capacity( return cast(ReadCapacityOnDemandSpec, result) elif mode == "Dedicated": dedicated_dict: dict[str, Any] = read_capacity.get("dedicated", {}) # type: ignore - # Construct ReadCapacityDedicatedConfig - # node_type and scaling are required fields + # Alpha API structure: node_type and scaling are top-level on ReadCapacityDedicatedSpec if "node_type" not in dedicated_dict or dedicated_dict.get("node_type") is None: raise ValueError( "node_type is required when using Dedicated read capacity mode. " "Please specify 'node_type' (e.g., 't1' or 'b1') in the 'dedicated' configuration." ) - if "scaling" not in dedicated_dict or dedicated_dict.get("scaling") is None: - raise ValueError( - "scaling is required when using Dedicated read capacity mode. " - "Please specify 'scaling' (e.g., 'Manual') in the 'dedicated' configuration." - ) node_type = dedicated_dict["node_type"] - scaling = dedicated_dict["scaling"] - dedicated_config_kwargs = {"node_type": node_type, "scaling": scaling} - - # Validate that manual scaling configuration is provided when scaling is "Manual" - if scaling == "Manual": - if "manual" not in dedicated_dict or dedicated_dict.get("manual") is None: - raise ValueError( - "When using 'Manual' scaling with Dedicated read capacity mode, " - "the 'manual' field with 'shards' and 'replicas' is required. " - "Please specify 'manual': {'shards': , 'replicas': } " - "in the 'dedicated' configuration." - ) - manual_dict = dedicated_dict["manual"] - if not isinstance(manual_dict, dict): - raise ValueError( - "The 'manual' field must be a dictionary with 'shards' and 'replicas' keys." - ) - if "shards" not in manual_dict or "replicas" not in manual_dict: - missing = [] - if "shards" not in manual_dict: - missing.append("shards") - if "replicas" not in manual_dict: - missing.append("replicas") - raise ValueError( - f"The 'manual' configuration is missing required fields: {', '.join(missing)}. " - "Please provide both 'shards' and 'replicas' in the 'manual' configuration." 
- ) - dedicated_config_kwargs["manual"] = ScalingConfigManual(**manual_dict) - elif "manual" in dedicated_dict: - # Allow manual to be provided for other scaling types (future compatibility) - manual_dict = dedicated_dict["manual"] - dedicated_config_kwargs["manual"] = ScalingConfigManual(**manual_dict) - - dedicated_config = ReadCapacityDedicatedConfig(**dedicated_config_kwargs) - result = ReadCapacityDedicatedSpec(mode="Dedicated", dedicated=dedicated_config) + + # Handle scaling configuration + scaling_strategy = dedicated_dict.get("scaling", "Manual") + manual_dict = dedicated_dict.get("manual", {}) + replicas = manual_dict.get("replicas", 1) if manual_dict else 1 + shards = manual_dict.get("shards", 1) if manual_dict else 1 + + # Create the scaling object with the alpha API structure + scaling_obj = ReadCapacityDedicatedSpecResponseScaling( + strategy=scaling_strategy, replicas=replicas, shards=shards + ) + + result = ReadCapacityDedicatedSpec( + mode="Dedicated", node_type=node_type, scaling=scaling_obj + ) return cast(ReadCapacityDedicatedSpec, result) else: # Fallback: let OpenAPI handle it @@ -193,30 +154,34 @@ def __parse_schema( | dict[ str, dict[str, Any] ] # Dict with "fields" wrapper: {"fields": {field_name: {...}}, ...} - | BackupModelSchema # OpenAPI model instance + | OpenAPISchema # OpenAPI model instance ), - ) -> BackupModelSchema: - """Parse schema dict into BackupModelSchema instance. + ) -> OpenAPISchema: + """Parse schema dict into Schema instance. - :param schema: Dict with schema configuration (either {field_name: {filterable: bool, ...}} or - {"fields": {field_name: {filterable: bool, ...}}, ...}) or existing BackupModelSchema instance - :return: BackupModelSchema instance + :param schema: Dict with schema configuration (either {field_name: {type: str, ...}} or + {"fields": {field_name: {type: str, ...}}, ...}) or existing Schema instance + :return: Schema instance """ if isinstance(schema, dict): schema_kwargs: dict[str, Any] = {} # Handle two formats: - # 1. {field_name: {filterable: bool, ...}} - direct field mapping - # 2. {"fields": {field_name: {filterable: bool, ...}}, ...} - with fields wrapper + # 1. {field_name: {type: str, ...}} - direct field mapping + # 2. 
{"fields": {field_name: {type: str, ...}}, ...} - with fields wrapper if "fields" in schema: # Format 2: has fields wrapper fields = {} for field_name, field_config in schema["fields"].items(): if isinstance(field_config, dict): - # Pass through the entire field_config dict to allow future API fields - fields[field_name] = BackupModelSchemaFields(**field_config) + # SchemaFields requires 'type' as a required field + field_type = field_config.get("type", "string") + fields[field_name] = OpenAPISchemaFields( + type=field_type, + **{k: v for k, v in field_config.items() if k != "type"}, + ) else: - # If not a dict, create with default filterable=True - fields[field_name] = BackupModelSchemaFields(filterable=True) + # If not a dict, create with default type=string + fields[field_name] = OpenAPISchemaFields(type="string") schema_kwargs["fields"] = fields # Pass through any other fields in schema_dict to allow future API fields @@ -229,15 +194,19 @@ def __parse_schema( fields = {} for field_name, field_config in schema.items(): if isinstance(field_config, dict): - # Pass through the entire field_config dict to allow future API fields - fields[field_name] = BackupModelSchemaFields(**field_config) + # SchemaFields requires 'type' as a required field + field_type = field_config.get("type", "string") + fields[field_name] = OpenAPISchemaFields( + type=field_type, + **{k: v for k, v in field_config.items() if k != "type"}, + ) else: - # If not a dict, create with default filterable=True - fields[field_name] = BackupModelSchemaFields(filterable=True) + # If not a dict, create with default type=string + fields[field_name] = OpenAPISchemaFields(type="string") # Ensure fields is always set, even if empty schema_kwargs["fields"] = fields - # Validate that fields is present before constructing BackupModelSchema + # Validate that fields is present before constructing Schema if "fields" not in schema_kwargs: raise ValueError( "Schema dict must contain field definitions. 
" @@ -247,126 +216,12 @@ def __parse_schema( from typing import cast - result = BackupModelSchema(**schema_kwargs) - return cast(BackupModelSchema, result) + result = OpenAPISchema(**schema_kwargs) + return cast(OpenAPISchema, result) else: - # Already a BackupModelSchema instance + # Already a Schema instance return schema - @staticmethod - def __parse_index_spec(spec: Dict | ServerlessSpec | PodSpec | ByocSpec) -> IndexSpec: - if isinstance(spec, dict): - if "serverless" in spec: - spec["serverless"]["cloud"] = convert_enum_to_string(spec["serverless"]["cloud"]) - spec["serverless"]["region"] = convert_enum_to_string(spec["serverless"]["region"]) - - # Handle read_capacity if present - if "read_capacity" in spec["serverless"]: - spec["serverless"]["read_capacity"] = ( - PineconeDBControlRequestFactory.__parse_read_capacity( - spec["serverless"]["read_capacity"] - ) - ) - - # Handle schema if present - convert to BackupModelSchema - if "schema" in spec["serverless"]: - schema_dict = spec["serverless"]["schema"] - if isinstance(schema_dict, dict): - # Process fields if present, otherwise pass through as-is - schema_kwargs = {} - if "fields" in schema_dict: - fields = {} - for field_name, field_config in schema_dict["fields"].items(): - if isinstance(field_config, dict): - # Pass through the entire field_config dict to allow future API fields - fields[field_name] = BackupModelSchemaFields(**field_config) - else: - # If not a dict, create with default filterable=True - fields[field_name] = BackupModelSchemaFields(filterable=True) - schema_kwargs["fields"] = fields - - # Pass through any other fields in schema_dict to allow future API fields - for key, value in schema_dict.items(): - if key != "fields": - schema_kwargs[key] = value - - spec["serverless"]["schema"] = BackupModelSchema(**schema_kwargs) - - index_spec = IndexSpec(serverless=ServerlessSpecModel(**spec["serverless"])) - elif "pod" in spec: - spec["pod"]["environment"] = convert_enum_to_string(spec["pod"]["environment"]) - args_dict = parse_non_empty_args( - [ - ("environment", spec["pod"].get("environment")), - ("metadata_config", spec["pod"].get("metadata_config")), - ("replicas", spec["pod"].get("replicas")), - ("shards", spec["pod"].get("shards")), - ("pods", spec["pod"].get("pods")), - ("source_collection", spec["pod"].get("source_collection")), - ] - ) - if args_dict.get("metadata_config"): - args_dict["metadata_config"] = PodSpecMetadataConfig( - indexed=args_dict["metadata_config"].get("indexed", None) - ) - index_spec = IndexSpec(pod=PodSpecModel(**args_dict)) - elif "byoc" in spec: - index_spec = IndexSpec(byoc=ByocSpecModel(**spec["byoc"])) - else: - raise ValueError("spec must contain either 'serverless', 'pod', or 'byoc' key") - elif isinstance(spec, ServerlessSpec): - # Build args dict for ServerlessSpecModel - serverless_args: dict[str, Any] = {"cloud": spec.cloud, "region": spec.region} - - # Handle read_capacity - if spec.read_capacity is not None: - serverless_args["read_capacity"] = ( - PineconeDBControlRequestFactory.__parse_read_capacity(spec.read_capacity) - ) - - # Handle schema - if spec.schema is not None: - # Convert dict to BackupModelSchema - # schema is {field_name: {filterable: bool, ...}} - # Pass through the entire field_config to allow future API fields - fields = {} - for field_name, field_config in spec.schema.items(): - if isinstance(field_config, dict): - # Pass through the entire field_config dict to allow future API fields - fields[field_name] = BackupModelSchemaFields(**field_config) - else: 
- # If not a dict, create with default filterable=True - fields[field_name] = BackupModelSchemaFields(filterable=True) - serverless_args["schema"] = BackupModelSchema(fields=fields) - - index_spec = IndexSpec(serverless=ServerlessSpecModel(**serverless_args)) - elif isinstance(spec, PodSpec): - args_dict = parse_non_empty_args( - [ - ("replicas", spec.replicas), - ("shards", spec.shards), - ("pods", spec.pods), - ("source_collection", spec.source_collection), - ] - ) - if spec.metadata_config: - args_dict["metadata_config"] = PodSpecMetadataConfig( - indexed=spec.metadata_config.get("indexed", None) - ) - - index_spec = IndexSpec( - pod=PodSpecModel(environment=spec.environment, pod_type=spec.pod_type, **args_dict) - ) - elif isinstance(spec, ByocSpec): - args_dict = parse_non_empty_args([("environment", spec.environment)]) - index_spec = IndexSpec(byoc=ByocSpecModel(**args_dict)) - else: - raise TypeError("spec must be of type dict, ServerlessSpec, PodSpec, or ByocSpec") - - from typing import cast - - return cast(IndexSpec, index_spec) - @staticmethod def _translate_legacy_request( spec: Dict | ServerlessSpec | PodSpec | ByocSpec, @@ -415,10 +270,11 @@ def _translate_legacy_request( # Translate spec to deployment deployment_dict: dict[str, Any] + deployment: ServerlessDeployment | PodDeployment | ByocDeployment if isinstance(spec, dict): if "serverless" in spec: serverless_spec = spec["serverless"] - # Convert enum values to strings for consistency with __parse_index_spec + # Convert enum values to strings for consistency cloud = convert_enum_to_string(serverless_spec.get("cloud", "")) region = convert_enum_to_string(serverless_spec.get("region", "")) deployment = ServerlessDeployment(cloud=cloud, region=region) @@ -654,11 +510,23 @@ def create_index_with_schema_request( deployment_dict = deployment.to_dict() schema_dict = PineconeDBControlRequestFactory._serialize_schema(schema) + # Convert schema_dict to proper Schema OpenAPI object + schema_fields = {} + for field_name, field_config in schema_dict.get("fields", {}).items(): + if isinstance(field_config, dict): + field_type = field_config.get("type", "string") + schema_fields[field_name] = OpenAPISchemaFields( + type=field_type, + **{k: v for k, v in field_config.items() if k != "type"}, + _check_type=False, + ) + schema_obj = OpenAPISchema(fields=schema_fields, _check_type=False) + args = parse_non_empty_args( [ ("name", name), ("deployment", deployment_dict), - ("schema", schema_dict), + ("schema", schema_obj), ("deletion_protection", dp), ("tags", tags_obj), ] @@ -666,7 +534,7 @@ def create_index_with_schema_request( from typing import cast - result = CreateIndexRequest(**args) + result = CreateIndexRequest(**args, _check_type=False) return cast(CreateIndexRequest, result) @staticmethod @@ -689,26 +557,41 @@ def create_index_request( dp = None tags_obj = PineconeDBControlRequestFactory.__parse_tags(tags) - index_spec = PineconeDBControlRequestFactory.__parse_index_spec(spec) if vector_type == VectorType.SPARSE.value and dimension is not None: raise ValueError("dimension should not be specified for sparse indexes") + # Translate legacy spec/dimension/metric to deployment + schema format for alpha API + deployment_dict, schema_dict = PineconeDBControlRequestFactory._translate_legacy_request( + spec=spec, dimension=dimension, metric=metric, vector_type=vector_type + ) + + # Convert schema_dict to proper Schema OpenAPI object + schema_fields = {} + for field_name, field_config in schema_dict.get("fields", {}).items(): + if 
isinstance(field_config, dict): + field_type = field_config.get("type", "string") + schema_fields[field_name] = OpenAPISchemaFields( + type=field_type, + **{k: v for k, v in field_config.items() if k != "type"}, + _check_type=False, + ) + schema_obj = OpenAPISchema(fields=schema_fields, _check_type=False) + + # Deployment dict is passed directly - OpenAPI model accepts dicts with _check_type=False args = parse_non_empty_args( [ ("name", name), - ("dimension", dimension), - ("metric", metric), - ("spec", index_spec), + ("schema", schema_obj), + ("deployment", deployment_dict), ("deletion_protection", dp), - ("vector_type", vector_type), ("tags", tags_obj), ] ) from typing import cast - result = CreateIndexRequest(**args) + result = CreateIndexRequest(**args, _check_type=False) return cast(CreateIndexRequest, result) @staticmethod @@ -733,7 +616,7 @@ def create_index_for_model_request( | dict[ str, dict[str, Any] ] # Dict with "fields" wrapper: {"fields": {field_name: {...}}, ...} - | BackupModelSchema # OpenAPI model instance + | OpenAPISchema # OpenAPI model instance ) | None = None, ) -> CreateIndexForModelRequest: @@ -824,7 +707,7 @@ def configure_index_request( | ReadCapacityDedicatedSpec ) | None = None, - ): + ) -> ConfigureIndexRequest: if deletion_protection is None: dp = description.deletion_protection elif isinstance(deletion_protection, DeletionProtection): @@ -836,9 +719,14 @@ def configure_index_request( fetched_tags = description.tags if fetched_tags is None: - starting_tags = {} + starting_tags: dict[str, str] = {} else: - starting_tags = fetched_tags.to_dict() + # Use getattr with a default to handle the 'object' type issue + tags_obj = getattr(fetched_tags, "to_dict", None) + if tags_obj is not None and callable(tags_obj): + starting_tags = tags_obj() + else: + starting_tags = {} if tags is None: # Do not modify tags if none are provided @@ -847,17 +735,6 @@ def configure_index_request( # Merge existing tags with new tags tags = {**starting_tags, **tags} - pod_config_args: dict[str, Any] = {} - if pod_type: - new_pod_type = convert_enum_to_string(pod_type) - pod_config_args.update(pod_type=new_pod_type) - if replicas: - pod_config_args.update(replicas=replicas) - - embed_config = None - if embed is not None: - embed_config = ConfigureIndexRequestEmbed(**dict(embed)) - # Parse read_capacity if provided parsed_read_capacity = None if read_capacity is not None: @@ -865,19 +742,30 @@ def configure_index_request( read_capacity ) - spec = None - if pod_config_args: - spec = {"pod": pod_config_args} - elif parsed_read_capacity is not None: - # Serverless index configuration - spec = {"serverless": {"read_capacity": parsed_read_capacity}} + # Build deployment for pod configuration updates + deployment_dict: dict[str, Any] | None = None + if replicas is not None or pod_type is not None: + pod_type_str = convert_enum_to_string(pod_type) if pod_type else None + deployment_dict = {"deployment_type": "pod"} + if replicas is not None: + deployment_dict["replicas"] = replicas + if pod_type_str is not None: + deployment_dict["pod_type"] = pod_type_str + + # Note: embed configuration is no longer supported in alpha API configure_index + # The schema field should be used instead for index configuration updates + if embed is not None: + raise NotImplementedError( + "The 'embed' parameter is not supported in the alpha API. " + "Use the 'schema' field for index configuration updates." 
+ ) args_dict = parse_non_empty_args( [ ("deletion_protection", dp), ("tags", IndexTags(**tags)), - ("spec", spec), - ("embed", embed_config), + ("deployment", deployment_dict), + ("read_capacity", parsed_read_capacity), ] ) diff --git a/pinecone/db_control/resources/asyncio/index.py b/pinecone/db_control/resources/asyncio/index.py index 8b993183a..a1c60b829 100644 --- a/pinecone/db_control/resources/asyncio/index.py +++ b/pinecone/db_control/resources/asyncio/index.py @@ -49,7 +49,7 @@ from pinecone.core.openapi.db_control.model.read_capacity_dedicated_spec import ( ReadCapacityDedicatedSpec, ) - from pinecone.core.openapi.db_control.model.backup_model_schema import BackupModelSchema + from pinecone.core.openapi.db_control.model.schema import Schema class IndexResourceAsyncio: @@ -137,6 +137,8 @@ async def create( ) else: # Legacy spec-based creation + # spec is guaranteed to be non-None here because we checked above + assert spec is not None req = PineconeDBControlRequestFactory.create_index_request( name=name, spec=spec, @@ -179,7 +181,7 @@ async def create_for_model( | dict[ str, dict[str, Any] ] # Dict with "fields" wrapper: {"fields": {field_name: {...}}, ...} - | "BackupModelSchema" # OpenAPI model instance + | "Schema" # OpenAPI model instance ) | None = None, timeout: int | None = None, @@ -226,9 +228,12 @@ async def __poll_describe_index_until_ready( total_wait_time = 0 while True: description = await self.describe(name=name) - if description.status.state == "InitializationFailed": + status = description.status + state = getattr(status, "state", None) + ready = getattr(status, "ready", False) + if state == "InitializationFailed": raise Exception(f"Index {name} failed to initialize.") - if description.status.ready: + if ready: return description if timeout is not None and total_wait_time >= timeout: diff --git a/pinecone/db_control/resources/sync/index.py b/pinecone/db_control/resources/sync/index.py index 6b3eb86a3..aa5efbe5e 100644 --- a/pinecone/db_control/resources/sync/index.py +++ b/pinecone/db_control/resources/sync/index.py @@ -49,7 +49,7 @@ from pinecone.core.openapi.db_control.model.read_capacity_dedicated_spec import ( ReadCapacityDedicatedSpec, ) - from pinecone.core.openapi.db_control.model.backup_model_schema import BackupModelSchema + from pinecone.core.openapi.db_control.model.schema import Schema class IndexResource(PluginAware): @@ -157,6 +157,8 @@ def create( ) else: # Legacy spec-based creation + # spec is guaranteed to be non-None here because we checked above + assert spec is not None req = PineconeDBControlRequestFactory.create_index_request( name=name, spec=spec, @@ -199,7 +201,7 @@ def create_for_model( | dict[ str, dict[str, Any] ] # Dict with "fields" wrapper: {"fields": {field_name: {...}}, ...} - | "BackupModelSchema" # OpenAPI model instance + | "Schema" # OpenAPI model instance ) | None = None, timeout: int | None = None, @@ -263,11 +265,12 @@ def __poll_describe_index_until_ready( total_wait_time = 0 while True: description = self.describe(name=name) - if description.status.state == "InitializationFailed": - raise Exception( - f"Index {name} failed to initialize. The index status is {description.status.state}." - ) - if description.status.ready: + status = description.status + state = getattr(status, "state", None) + ready = getattr(status, "ready", False) + if state == "InitializationFailed": + raise Exception(f"Index {name} failed to initialize. 
The index status is {state}.") + if ready: return description if timeout is not None and total_wait_time >= timeout: diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index f1caabb8c..8deb15206 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -13,7 +13,7 @@ IndexDescription as DescribeIndexStatsResponse, NamespaceSummary, NamespaceDescription, - NamespaceDescriptionIndexedFields, + CreateNamespaceRequestSchema, ListNamespacesResponse, Pagination as OpenApiPagination, ) @@ -495,18 +495,31 @@ def parse_namespace_description( name = response.name record_count = response.record_count - # Extract indexed_fields if present - indexed_fields = None - if response.HasField("indexed_fields") and response.indexed_fields: - # Access indexed_fields.fields directly (RepeatedScalarFieldContainer) - fields_list = list(response.indexed_fields.fields) if response.indexed_fields.fields else [] - if fields_list: - indexed_fields = NamespaceDescriptionIndexedFields( - fields=fields_list, _check_type=False + # Extract schema if present (replaces indexed_fields in alpha API) + schema = None + if response.HasField("schema") and response.schema: + # Convert proto schema to OpenAPI model + fields_dict = {} + if response.schema.fields: + for field_name, field_config in response.schema.fields.items(): + fields_dict[field_name] = {"type": getattr(field_config, "type", "string")} + if fields_dict: + from pinecone.core.openapi.db_data.model.create_namespace_request_schema_fields import ( + CreateNamespaceRequestSchemaFields, + ) + + schema = CreateNamespaceRequestSchema( + fields={ + k: CreateNamespaceRequestSchemaFields( + type=v.get("type", "string"), _check_type=False + ) + for k, v in fields_dict.items() + }, + _check_type=False, ) namespace_desc = NamespaceDescription( - name=name, record_count=record_count, indexed_fields=indexed_fields, _check_type=False + name=name, record_count=record_count, schema=schema, _check_type=False ) # Attach _response_info as an attribute (NamespaceDescription is an OpenAPI model) @@ -526,26 +539,37 @@ def parse_list_namespaces_response( This optimized version directly accesses protobuf fields for better performance. 
""" + from pinecone.core.openapi.db_data.model.create_namespace_request_schema_fields import ( + CreateNamespaceRequestSchemaFields, + ) + # Directly iterate over namespaces # Pre-allocate namespaces list with known size for better performance namespaces_proto = response.namespaces - namespaces = [None] * len(namespaces_proto) if namespaces_proto else [] + namespaces: list[NamespaceDescription] = ( + [None] * len(namespaces_proto) if namespaces_proto else [] + ) # type: ignore[list-item] for idx, ns in enumerate(namespaces_proto): - # Extract indexed_fields if present - indexed_fields = None - if ns.HasField("indexed_fields") and ns.indexed_fields: - # Access indexed_fields.fields directly (RepeatedScalarFieldContainer) - fields_list = list(ns.indexed_fields.fields) if ns.indexed_fields.fields else [] - if fields_list: - indexed_fields = NamespaceDescriptionIndexedFields( - fields=fields_list, _check_type=False + # Extract schema if present (replaces indexed_fields in alpha API) + schema = None + if ns.HasField("schema") and ns.schema: + fields_dict = {} + if ns.schema.fields: + for field_name, field_config in ns.schema.fields.items(): + fields_dict[field_name] = {"type": getattr(field_config, "type", "string")} + if fields_dict: + schema = CreateNamespaceRequestSchema( + fields={ + k: CreateNamespaceRequestSchemaFields( + type=v.get("type", "string"), _check_type=False + ) + for k, v in fields_dict.items() + }, + _check_type=False, ) namespaces[idx] = NamespaceDescription( - name=ns.name, - record_count=ns.record_count, - indexed_fields=indexed_fields, - _check_type=False, + name=ns.name, record_count=ns.record_count, schema=schema, _check_type=False ) # Parse pagination if present diff --git a/pinecone/pinecone.py b/pinecone/pinecone.py index bda554f56..25e52dcee 100644 --- a/pinecone/pinecone.py +++ b/pinecone/pinecone.py @@ -32,7 +32,7 @@ from pinecone.core.openapi.db_control.model.read_capacity_dedicated_spec import ( ReadCapacityDedicatedSpec, ) - from pinecone.core.openapi.db_control.model.backup_model_schema import BackupModelSchema + from pinecone.core.openapi.db_control.model.schema import Schema from pinecone.db_control.enums import ( Metric, VectorType, @@ -536,7 +536,7 @@ def create_index_for_model( | dict[ str, dict[str, Any] ] # Dict with "fields" wrapper: {"fields": {field_name: {...}}, ...} - | "BackupModelSchema" # OpenAPI model instance + | "Schema" # OpenAPI model instance ) | None = None, timeout: int | None = None, @@ -563,7 +563,7 @@ def create_index_for_model( :param schema: Optional metadata schema configuration. You can specify ``schema`` to configure which metadata fields are filterable. The schema can be provided as a dictionary mapping field names to their configurations (e.g., ``{"genre": {"filterable": True}}``) or as a dictionary with a ``fields`` key (e.g., ``{"fields": {"genre": {"filterable": True}}}``). - :type schema: Optional[Union[dict[str, MetadataSchemaFieldConfig], dict[str, dict[str, Any]], BackupModelSchema]] + :type schema: Optional[Union[dict[str, MetadataSchemaFieldConfig], dict[str, dict[str, Any]], Schema]] :type timeout: Optional[int] :param timeout: Specify the number of seconds to wait until index is ready to receive data. If None, wait indefinitely; if >=0, time out after this many seconds; if -1, return immediately and do not wait. 
diff --git a/pinecone/pinecone_asyncio.py b/pinecone/pinecone_asyncio.py index 54e729e05..bd4e4954c 100644 --- a/pinecone/pinecone_asyncio.py +++ b/pinecone/pinecone_asyncio.py @@ -54,7 +54,7 @@ from pinecone.core.openapi.db_control.model.read_capacity_dedicated_spec import ( ReadCapacityDedicatedSpec, ) - from pinecone.core.openapi.db_control.model.backup_model_schema import BackupModelSchema + from pinecone.core.openapi.db_control.model.schema import Schema from pinecone.core.openapi.db_control.api.manage_indexes_api import AsyncioManageIndexesApi from pinecone.db_control.index_host_store import IndexHostStore @@ -293,7 +293,7 @@ async def create_index_for_model( | dict[ str, dict[str, Any] ] # Dict with "fields" wrapper: {"fields": {field_name: {...}}, ...} - | "BackupModelSchema" # OpenAPI model instance + | "Schema" # OpenAPI model instance ) | None = None, timeout: int | None = None, diff --git a/pinecone/pinecone_interface_asyncio.py b/pinecone/pinecone_interface_asyncio.py index 13337c5e3..c4ea81ecc 100644 --- a/pinecone/pinecone_interface_asyncio.py +++ b/pinecone/pinecone_interface_asyncio.py @@ -47,7 +47,7 @@ from pinecone.core.openapi.db_control.model.read_capacity_dedicated_spec import ( ReadCapacityDedicatedSpec, ) - from pinecone.core.openapi.db_control.model.backup_model_schema import BackupModelSchema + from pinecone.core.openapi.db_control.model.schema import Schema class PineconeAsyncioDBControlInterface(ABC): @@ -428,7 +428,7 @@ async def create_index_for_model( | dict[ str, dict[str, Any] ] # Dict with "fields" wrapper: {"fields": {field_name: {...}}, ...} - | "BackupModelSchema" # OpenAPI model instance + | "Schema" # OpenAPI model instance ) | None = None, timeout: int | None = None, @@ -454,7 +454,7 @@ async def create_index_for_model( :param schema: Optional metadata schema configuration. You can specify ``schema`` to configure which metadata fields are filterable. The schema can be provided as a dictionary mapping field names to their configurations (e.g., ``{"genre": {"filterable": True}}``) or as a dictionary with a ``fields`` key (e.g., ``{"fields": {"genre": {"filterable": True}}}``). - :type schema: Optional[Union[dict[str, MetadataSchemaFieldConfig], dict[str, dict[str, Any]], BackupModelSchema]] + :type schema: Optional[Union[dict[str, MetadataSchemaFieldConfig], dict[str, dict[str, Any]], Schema]] :type timeout: Optional[int] :param timeout: Specify the number of seconds to wait until index is ready to receive data. If None, wait indefinitely; if >=0, time out after this many seconds; if -1, return immediately and do not wait. 
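[Reviewer note, not part of the patch] The unit tests below exercise the legacy-to-alpha translation directly. As a quick sketch mirroring those tests, a legacy create_index_request call now yields schema + deployment instead of spec + dimension + metric (exact model shapes follow the alpha OpenAPI codegen):

    from pinecone import ServerlessSpec
    from pinecone.db_control.request_factory import PineconeDBControlRequestFactory

    req = PineconeDBControlRequestFactory.create_index_request(
        name="test-index",
        metric="cosine",
        dimension=1024,
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # dimension and metric now live on the reserved "_values" schema field
    field = req.schema.fields["_values"]
    assert field.dimension == 1024 and field.metric == "cosine"
    # the deployment dict replaces the legacy spec object
    assert req.deployment["deployment_type"] == "serverless"
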
diff --git a/tests/unit/db_control/test_index.py b/tests/unit/db_control/test_index.py index e30541044..99e5c352f 100644 --- a/tests/unit/db_control/test_index.py +++ b/tests/unit/db_control/test_index.py @@ -32,18 +32,23 @@ def build_client_w_faked_response(mocker, body: str, status: int = 200): class TestIndexResource: def test_describe_index(self, mocker): + # Alpha API response format uses schema + deployment instead of spec + dimension + metric body = """ { "name": "test-index", - "description": "test-description", - "dimension": 1024, - "metric": "cosine", - "spec": { - "byoc": { - "environment": "test-environment" + "schema": { + "fields": { + "_values": { + "type": "dense_vector", + "dimension": 1024, + "metric": "cosine" + } } }, - "vector_type": "dense", + "deployment": { + "deployment_type": "byoc", + "environment": "test-environment" + }, "status": { "ready": true, "state": "Ready" @@ -59,10 +64,10 @@ def test_describe_index(self, mocker): desc = index_resource.describe(name="test-index") assert desc.name == "test-index" - assert desc.description == "test-description" + # Access through compatibility shim assert desc.dimension == 1024 assert desc.metric == "cosine" - assert desc.spec["byoc"]["environment"] == "test-environment" + assert desc.spec.byoc.environment == "test-environment" assert desc.vector_type == "dense" assert desc.status.ready == True assert desc.deletion_protection == "disabled" @@ -101,12 +106,12 @@ def test_create_rejects_spec_and_schema_together(self, mocker): ) def test_create_with_spec_uses_legacy_path(self, mocker): - """Test that create() with spec uses the legacy request factory method.""" + """Test that create() with spec translates to alpha API schema+deployment format.""" + # Alpha API response uses schema + deployment instead of spec + dimension + metric body = """{ "name": "test-index", - "dimension": 1536, - "metric": "cosine", - "spec": {"serverless": {"cloud": "aws", "region": "us-east-1"}}, + "schema": {"fields": {"_values": {"type": "dense_vector", "dimension": 1536, "metric": "cosine"}}}, + "deployment": {"deployment_type": "serverless", "cloud": "aws", "region": "us-east-1"}, "status": {"ready": true, "state": "Ready"}, "host": "test.pinecone.io" }""" @@ -122,6 +127,8 @@ def test_create_with_spec_uses_legacy_path(self, mocker): ) assert result.name == "test-index" + # Access dimension/metric through compatibility layer + assert result.dimension == 1536 # Verify the request was made assert mock_request.call_count == 1 diff --git a/tests/unit/db_control/test_index_request_factory.py b/tests/unit/db_control/test_index_request_factory.py index 7a322cc21..ec7342419 100644 --- a/tests/unit/db_control/test_index_request_factory.py +++ b/tests/unit/db_control/test_index_request_factory.py @@ -13,8 +13,17 @@ from pinecone.db_control.request_factory import PineconeDBControlRequestFactory +def _get_schema_field(req, field_name="_values"): + """Helper to access schema fields from CreateIndexRequest.""" + schema = req.schema + if hasattr(schema, "fields"): + return schema.fields.get(field_name) + return None + + class TestIndexRequestFactory: def test_create_index_request_with_spec_byoc(self): + """Test create_index_request translates legacy spec to schema+deployment format.""" req = PineconeDBControlRequestFactory.create_index_request( name="test-index", metric="cosine", @@ -22,13 +31,19 @@ def test_create_index_request_with_spec_byoc(self): spec=ByocSpec(environment="test-byoc-spec-id"), ) assert req.name == "test-index" - assert req.metric == 
"cosine" - assert req.dimension == 1024 - assert req.spec.byoc.environment == "test-byoc-spec-id" - assert req.vector_type == "dense" + # In alpha API, metric/dimension are in schema.fields._values + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert field.type == "dense_vector" + # deployment has deployment_type instead of spec + assert req.deployment["deployment_type"] == "byoc" + assert req.deployment["environment"] == "test-byoc-spec-id" assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_serverless(self): + """Test create_index_request with ServerlessSpec.""" req = PineconeDBControlRequestFactory.create_index_request( name="test-index", metric="cosine", @@ -36,14 +51,17 @@ def test_create_index_request_with_spec_serverless(self): spec=ServerlessSpec(cloud="aws", region="us-east-1"), ) assert req.name == "test-index" - assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.serverless.cloud == "aws" - assert req.spec.serverless.region == "us-east-1" - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert req.deployment["deployment_type"] == "serverless" + assert req.deployment["cloud"] == "aws" + assert req.deployment["region"] == "us-east-1" assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_serverless_dict(self): + """Test create_index_request with serverless spec as dict.""" req = PineconeDBControlRequestFactory.create_index_request( name="test-index", metric="cosine", @@ -51,11 +69,13 @@ def test_create_index_request_with_spec_serverless_dict(self): spec={"serverless": {"cloud": "aws", "region": "us-east-1"}}, ) assert req.name == "test-index" - assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.serverless.cloud == "aws" - assert req.spec.serverless.region == "us-east-1" - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert req.deployment["deployment_type"] == "serverless" + assert req.deployment["cloud"] == "aws" + assert req.deployment["region"] == "us-east-1" assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_serverless_dict_enums(self): @@ -67,14 +87,17 @@ def test_create_index_request_with_spec_serverless_dict_enums(self): spec={"serverless": {"cloud": CloudProvider.AWS, "region": AwsRegion.US_EAST_1}}, ) assert req.name == "test-index" - assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.serverless.cloud == "aws" - assert req.spec.serverless.region == "us-east-1" - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert req.deployment["deployment_type"] == "serverless" + assert req.deployment["cloud"] == "aws" + assert req.deployment["region"] == "us-east-1" assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_byoc_dict(self): + """Test create_index_request with byoc spec as dict.""" req = PineconeDBControlRequestFactory.create_index_request( name="test-index", metric="cosine", @@ -82,10 +105,12 @@ def test_create_index_request_with_spec_byoc_dict(self): spec={"byoc": {"environment": "test-byoc-spec-id"}}, ) assert req.name == "test-index" - 
assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.byoc.environment == "test-byoc-spec-id" - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert req.deployment["deployment_type"] == "byoc" + assert req.deployment["environment"] == "test-byoc-spec-id" assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_pod(self): @@ -97,11 +122,13 @@ def test_create_index_request_with_spec_pod(self): spec=PodSpec(environment="us-west1-gcp", pod_type="p1.x1"), ) assert req.name == "test-index" - assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.pod.environment == "us-west1-gcp" - assert req.spec.pod.pod_type == "p1.x1" - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert req.deployment["deployment_type"] == "pod" + assert req.deployment["environment"] == "us-west1-gcp" + assert req.deployment["pod_type"] == "p1.x1" assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_pod_all_fields(self): @@ -121,16 +148,17 @@ def test_create_index_request_with_spec_pod_all_fields(self): ), ) assert req.name == "test-index" - assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.pod.environment == "us-west1-gcp" - assert req.spec.pod.pod_type == "p1.x1" - assert req.spec.pod.pods == 2 - assert req.spec.pod.replicas == 1 - assert req.spec.pod.shards == 1 - assert req.spec.pod.metadata_config.indexed == ["field1", "field2"] - assert req.spec.pod.source_collection == "my-collection" - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert req.deployment["deployment_type"] == "pod" + assert req.deployment["environment"] == "us-west1-gcp" + assert req.deployment["pod_type"] == "p1.x1" + assert req.deployment["pods"] == 2 + assert req.deployment["replicas"] == 1 + assert req.deployment["shards"] == 1 + # Note: metadata_config and source_collection not yet supported in alpha deployment assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_pod_dict(self): @@ -142,11 +170,13 @@ def test_create_index_request_with_spec_pod_dict(self): spec={"pod": {"environment": "us-west1-gcp", "pod_type": "p1.x1"}}, ) assert req.name == "test-index" - assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.pod.environment == "us-west1-gcp" - assert req.spec.pod.pod_type == "p1.x1" - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert req.deployment["deployment_type"] == "pod" + assert req.deployment["environment"] == "us-west1-gcp" + assert req.deployment["pod_type"] == "p1.x1" assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_pod_dict_enums(self): @@ -160,11 +190,13 @@ def test_create_index_request_with_spec_pod_dict_enums(self): }, ) assert req.name == "test-index" - assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.pod.environment == "us-west1-gcp" - assert req.spec.pod.pod_type == "p1.x1" - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert 
field.dimension == 1024 + assert req.deployment["deployment_type"] == "pod" + assert req.deployment["environment"] == "us-west1-gcp" + assert req.deployment["pod_type"] == "p1.x1" assert req.deletion_protection == "disabled" def test_create_index_request_with_spec_pod_with_metadata_config(self): @@ -180,12 +212,14 @@ def test_create_index_request_with_spec_pod_with_metadata_config(self): ), ) assert req.name == "test-index" - assert req.metric == "cosine" - assert req.dimension == 1024 - assert req.spec.pod.environment == "us-west1-gcp" - assert req.spec.pod.pod_type == "p1.x1" - assert req.spec.pod.metadata_config.indexed == ["genre", "year"] - assert req.vector_type == "dense" + field = _get_schema_field(req) + assert field is not None + assert field.metric == "cosine" + assert field.dimension == 1024 + assert req.deployment["deployment_type"] == "pod" + assert req.deployment["environment"] == "us-west1-gcp" + assert req.deployment["pod_type"] == "p1.x1" + # Note: metadata_config not yet supported in alpha deployment assert req.deletion_protection == "disabled" def test_parse_read_capacity_ondemand(self): @@ -199,7 +233,11 @@ def test_parse_read_capacity_ondemand(self): assert result.mode == "OnDemand" def test_parse_read_capacity_dedicated_with_manual(self): - """Test parsing Dedicated read capacity with manual scaling configuration.""" + """Test parsing Dedicated read capacity with manual scaling configuration. + + In alpha API, scaling is an object with strategy, replicas, and shards fields + instead of a string with separate manual object. + """ read_capacity = { "mode": "Dedicated", "dedicated": { @@ -214,69 +252,41 @@ def test_parse_read_capacity_dedicated_with_manual(self): ) ) assert result.mode == "Dedicated" - assert result.dedicated.node_type == "t1" - assert result.dedicated.scaling == "Manual" - assert result.dedicated.manual.shards == 2 - assert result.dedicated.manual.replicas == 3 - - def test_parse_read_capacity_dedicated_missing_manual(self): - """Test that missing manual configuration raises ValueError when scaling is Manual.""" + # In alpha API, node_type and scaling are top-level on ReadCapacityDedicatedSpec + assert result.node_type == "t1" + # scaling is now an object with strategy, replicas, shards + assert result.scaling.strategy == "Manual" + assert result.scaling.shards == 2 + assert result.scaling.replicas == 3 + + def test_parse_read_capacity_dedicated_with_defaults(self): + """Test that missing shards/replicas default to 1 in alpha API.""" read_capacity = {"mode": "Dedicated", "dedicated": {"node_type": "t1", "scaling": "Manual"}} - with pytest.raises(ValueError) as exc_info: + result = ( PineconeDBControlRequestFactory._PineconeDBControlRequestFactory__parse_read_capacity( read_capacity ) - assert "manual" in str(exc_info.value).lower() - assert "required" in str(exc_info.value).lower() - - def test_parse_read_capacity_dedicated_missing_shards(self): - """Test that missing shards in manual configuration raises ValueError.""" + ) + assert result.mode == "Dedicated" + assert result.node_type == "t1" + assert result.scaling.strategy == "Manual" + # Alpha API defaults shards and replicas to 1 + assert result.scaling.shards == 1 + assert result.scaling.replicas == 1 + + def test_parse_read_capacity_dedicated_partial_manual(self): + """Test that partial manual config uses defaults for missing values.""" read_capacity = { "mode": "Dedicated", "dedicated": {"node_type": "t1", "scaling": "Manual", "manual": {"replicas": 3}}, } - with pytest.raises(ValueError) as 
exc_info: - PineconeDBControlRequestFactory._PineconeDBControlRequestFactory__parse_read_capacity( - read_capacity - ) - assert "shards" in str(exc_info.value).lower() - - def test_parse_read_capacity_dedicated_missing_replicas(self): - """Test that missing replicas in manual configuration raises ValueError.""" - read_capacity = { - "mode": "Dedicated", - "dedicated": {"node_type": "t1", "scaling": "Manual", "manual": {"shards": 2}}, - } - with pytest.raises(ValueError) as exc_info: - PineconeDBControlRequestFactory._PineconeDBControlRequestFactory__parse_read_capacity( - read_capacity - ) - assert "replicas" in str(exc_info.value).lower() - - def test_parse_read_capacity_dedicated_missing_both_shards_and_replicas(self): - """Test that missing both shards and replicas raises appropriate error.""" - read_capacity = { - "mode": "Dedicated", - "dedicated": {"node_type": "t1", "scaling": "Manual", "manual": {}}, - } - with pytest.raises(ValueError) as exc_info: - PineconeDBControlRequestFactory._PineconeDBControlRequestFactory__parse_read_capacity( - read_capacity - ) - assert "shards" in str(exc_info.value).lower() - assert "replicas" in str(exc_info.value).lower() - - def test_parse_read_capacity_dedicated_invalid_manual_type(self): - """Test that invalid manual type (not a dict) raises ValueError.""" - read_capacity = { - "mode": "Dedicated", - "dedicated": {"node_type": "t1", "scaling": "Manual", "manual": "invalid"}, - } - with pytest.raises(ValueError) as exc_info: + result = ( PineconeDBControlRequestFactory._PineconeDBControlRequestFactory__parse_read_capacity( read_capacity ) - assert "dictionary" in str(exc_info.value).lower() + ) + assert result.scaling.replicas == 3 + assert result.scaling.shards == 1 # default def test_parse_read_capacity_dedicated_missing_node_type(self): """Test that missing node_type raises ValueError.""" @@ -287,14 +297,18 @@ def test_parse_read_capacity_dedicated_missing_node_type(self): ) assert "node_type" in str(exc_info.value).lower() - def test_parse_read_capacity_dedicated_missing_scaling(self): - """Test that missing scaling raises ValueError.""" + def test_parse_read_capacity_dedicated_default_scaling(self): + """Test that missing scaling defaults to Manual.""" read_capacity = {"mode": "Dedicated", "dedicated": {"node_type": "t1"}} - with pytest.raises(ValueError) as exc_info: + result = ( PineconeDBControlRequestFactory._PineconeDBControlRequestFactory__parse_read_capacity( read_capacity ) - assert "scaling" in str(exc_info.value).lower() + ) + assert result.mode == "Dedicated" + assert result.node_type == "t1" + # Alpha API defaults scaling strategy to "Manual" + assert result.scaling.strategy == "Manual" class TestTranslateLegacyRequest: diff --git a/tests/unit/models/test_index_list.py b/tests/unit/models/test_index_list.py index bdd9b2842..57334464c 100644 --- a/tests/unit/models/test_index_list.py +++ b/tests/unit/models/test_index_list.py @@ -5,45 +5,54 @@ IndexModel as OpenApiIndexModel, IndexModelStatus, ) +from pinecone.core.openapi.db_control.model.schema import Schema +from pinecone.core.openapi.db_control.model.schema_fields import SchemaFields +from pinecone.core.openapi.db_control.model.deployment import Deployment + + +def _create_test_schema(dimension=10): + """Create a test schema with a dense vector field.""" + return Schema( + fields={"_values": SchemaFields(type="dense_vector", dimension=dimension, metric="cosine")}, + _check_type=False, + ) + + +def _create_test_pod_deployment(): + """Create a test pod deployment.""" + return 
Deployment( + deployment_type="pod", + environment="us-west1-gcp", + pod_type="p1.x1", + pods=1, + replicas=1, + shards=1, + _check_type=False, + ) @pytest.fixture def index_list_response(): + """Fixture using alpha API structure with schema + deployment.""" return OpenApiIndexList( indexes=[ OpenApiIndexModel( name="test-index-1", - dimension=2, - metric="cosine", + schema=_create_test_schema(dimension=2), + deployment=_create_test_pod_deployment(), host="https://test-index-1.pinecone.io", - status=IndexModelStatus(ready=True, state="Ready"), + status=IndexModelStatus(ready=True, state="Ready", _check_type=False), deletion_protection="enabled", - spec={ - "pod": { - "environment": "us-west1-gcp", - "pod_type": "p1.x1", - "pods": 1, - "replicas": 1, - "shards": 1, - } - }, + _check_type=False, ), OpenApiIndexModel( name="test-index-2", - dimension=3, - metric="cosine", + schema=_create_test_schema(dimension=3), + deployment=_create_test_pod_deployment(), host="https://test-index-2.pinecone.io", - status=IndexModelStatus(ready=True, state="Ready"), + status=IndexModelStatus(ready=True, state="Ready", _check_type=False), deletion_protection="disabled", - spec={ - "pod": { - "environment": "us-west1-gcp", - "pod_type": "p1.x1", - "pods": 1, - "replicas": 1, - "shards": 1, - } - }, + _check_type=False, ), ], _check_type=False, @@ -57,8 +66,9 @@ def test_index_list_has_length(self, index_list_response): def test_index_list_is(self, index_list_response): iil = IndexList(index_list_response) assert [i["name"] for i in iil] == ["test-index-1", "test-index-2"] - assert [i["dimension"] for i in iil] == [2, 3] - assert [i["metric"] for i in iil] == ["cosine", "cosine"] + # dimension and metric are accessed through compatibility layer + assert [i.dimension for i in iil] == [2, 3] + assert [i.metric for i in iil] == ["cosine", "cosine"] def test_index_list_names_syntactic_sugar(self, index_list_response): iil = IndexList(index_list_response) @@ -66,15 +76,16 @@ def test_index_list_names_syntactic_sugar(self, index_list_response): def test_index_list_getitem(self, index_list_response): iil = IndexList(index_list_response) - input = index_list_response - assert input.indexes[0].name == iil[0].name - assert input.indexes[0].dimension == iil[0].dimension - assert input.indexes[0].metric == iil[0].metric - assert input.indexes[0].host == iil[0].host - assert input.indexes[0].deletion_protection == iil[0].deletion_protection + input_list = index_list_response + assert input_list.indexes[0].name == iil[0].name + # Access dimension/metric through compatibility layer + assert iil[0].dimension == 2 + assert iil[0].metric == "cosine" + assert input_list.indexes[0].host == iil[0].host + assert input_list.indexes[0].deletion_protection == iil[0].deletion_protection assert iil[0].deletion_protection == "enabled" - assert input.indexes[1].name == iil[1].name + assert input_list.indexes[1].name == iil[1].name def test_index_list_proxies_methods(self, index_list_response): # Forward compatibility, in case we add more attributes to IndexList for pagination diff --git a/tests/unit/openapi_support/test_api_client.py b/tests/unit/openapi_support/test_api_client.py index 35abc74bc..801279483 100644 --- a/tests/unit/openapi_support/test_api_client.py +++ b/tests/unit/openapi_support/test_api_client.py @@ -1,10 +1,28 @@ from pinecone.core.openapi.db_control.models import IndexModel, IndexModelStatus +from pinecone.core.openapi.db_control.model.schema import Schema +from pinecone.core.openapi.db_control.model.schema_fields 
import SchemaFields +from pinecone.core.openapi.db_control.model.deployment import Deployment from pinecone.core.openapi.db_data.models import VectorValues from pinecone.openapi_support.serializer import Serializer from pinecone.openapi_support.api_client_utils import parameters_to_tuples from datetime import date, datetime +def _create_test_schema(dimension=10, metric="cosine"): + """Create a test schema for IndexModel.""" + return Schema( + fields={"_values": SchemaFields(type="dense_vector", dimension=dimension, metric=metric)}, + _check_type=False, + ) + + +def _create_test_deployment(deployment_type="serverless"): + """Create a test deployment for IndexModel.""" + return Deployment( + deployment_type=deployment_type, cloud="aws", region="us-east-1", _check_type=False + ) + + class TestSanitization: def test_sanitize_for_serialization_returns_basic_types(self): # Return basic types without modification @@ -55,43 +73,38 @@ def test_sanitization_for_serialization_serializes_io(self): assert Serializer.sanitize_for_serialization(io.BytesIO(b"test")) == b"test" def test_sanitize_for_serialization_serializes_model_normal(self): + """Test that IndexModel with alpha API structure is properly serialized.""" + schema = _create_test_schema(dimension=10, metric="cosine") + deployment = _create_test_deployment() m = IndexModel( name="myindex", - dimension=10, - metric="cosine", + schema=schema, + deployment=deployment, host="localhost", - spec={}, - status=IndexModelStatus(ready=True, state="Ready"), - vector_type="dense", + status=IndexModelStatus(ready=True, state="Ready", _check_type=False), + _check_type=False, ) - assert Serializer.sanitize_for_serialization(m) == { - "name": "myindex", - "dimension": 10, - "metric": "cosine", - "host": "localhost", - "spec": {}, - "status": {"ready": True, "state": "Ready"}, - "vector_type": "dense", - } + result = Serializer.sanitize_for_serialization(m) + # Check key fields in the serialized output + assert result["name"] == "myindex" + assert result["host"] == "localhost" + assert result["status"] == {"ready": True, "state": "Ready"} + # In alpha API, schema and deployment are in the output + assert "schema" in result + assert "deployment" in result m2 = IndexModel( name="myindex2", - metric="cosine", + schema=_create_test_schema(dimension=5, metric="cosine"), + deployment=deployment, host="localhost", - spec={}, - status=IndexModelStatus(ready=True, state="Ready"), - vector_type="sparse", + status=IndexModelStatus(ready=True, state="Ready", _check_type=False), deletion_protection="enabled", + _check_type=False, ) - assert Serializer.sanitize_for_serialization(m2) == { - "name": "myindex2", - "metric": "cosine", - "host": "localhost", - "spec": {}, - "status": {"ready": True, "state": "Ready"}, - "vector_type": "sparse", - "deletion_protection": "enabled", - } + result2 = Serializer.sanitize_for_serialization(m2) + assert result2["name"] == "myindex2" + assert result2["deletion_protection"] == "enabled" def test_sanitize_for_serialization_serializes_model_simple(self): # ModelSimple is used to model named values which are not objects diff --git a/tests/unit/test_control.py b/tests/unit/test_control.py index a48544d87..f74beb664 100644 --- a/tests/unit/test_control.py +++ b/tests/unit/test_control.py @@ -12,56 +12,74 @@ PodType, ) from pinecone.core.openapi.db_control.models import IndexList, IndexModel, IndexModelStatus +from pinecone.core.openapi.db_control.model.schema import Schema +from pinecone.core.openapi.db_control.model.schema_fields import 
SchemaFields
+from pinecone.core.openapi.db_control.model.deployment import Deployment
 from pinecone.utils import PluginAware
 
 import time
 
 
+def _create_test_schema():
+    """Create a test schema with a dense vector field."""
+    return Schema(
+        fields={"_values": SchemaFields(type="dense_vector", dimension=10, metric="euclidean")},
+        _check_type=False,
+    )
+
+
+def _create_test_deployment():
+    """Create a test serverless deployment."""
+    return Deployment(
+        deployment_type="serverless", cloud="aws", region="us-west-1", _check_type=False
+    )
+
+
 def description_with_status(status: bool):
     state = "Ready" if status else "Initializing"
     return IndexModel(
         name="foo",
-        status=IndexModelStatus(ready=status, state=state),
-        dimension=10,
-        deletion_protection="enabled",
+        schema=_create_test_schema(),
+        deployment=_create_test_deployment(),
         host="https://foo.pinecone.io",
-        metric="euclidean",
-        spec={"serverless": {"cloud": "aws", "region": "us-west1"}},
+        status=IndexModelStatus(ready=status, state=state, _check_type=False),
+        deletion_protection="enabled",
+        _check_type=False,
     )
 
 
 @pytest.fixture
 def index_list_response():
+    schema = _create_test_schema()
+    deployment = _create_test_deployment()
+    status = IndexModelStatus(ready=True, state="Ready", _check_type=False)
     return IndexList(
         indexes=[
             IndexModel(
                 name="index1",
-                dimension=10,
-                metric="euclidean",
+                schema=schema,
+                deployment=deployment,
                 host="asdf.pinecone.io",
-                status={"ready": True},
-                spec={},
+                status=status,
                 deletion_protection="enabled",
                 _check_type=False,
             ),
             IndexModel(
                 name="index2",
-                dimension=10,
-                metric="euclidean",
+                schema=schema,
+                deployment=deployment,
                 host="asdf.pinecone.io",
-                status={"ready": True},
-                spec={},
+                status=status,
                 deletion_protection="enabled",
                 _check_type=False,
             ),
             IndexModel(
                 name="index3",
-                dimension=10,
-                metric="euclidean",
+                schema=schema,
+                deployment=deployment,
                 host="asdf.pinecone.io",
-                status={"ready": True},
-                spec={},
+                status=status,
                 deletion_protection="disabled",
                 _check_type=False,
             ),
diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py
index 6117d444a..f5137ab5f 100644
--- a/tests/unit/test_index.py
+++ b/tests/unit/test_index.py
@@ -1,10 +1,21 @@
 import pandas as pd
 import pytest
 
-from pinecone.db_data import _Index, _IndexAsyncio
+from pinecone.db_data import _Index
 import pinecone.core.openapi.db_data.models as oai
 from pinecone import QueryResponse, UpsertResponse, Vector
 
+# Optional asyncio support: aiohttp may not be installed.
+# Probe for aiohttp itself: importing _IndexAsyncio succeeds without it, but using the class requires aiohttp.
+try:
+    import aiohttp  # noqa: F401
+    from pinecone.db_data import _IndexAsyncio
+
+    HAS_ASYNCIO = True
+except ImportError:
+    _IndexAsyncio = None  # type: ignore
+    HAS_ASYNCIO = False
+
 
 class TestRestIndex:
     def setup_method(self):
@@ -633,6 +644,7 @@ def test_update_withDryRunAndAllParams_updateWithDryRunAndAllParams(self, mocker
 
     # region: asyncio update tests
 
+    @pytest.mark.skipif(not HAS_ASYNCIO, reason="asyncio dependencies not installed")
     @pytest.mark.asyncio
     async def test_asyncio_update_withDryRun_updateWithDryRun(self, mocker):
         """Test asyncio update with dry_run parameter."""
@@ -649,6 +661,7 @@ async def test_asyncio_update_withDryRun_updateWithDryRun(self, mocker):
             oai.UpdateRequest(filter=self.filter1, dry_run=True, namespace="ns")
         )
 
+    @pytest.mark.skipif(not HAS_ASYNCIO, reason="asyncio dependencies not installed")
     @pytest.mark.asyncio
     async def test_asyncio_update_withDryRunAndSetMetadata_updateWithDryRunAndSetMetadata(
         self, mocker
@@ -671,6 
+684,7 @@ async def test_asyncio_update_withDryRunAndSetMetadata_updateWithDryRunAndSetMet ) ) + @pytest.mark.skipif(not HAS_ASYNCIO, reason="asyncio dependencies not installed") @pytest.mark.asyncio async def test_asyncio_update_withDryRunFalse_updateWithDryRunFalse(self, mocker): """Test asyncio update with dry_run=False.""" @@ -687,6 +701,7 @@ async def test_asyncio_update_withDryRunFalse_updateWithDryRunFalse(self, mocker oai.UpdateRequest(filter=self.filter1, dry_run=False, namespace="ns") ) + @pytest.mark.skipif(not HAS_ASYNCIO, reason="asyncio dependencies not installed") @pytest.mark.asyncio async def test_asyncio_update_withDryRunAndAllParams_updateWithDryRunAndAllParams(self, mocker): """Test asyncio update with dry_run and all parameters.""" From 104864bc5ac0b39aefeee305770d63f1ce92c00e Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Mon, 2 Feb 2026 15:36:07 -0500 Subject: [PATCH 2/2] refactor: address Bugbot feedback - Add type validation for 'manual' dict in __parse_read_capacity - Extract __schema_dict_to_openapi_schema helper to reduce duplication - Extract _parse_proto_schema_to_openapi helper in grpc/utils.py Co-authored-by: Cursor --- pinecone/db_control/request_factory.py | 49 ++++++++++--------- pinecone/grpc/utils.py | 68 ++++++++++++-------------- 2 files changed, 56 insertions(+), 61 deletions(-) diff --git a/pinecone/db_control/request_factory.py b/pinecone/db_control/request_factory.py index d1b2145e1..b25962484 100644 --- a/pinecone/db_control/request_factory.py +++ b/pinecone/db_control/request_factory.py @@ -121,6 +121,11 @@ def __parse_read_capacity( # Handle scaling configuration scaling_strategy = dedicated_dict.get("scaling", "Manual") manual_dict = dedicated_dict.get("manual", {}) + # Validate manual_dict type + if manual_dict and not isinstance(manual_dict, dict): + raise ValueError( + "The 'manual' field must be a dictionary with 'shards' and 'replicas' keys." + ) replicas = manual_dict.get("replicas", 1) if manual_dict else 1 shards = manual_dict.get("shards", 1) if manual_dict else 1 @@ -145,6 +150,24 @@ def __parse_read_capacity( # Already a ReadCapacity model instance return read_capacity + @staticmethod + def __schema_dict_to_openapi_schema(schema_dict: dict[str, Any]) -> OpenAPISchema: + """Convert a schema dict to an OpenAPI Schema object. + + :param schema_dict: Dict with 'fields' key containing field configurations. + :returns: OpenAPI Schema object. 
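+
+        Example (illustrative sketch; the ``genre`` field and its
+        ``filterable`` flag are hypothetical, not part of any fixed schema)::
+
+            schema_dict = {"fields": {"genre": {"type": "string", "filterable": True}}}
+            schema_obj = PineconeDBControlRequestFactory.__schema_dict_to_openapi_schema(
+                schema_dict
+            )
+            # Each entry becomes an OpenAPISchemaFields instance; extra keys
+            # such as "filterable" are passed through unchanged.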
+ """ + schema_fields: dict[str, OpenAPISchemaFields] = {} + for field_name, field_config in schema_dict.get("fields", {}).items(): + if isinstance(field_config, dict): + field_type = field_config.get("type", "string") + schema_fields[field_name] = OpenAPISchemaFields( + type=field_type, + **{k: v for k, v in field_config.items() if k != "type"}, + _check_type=False, + ) + return OpenAPISchema(fields=schema_fields, _check_type=False) + @staticmethod def __parse_schema( schema: ( @@ -509,18 +532,7 @@ def create_index_with_schema_request( deployment_dict = deployment.to_dict() schema_dict = PineconeDBControlRequestFactory._serialize_schema(schema) - - # Convert schema_dict to proper Schema OpenAPI object - schema_fields = {} - for field_name, field_config in schema_dict.get("fields", {}).items(): - if isinstance(field_config, dict): - field_type = field_config.get("type", "string") - schema_fields[field_name] = OpenAPISchemaFields( - type=field_type, - **{k: v for k, v in field_config.items() if k != "type"}, - _check_type=False, - ) - schema_obj = OpenAPISchema(fields=schema_fields, _check_type=False) + schema_obj = PineconeDBControlRequestFactory.__schema_dict_to_openapi_schema(schema_dict) args = parse_non_empty_args( [ @@ -565,18 +577,7 @@ def create_index_request( deployment_dict, schema_dict = PineconeDBControlRequestFactory._translate_legacy_request( spec=spec, dimension=dimension, metric=metric, vector_type=vector_type ) - - # Convert schema_dict to proper Schema OpenAPI object - schema_fields = {} - for field_name, field_config in schema_dict.get("fields", {}).items(): - if isinstance(field_config, dict): - field_type = field_config.get("type", "string") - schema_fields[field_name] = OpenAPISchemaFields( - type=field_type, - **{k: v for k, v in field_config.items() if k != "type"}, - _check_type=False, - ) - schema_obj = OpenAPISchema(fields=schema_fields, _check_type=False) + schema_obj = PineconeDBControlRequestFactory.__schema_dict_to_openapi_schema(schema_dict) # Deployment dict is passed directly - OpenAPI model accepts dicts with _check_type=False args = parse_non_empty_args( diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index 8deb15206..79d7171fa 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -482,6 +482,35 @@ def parse_stats_response( return cast(DescribeIndexStatsResponse, result) +def _parse_proto_schema_to_openapi(proto_schema: Any) -> CreateNamespaceRequestSchema | None: + """Convert a proto schema to an OpenAPI CreateNamespaceRequestSchema model. + + :param proto_schema: A protobuf schema object with a fields attribute. + :returns: OpenAPI schema model or None if no fields present. 
+ """ + from pinecone.core.openapi.db_data.model.create_namespace_request_schema_fields import ( + CreateNamespaceRequestSchemaFields, + ) + + if not proto_schema or not proto_schema.fields: + return None + + fields_dict = {} + for field_name, field_config in proto_schema.fields.items(): + fields_dict[field_name] = {"type": getattr(field_config, "type", "string")} + + if not fields_dict: + return None + + return CreateNamespaceRequestSchema( + fields={ + k: CreateNamespaceRequestSchemaFields(type=v.get("type", "string"), _check_type=False) + for k, v in fields_dict.items() + }, + _check_type=False, + ) + + def parse_namespace_description( response: "ProtoNamespaceDescription", initial_metadata: dict[str, str] | None = None ) -> NamespaceDescription: @@ -498,25 +527,7 @@ def parse_namespace_description( # Extract schema if present (replaces indexed_fields in alpha API) schema = None if response.HasField("schema") and response.schema: - # Convert proto schema to OpenAPI model - fields_dict = {} - if response.schema.fields: - for field_name, field_config in response.schema.fields.items(): - fields_dict[field_name] = {"type": getattr(field_config, "type", "string")} - if fields_dict: - from pinecone.core.openapi.db_data.model.create_namespace_request_schema_fields import ( - CreateNamespaceRequestSchemaFields, - ) - - schema = CreateNamespaceRequestSchema( - fields={ - k: CreateNamespaceRequestSchemaFields( - type=v.get("type", "string"), _check_type=False - ) - for k, v in fields_dict.items() - }, - _check_type=False, - ) + schema = _parse_proto_schema_to_openapi(response.schema) namespace_desc = NamespaceDescription( name=name, record_count=record_count, schema=schema, _check_type=False @@ -539,10 +550,6 @@ def parse_list_namespaces_response( This optimized version directly accesses protobuf fields for better performance. """ - from pinecone.core.openapi.db_data.model.create_namespace_request_schema_fields import ( - CreateNamespaceRequestSchemaFields, - ) - # Directly iterate over namespaces # Pre-allocate namespaces list with known size for better performance namespaces_proto = response.namespaces @@ -553,20 +560,7 @@ def parse_list_namespaces_response( # Extract schema if present (replaces indexed_fields in alpha API) schema = None if ns.HasField("schema") and ns.schema: - fields_dict = {} - if ns.schema.fields: - for field_name, field_config in ns.schema.fields.items(): - fields_dict[field_name] = {"type": getattr(field_config, "type", "string")} - if fields_dict: - schema = CreateNamespaceRequestSchema( - fields={ - k: CreateNamespaceRequestSchemaFields( - type=v.get("type", "string"), _check_type=False - ) - for k, v in fields_dict.items() - }, - _check_type=False, - ) + schema = _parse_proto_schema_to_openapi(ns.schema) namespaces[idx] = NamespaceDescription( name=ns.name, record_count=ns.record_count, schema=schema, _check_type=False