From 4925446fdb7dc5a2993b0c718a3576faaa60724f Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 13 Mar 2026 12:04:00 +0000 Subject: [PATCH 01/99] Refactor hybrid queries to use `alpha_param` and remove `0.7` default in favour of `None` --- weaviate/collections/grpc/shared.py | 7 ++- .../queries/hybrid/generate/async_.pyi | 26 ++++---- .../queries/hybrid/generate/executor.py | 28 ++++----- .../queries/hybrid/generate/sync.pyi | 26 ++++---- .../queries/hybrid/query/async_.pyi | 26 ++++---- .../queries/hybrid/query/executor.py | 28 ++++----- .../collections/queries/hybrid/query/sync.pyi | 26 ++++---- weaviate/proto/v1/regen.sh | 2 +- weaviate/proto/v1/v4216/v1/base_search_pb2.py | 62 ++++++++++--------- .../proto/v1/v4216/v1/base_search_pb2.pyi | 6 +- weaviate/proto/v1/v5261/v1/base_search_pb2.py | 62 ++++++++++--------- .../proto/v1/v5261/v1/base_search_pb2.pyi | 6 +- weaviate/proto/v1/v6300/v1/base_search_pb2.py | 62 ++++++++++--------- .../proto/v1/v6300/v1/base_search_pb2.pyi | 6 +- 14 files changed, 195 insertions(+), 178 deletions(-) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 593811fb3..9a8949451 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -690,7 +690,12 @@ def _parse_hybrid( base_search_pb2.Hybrid( properties=properties, query=query, - alpha=float(alpha) if alpha is not None else None, + alpha=None + if self._weaviate_version.is_at_least(1, 36, 0) + else float(alpha) + if alpha is not None + else None, + alpha_param=alpha if self._weaviate_version.is_at_least(1, 36, 0) else None, fusion_type=( cast( base_search_pb2.Hybrid.FusionType, diff --git a/weaviate/collections/queries/hybrid/generate/async_.pyi b/weaviate/collections/queries/hybrid/generate/async_.pyi index a408a9696..182892fb9 100644 --- a/weaviate/collections/queries/hybrid/generate/async_.pyi +++ b/weaviate/collections/queries/hybrid/generate/async_.pyi @@ -42,7 +42,7 @@ class _HybridGenerateAsync(
grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -69,7 +69,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -96,7 +96,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -123,7 +123,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -150,7 +150,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -177,7 +177,7 @@ class 
_HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -204,7 +204,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -231,7 +231,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -258,7 +258,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -285,7 +285,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -312,7 
+312,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -339,7 +339,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -366,7 +366,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/generate/executor.py b/weaviate/collections/queries/hybrid/generate/executor.py index 9b6762a2b..408e434b1 100644 --- a/weaviate/collections/queries/hybrid/generate/executor.py +++ b/weaviate/collections/queries/hybrid/generate/executor.py @@ -53,7 +53,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -81,7 +81,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = 
None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -109,7 +109,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -137,7 +137,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -165,7 +165,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -193,7 +193,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -223,7 +223,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = 
None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -251,7 +251,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -279,7 +279,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -307,7 +307,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -335,7 +335,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -363,7 +363,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, 
vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -392,7 +392,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -421,7 +421,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/generate/sync.pyi b/weaviate/collections/queries/hybrid/generate/sync.pyi index d2befe438..fc430f8f3 100644 --- a/weaviate/collections/queries/hybrid/generate/sync.pyi +++ b/weaviate/collections/queries/hybrid/generate/sync.pyi @@ -41,7 +41,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -68,7 +68,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -95,7 +95,7 @@ class 
_HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -122,7 +122,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -149,7 +149,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -176,7 +176,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -203,7 +203,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -230,7 +230,7 @@ class 
_HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -257,7 +257,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -284,7 +284,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -311,7 +311,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -338,7 +338,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -365,7 +365,7 @@ class 
_HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/query/async_.pyi b/weaviate/collections/queries/hybrid/query/async_.pyi index 96628ea81..855da7fc5 100644 --- a/weaviate/collections/queries/hybrid/query/async_.pyi +++ b/weaviate/collections/queries/hybrid/query/async_.pyi @@ -34,7 +34,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -57,7 +57,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -80,7 +80,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -103,7 +103,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -126,7 +126,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -149,7 +149,7 @@ class 
_HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -172,7 +172,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -195,7 +195,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -218,7 +218,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -241,7 +241,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -264,7 +264,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -287,7 +287,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -310,7 +310,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: 
Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/query/executor.py b/weaviate/collections/queries/hybrid/query/executor.py index 7ed027abb..4c618f961 100644 --- a/weaviate/collections/queries/hybrid/query/executor.py +++ b/weaviate/collections/queries/hybrid/query/executor.py @@ -45,7 +45,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -69,7 +69,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -93,7 +93,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -117,7 +117,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -141,7 +141,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -165,7 +165,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -191,7 +191,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, 
+ alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -215,7 +215,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -239,7 +239,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -263,7 +263,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -287,7 +287,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -311,7 +311,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -336,7 +336,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -361,7 +361,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git 
a/weaviate/collections/queries/hybrid/query/sync.pyi b/weaviate/collections/queries/hybrid/query/sync.pyi index fcf5d559b..5d40db939 100644 --- a/weaviate/collections/queries/hybrid/query/sync.pyi +++ b/weaviate/collections/queries/hybrid/query/sync.pyi @@ -34,7 +34,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -57,7 +57,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -80,7 +80,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -103,7 +103,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -126,7 +126,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -149,7 +149,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -172,7 +172,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, 
query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -195,7 +195,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -218,7 +218,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -241,7 +241,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -264,7 +264,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -287,7 +287,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -310,7 +310,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/proto/v1/regen.sh b/weaviate/proto/v1/regen.sh index 4fcf2cb16..56b5391aa 100755 --- a/weaviate/proto/v1/regen.sh +++ b/weaviate/proto/v1/regen.sh @@ -9,7 +9,7 @@ SCRIPT_DIR="${0%/*}" cd "$SCRIPT_DIR" PROJECT_ROOT=$(pwd) # Get weaviate dir from arg or by navigating up from script location 
-WEAVIATE_DIR="${1:-../../../../weaviate}" +WEAVIATE_DIR="${1:-../../../../core/weaviate}" echo "Weaviate directory: $WEAVIATE_DIR" echo "Project root: $PROJECT_ROOT" diff --git a/weaviate/proto/v1/v4216/v1/base_search_pb2.py b/weaviate/proto/v1/v4216/v1/base_search_pb2.py index 5767fdb57..f263cb10c 100644 --- a/weaviate/proto/v1/v4216/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v4216/v1/base_search_pb2.py @@ -14,7 +14,7 @@ from weaviate.proto.v1.v4216.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xd0\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 
\x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operator\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 
.weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 
\x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 
\x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xfe\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 
\x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 
\x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 
\x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -26,6 +26,8 @@ _VECTORFORTARGET.fields_by_name['vector_bytes']._serialized_options = b'\030\001' _HYBRID.fields_by_name['vector']._options = None _HYBRID.fields_by_name['vector']._serialized_options = b'\030\001' + _HYBRID.fields_by_name['alpha']._options = None + _HYBRID.fields_by_name['alpha']._serialized_options = b'\030\001' _HYBRID.fields_by_name['vector_bytes']._options = None _HYBRID.fields_by_name['vector_bytes']._serialized_options = b'\030\001' _HYBRID.fields_by_name['target_vectors']._options = None @@ -56,8 +58,8 @@ _NEARTHERMALSEARCH.fields_by_name['target_vectors']._serialized_options = b'\030\001' _NEARIMUSEARCH.fields_by_name['target_vectors']._options = None _NEARIMUSEARCH.fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3337 - _globals['_COMBINATIONMETHOD']._serialized_end=3575 + _globals['_COMBINATIONMETHOD']._serialized_start=3383 + _globals['_COMBINATIONMETHOD']._serialized_end=3621 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -69,31 +71,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - 
_globals['_HYBRID']._serialized_end=1178 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1043 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1140 - _globals['_NEARVECTOR']._serialized_start=1181 - _globals['_NEARVECTOR']._serialized_end=1610 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1529 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1583 - _globals['_NEAROBJECT']._serialized_start=1613 - _globals['_NEAROBJECT']._serialized_end=1778 - _globals['_NEARTEXTSEARCH']._serialized_start=1781 - _globals['_NEARTEXTSEARCH']._serialized_end=2149 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2042 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2096 - _globals['_NEARIMAGESEARCH']._serialized_start=2152 - _globals['_NEARIMAGESEARCH']._serialized_end=2325 - _globals['_NEARAUDIOSEARCH']._serialized_start=2328 - _globals['_NEARAUDIOSEARCH']._serialized_end=2501 - _globals['_NEARVIDEOSEARCH']._serialized_start=2504 - _globals['_NEARVIDEOSEARCH']._serialized_end=2677 - _globals['_NEARDEPTHSEARCH']._serialized_start=2680 - _globals['_NEARDEPTHSEARCH']._serialized_end=2853 - _globals['_NEARTHERMALSEARCH']._serialized_start=2856 - _globals['_NEARTHERMALSEARCH']._serialized_end=3033 - _globals['_NEARIMUSEARCH']._serialized_start=3036 - _globals['_NEARIMUSEARCH']._serialized_end=3205 - _globals['_BM25']._serialized_start=3207 - _globals['_BM25']._serialized_end=3334 + _globals['_HYBRID']._serialized_end=1224 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1073 + _globals['_HYBRID_FUSIONTYPE']._serialized_end=1170 + _globals['_NEARVECTOR']._serialized_start=1227 + _globals['_NEARVECTOR']._serialized_end=1656 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1575 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1629 + _globals['_NEAROBJECT']._serialized_start=1659 + _globals['_NEAROBJECT']._serialized_end=1824 + _globals['_NEARTEXTSEARCH']._serialized_start=1827 + 
_globals['_NEARTEXTSEARCH']._serialized_end=2195 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2088 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2142 + _globals['_NEARIMAGESEARCH']._serialized_start=2198 + _globals['_NEARIMAGESEARCH']._serialized_end=2371 + _globals['_NEARAUDIOSEARCH']._serialized_start=2374 + _globals['_NEARAUDIOSEARCH']._serialized_end=2547 + _globals['_NEARVIDEOSEARCH']._serialized_start=2550 + _globals['_NEARVIDEOSEARCH']._serialized_end=2723 + _globals['_NEARDEPTHSEARCH']._serialized_start=2726 + _globals['_NEARDEPTHSEARCH']._serialized_end=2899 + _globals['_NEARTHERMALSEARCH']._serialized_start=2902 + _globals['_NEARTHERMALSEARCH']._serialized_end=3079 + _globals['_NEARIMUSEARCH']._serialized_start=3082 + _globals['_NEARIMUSEARCH']._serialized_end=3251 + _globals['_BM25']._serialized_start=3253 + _globals['_BM25']._serialized_end=3380 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi b/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi index bac1b47b8..ab8a08f38 100644 --- a/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi @@ -67,7 +67,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... 
class Hybrid(_message.Message): - __slots__ = ["query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "vector_distance", "vectors"] + __slots__ = ["query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "vector_distance", "vectors"] class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -87,6 +87,7 @@ class Hybrid(_message.Message): NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] + ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -100,9 +101,10 @@ class Hybrid(_message.Message): near_vector: NearVector targets: Targets bm25_search_operator: SearchOperatorOptions + alpha_param: float vector_distance: float vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
+ def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... class NearVector(_message.Message): __slots__ = ["vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors"] diff --git a/weaviate/proto/v1/v5261/v1/base_search_pb2.py b/weaviate/proto/v1/v5261/v1/base_search_pb2.py index cde241b1e..cae3e4745 100644 --- a/weaviate/proto/v1/v5261/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v5261/v1/base_search_pb2.py @@ -15,7 +15,7 @@ from weaviate.proto.v1.v5261.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 
\x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xd0\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operator\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xfe\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 
\x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -27,6 +27,8 @@ _globals['_VECTORFORTARGET'].fields_by_name['vector_bytes']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['vector']._loaded_options = None _globals['_HYBRID'].fields_by_name['vector']._serialized_options = b'\030\001' + _globals['_HYBRID'].fields_by_name['alpha']._loaded_options = None + _globals['_HYBRID'].fields_by_name['alpha']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['vector_bytes']._loaded_options = None _globals['_HYBRID'].fields_by_name['vector_bytes']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['target_vectors']._loaded_options = None @@ -57,8 +59,8 @@ _globals['_NEARTHERMALSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._loaded_options = None _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3337 - _globals['_COMBINATIONMETHOD']._serialized_end=3575 + 
_globals['_COMBINATIONMETHOD']._serialized_start=3383 + _globals['_COMBINATIONMETHOD']._serialized_end=3621 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -70,31 +72,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - _globals['_HYBRID']._serialized_end=1178 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1043 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1140 - _globals['_NEARVECTOR']._serialized_start=1181 - _globals['_NEARVECTOR']._serialized_end=1610 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1529 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1583 - _globals['_NEAROBJECT']._serialized_start=1613 - _globals['_NEAROBJECT']._serialized_end=1778 - _globals['_NEARTEXTSEARCH']._serialized_start=1781 - _globals['_NEARTEXTSEARCH']._serialized_end=2149 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2042 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2096 - _globals['_NEARIMAGESEARCH']._serialized_start=2152 - _globals['_NEARIMAGESEARCH']._serialized_end=2325 - _globals['_NEARAUDIOSEARCH']._serialized_start=2328 - _globals['_NEARAUDIOSEARCH']._serialized_end=2501 - _globals['_NEARVIDEOSEARCH']._serialized_start=2504 - _globals['_NEARVIDEOSEARCH']._serialized_end=2677 - _globals['_NEARDEPTHSEARCH']._serialized_start=2680 - _globals['_NEARDEPTHSEARCH']._serialized_end=2853 - _globals['_NEARTHERMALSEARCH']._serialized_start=2856 - _globals['_NEARTHERMALSEARCH']._serialized_end=3033 - _globals['_NEARIMUSEARCH']._serialized_start=3036 - _globals['_NEARIMUSEARCH']._serialized_end=3205 - _globals['_BM25']._serialized_start=3207 - _globals['_BM25']._serialized_end=3334 + _globals['_HYBRID']._serialized_end=1224 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1073 + 
_globals['_HYBRID_FUSIONTYPE']._serialized_end=1170 + _globals['_NEARVECTOR']._serialized_start=1227 + _globals['_NEARVECTOR']._serialized_end=1656 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1575 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1629 + _globals['_NEAROBJECT']._serialized_start=1659 + _globals['_NEAROBJECT']._serialized_end=1824 + _globals['_NEARTEXTSEARCH']._serialized_start=1827 + _globals['_NEARTEXTSEARCH']._serialized_end=2195 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2088 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2142 + _globals['_NEARIMAGESEARCH']._serialized_start=2198 + _globals['_NEARIMAGESEARCH']._serialized_end=2371 + _globals['_NEARAUDIOSEARCH']._serialized_start=2374 + _globals['_NEARAUDIOSEARCH']._serialized_end=2547 + _globals['_NEARVIDEOSEARCH']._serialized_start=2550 + _globals['_NEARVIDEOSEARCH']._serialized_end=2723 + _globals['_NEARDEPTHSEARCH']._serialized_start=2726 + _globals['_NEARDEPTHSEARCH']._serialized_end=2899 + _globals['_NEARTHERMALSEARCH']._serialized_start=2902 + _globals['_NEARTHERMALSEARCH']._serialized_end=3079 + _globals['_NEARIMUSEARCH']._serialized_start=3082 + _globals['_NEARIMUSEARCH']._serialized_end=3251 + _globals['_BM25']._serialized_start=3253 + _globals['_BM25']._serialized_end=3380 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi b/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi index 5f1871ac7..ca6f234bc 100644 --- a/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi @@ -67,7 +67,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... 
class Hybrid(_message.Message): - __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "vector_distance", "vectors") + __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "vector_distance", "vectors") class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = () FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -87,6 +87,7 @@ class Hybrid(_message.Message): NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] + ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -100,9 +101,10 @@ class Hybrid(_message.Message): near_vector: NearVector targets: Targets bm25_search_operator: SearchOperatorOptions + alpha_param: float vector_distance: float vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
+ def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... class NearVector(_message.Message): __slots__ = ("vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors") diff --git a/weaviate/proto/v1/v6300/v1/base_search_pb2.py b/weaviate/proto/v1/v6300/v1/base_search_pb2.py index cbf099302..bc7884f55 100644 --- a/weaviate/proto/v1/v6300/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v6300/v1/base_search_pb2.py @@ -25,7 +25,7 @@ from weaviate.proto.v1.v6300.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 
\x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xd0\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operator\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xfe\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 
\x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -37,6 +37,8 @@ _globals['_VECTORFORTARGET'].fields_by_name['vector_bytes']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['vector']._loaded_options = None _globals['_HYBRID'].fields_by_name['vector']._serialized_options = b'\030\001' + _globals['_HYBRID'].fields_by_name['alpha']._loaded_options = None + _globals['_HYBRID'].fields_by_name['alpha']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['vector_bytes']._loaded_options = None _globals['_HYBRID'].fields_by_name['vector_bytes']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['target_vectors']._loaded_options = None @@ -67,8 +69,8 @@ _globals['_NEARTHERMALSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._loaded_options = None _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3337 - _globals['_COMBINATIONMETHOD']._serialized_end=3575 + 
_globals['_COMBINATIONMETHOD']._serialized_start=3383 + _globals['_COMBINATIONMETHOD']._serialized_end=3621 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -80,31 +82,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - _globals['_HYBRID']._serialized_end=1178 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1043 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1140 - _globals['_NEARVECTOR']._serialized_start=1181 - _globals['_NEARVECTOR']._serialized_end=1610 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1529 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1583 - _globals['_NEAROBJECT']._serialized_start=1613 - _globals['_NEAROBJECT']._serialized_end=1778 - _globals['_NEARTEXTSEARCH']._serialized_start=1781 - _globals['_NEARTEXTSEARCH']._serialized_end=2149 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2042 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2096 - _globals['_NEARIMAGESEARCH']._serialized_start=2152 - _globals['_NEARIMAGESEARCH']._serialized_end=2325 - _globals['_NEARAUDIOSEARCH']._serialized_start=2328 - _globals['_NEARAUDIOSEARCH']._serialized_end=2501 - _globals['_NEARVIDEOSEARCH']._serialized_start=2504 - _globals['_NEARVIDEOSEARCH']._serialized_end=2677 - _globals['_NEARDEPTHSEARCH']._serialized_start=2680 - _globals['_NEARDEPTHSEARCH']._serialized_end=2853 - _globals['_NEARTHERMALSEARCH']._serialized_start=2856 - _globals['_NEARTHERMALSEARCH']._serialized_end=3033 - _globals['_NEARIMUSEARCH']._serialized_start=3036 - _globals['_NEARIMUSEARCH']._serialized_end=3205 - _globals['_BM25']._serialized_start=3207 - _globals['_BM25']._serialized_end=3334 + _globals['_HYBRID']._serialized_end=1224 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1073 + 
_globals['_HYBRID_FUSIONTYPE']._serialized_end=1170 + _globals['_NEARVECTOR']._serialized_start=1227 + _globals['_NEARVECTOR']._serialized_end=1656 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1575 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1629 + _globals['_NEAROBJECT']._serialized_start=1659 + _globals['_NEAROBJECT']._serialized_end=1824 + _globals['_NEARTEXTSEARCH']._serialized_start=1827 + _globals['_NEARTEXTSEARCH']._serialized_end=2195 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2088 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2142 + _globals['_NEARIMAGESEARCH']._serialized_start=2198 + _globals['_NEARIMAGESEARCH']._serialized_end=2371 + _globals['_NEARAUDIOSEARCH']._serialized_start=2374 + _globals['_NEARAUDIOSEARCH']._serialized_end=2547 + _globals['_NEARVIDEOSEARCH']._serialized_start=2550 + _globals['_NEARVIDEOSEARCH']._serialized_end=2723 + _globals['_NEARDEPTHSEARCH']._serialized_start=2726 + _globals['_NEARDEPTHSEARCH']._serialized_end=2899 + _globals['_NEARTHERMALSEARCH']._serialized_start=2902 + _globals['_NEARTHERMALSEARCH']._serialized_end=3079 + _globals['_NEARIMUSEARCH']._serialized_start=3082 + _globals['_NEARIMUSEARCH']._serialized_end=3251 + _globals['_BM25']._serialized_start=3253 + _globals['_BM25']._serialized_end=3380 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi b/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi index 80abcb05d..9ad165a42 100644 --- a/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi @@ -68,7 +68,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... 
class Hybrid(_message.Message): - __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "vector_distance", "vectors") + __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "vector_distance", "vectors") class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = () FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -88,6 +88,7 @@ class Hybrid(_message.Message): NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] + ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -101,9 +102,10 @@ class Hybrid(_message.Message): near_vector: NearVector targets: Targets bm25_search_operator: SearchOperatorOptions + alpha_param: float vector_distance: float vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
+ def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... class NearVector(_message.Message): __slots__ = ("vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors") From 1899ffdb5959a50e69ae7fe1afcac87ab72654b8 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 12:12:49 +0000 Subject: [PATCH 02/99] Remove mistakenly commited local change to `regen.sh` --- weaviate/proto/v1/regen.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/proto/v1/regen.sh b/weaviate/proto/v1/regen.sh index 56b5391aa..4fcf2cb16 100755 --- a/weaviate/proto/v1/regen.sh +++ b/weaviate/proto/v1/regen.sh @@ -9,7 +9,7 @@ SCRIPT_DIR="${0%/*}" cd "$SCRIPT_DIR" PROJECT_ROOT=$(pwd) # Get weaviate dir from arg or by navigating up from script location -WEAVIATE_DIR="${1:-../../../../core/weaviate}" +WEAVIATE_DIR="${1:-../../../../weaviate}" echo "Weaviate directory: $WEAVIATE_DIR" echo "Project root: $PROJECT_ROOT" From c057270f76b495bafbc33ffe601452b4f482a2e0 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 12:13:51 +0000 Subject: [PATCH 03/99] Update logic to use new proto message --- weaviate/collections/grpc/shared.py | 1 + weaviate/proto/v1/v4216/v1/base_search_pb2.py | 60 +++++++++---------- 
.../proto/v1/v4216/v1/base_search_pb2.pyi | 6 +- weaviate/proto/v1/v5261/v1/base_search_pb2.py | 60 +++++++++---------- .../proto/v1/v5261/v1/base_search_pb2.pyi | 6 +- weaviate/proto/v1/v6300/v1/base_search_pb2.py | 60 +++++++++---------- .../proto/v1/v6300/v1/base_search_pb2.pyi | 6 +- 7 files changed, 103 insertions(+), 96 deletions(-) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 9a8949451..92af251c5 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -696,6 +696,7 @@ def _parse_hybrid( if alpha is not None else None, alpha_param=alpha if self._weaviate_version.is_at_least(1, 36, 0) else None, + use_alpha_param=self._weaviate_version.is_at_least(1, 36, 0), fusion_type=( cast( base_search_pb2.Hybrid.FusionType, diff --git a/weaviate/proto/v1/v4216/v1/base_search_pb2.py b/weaviate/proto/v1/v4216/v1/base_search_pb2.py index f263cb10c..3abd15ebf 100644 --- a/weaviate/proto/v1/v4216/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v4216/v1/base_search_pb2.py @@ -14,7 +14,7 @@ from weaviate.proto.v1.v4216.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 
\x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xfe\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = 
_descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\x97\x05\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x17\n\x0fuse_alpha_param\x18\r \x01(\x08\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 
\x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -58,8 +58,8 @@ _NEARTHERMALSEARCH.fields_by_name['target_vectors']._serialized_options = b'\030\001' _NEARIMUSEARCH.fields_by_name['target_vectors']._options = None _NEARIMUSEARCH.fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3383 - _globals['_COMBINATIONMETHOD']._serialized_end=3621 + _globals['_COMBINATIONMETHOD']._serialized_start=3408 + _globals['_COMBINATIONMETHOD']._serialized_end=3646 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -71,31 +71,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - _globals['_HYBRID']._serialized_end=1224 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1073 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1170 - _globals['_NEARVECTOR']._serialized_start=1227 - _globals['_NEARVECTOR']._serialized_end=1656 - 
_globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1575 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1629 - _globals['_NEAROBJECT']._serialized_start=1659 - _globals['_NEAROBJECT']._serialized_end=1824 - _globals['_NEARTEXTSEARCH']._serialized_start=1827 - _globals['_NEARTEXTSEARCH']._serialized_end=2195 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2088 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2142 - _globals['_NEARIMAGESEARCH']._serialized_start=2198 - _globals['_NEARIMAGESEARCH']._serialized_end=2371 - _globals['_NEARAUDIOSEARCH']._serialized_start=2374 - _globals['_NEARAUDIOSEARCH']._serialized_end=2547 - _globals['_NEARVIDEOSEARCH']._serialized_start=2550 - _globals['_NEARVIDEOSEARCH']._serialized_end=2723 - _globals['_NEARDEPTHSEARCH']._serialized_start=2726 - _globals['_NEARDEPTHSEARCH']._serialized_end=2899 - _globals['_NEARTHERMALSEARCH']._serialized_start=2902 - _globals['_NEARTHERMALSEARCH']._serialized_end=3079 - _globals['_NEARIMUSEARCH']._serialized_start=3082 - _globals['_NEARIMUSEARCH']._serialized_end=3251 - _globals['_BM25']._serialized_start=3253 - _globals['_BM25']._serialized_end=3380 + _globals['_HYBRID']._serialized_end=1249 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1098 + _globals['_HYBRID_FUSIONTYPE']._serialized_end=1195 + _globals['_NEARVECTOR']._serialized_start=1252 + _globals['_NEARVECTOR']._serialized_end=1681 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1600 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1654 + _globals['_NEAROBJECT']._serialized_start=1684 + _globals['_NEAROBJECT']._serialized_end=1849 + _globals['_NEARTEXTSEARCH']._serialized_start=1852 + _globals['_NEARTEXTSEARCH']._serialized_end=2220 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2113 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2167 + _globals['_NEARIMAGESEARCH']._serialized_start=2223 + _globals['_NEARIMAGESEARCH']._serialized_end=2396 + 
_globals['_NEARAUDIOSEARCH']._serialized_start=2399 + _globals['_NEARAUDIOSEARCH']._serialized_end=2572 + _globals['_NEARVIDEOSEARCH']._serialized_start=2575 + _globals['_NEARVIDEOSEARCH']._serialized_end=2748 + _globals['_NEARDEPTHSEARCH']._serialized_start=2751 + _globals['_NEARDEPTHSEARCH']._serialized_end=2924 + _globals['_NEARTHERMALSEARCH']._serialized_start=2927 + _globals['_NEARTHERMALSEARCH']._serialized_end=3104 + _globals['_NEARIMUSEARCH']._serialized_start=3107 + _globals['_NEARIMUSEARCH']._serialized_end=3276 + _globals['_BM25']._serialized_start=3278 + _globals['_BM25']._serialized_end=3405 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi b/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi index ab8a08f38..440631c88 100644 --- a/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi @@ -67,7 +67,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... 
class Hybrid(_message.Message): - __slots__ = ["query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "vector_distance", "vectors"] + __slots__ = ["query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "use_alpha_param", "vector_distance", "vectors"] class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -88,6 +88,7 @@ class Hybrid(_message.Message): TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] + USE_ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -102,9 +103,10 @@ class Hybrid(_message.Message): targets: Targets bm25_search_operator: SearchOperatorOptions alpha_param: float + use_alpha_param: bool vector_distance: float vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
+ def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., use_alpha_param: bool = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... class NearVector(_message.Message): __slots__ = ["vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors"] diff --git a/weaviate/proto/v1/v5261/v1/base_search_pb2.py b/weaviate/proto/v1/v5261/v1/base_search_pb2.py index cae3e4745..2658b6cc5 100644 --- a/weaviate/proto/v1/v5261/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v5261/v1/base_search_pb2.py @@ -15,7 +15,7 @@ from weaviate.proto.v1.v5261.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xfe\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 
\x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 
\x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 
\x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\x97\x05\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 
\x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x17\n\x0fuse_alpha_param\x18\r \x01(\x08\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 
\x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -59,8 +59,8 @@ _globals['_NEARTHERMALSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._loaded_options = None _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3383 - _globals['_COMBINATIONMETHOD']._serialized_end=3621 + 
_globals['_COMBINATIONMETHOD']._serialized_start=3408 + _globals['_COMBINATIONMETHOD']._serialized_end=3646 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -72,31 +72,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - _globals['_HYBRID']._serialized_end=1224 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1073 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1170 - _globals['_NEARVECTOR']._serialized_start=1227 - _globals['_NEARVECTOR']._serialized_end=1656 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1575 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1629 - _globals['_NEAROBJECT']._serialized_start=1659 - _globals['_NEAROBJECT']._serialized_end=1824 - _globals['_NEARTEXTSEARCH']._serialized_start=1827 - _globals['_NEARTEXTSEARCH']._serialized_end=2195 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2088 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2142 - _globals['_NEARIMAGESEARCH']._serialized_start=2198 - _globals['_NEARIMAGESEARCH']._serialized_end=2371 - _globals['_NEARAUDIOSEARCH']._serialized_start=2374 - _globals['_NEARAUDIOSEARCH']._serialized_end=2547 - _globals['_NEARVIDEOSEARCH']._serialized_start=2550 - _globals['_NEARVIDEOSEARCH']._serialized_end=2723 - _globals['_NEARDEPTHSEARCH']._serialized_start=2726 - _globals['_NEARDEPTHSEARCH']._serialized_end=2899 - _globals['_NEARTHERMALSEARCH']._serialized_start=2902 - _globals['_NEARTHERMALSEARCH']._serialized_end=3079 - _globals['_NEARIMUSEARCH']._serialized_start=3082 - _globals['_NEARIMUSEARCH']._serialized_end=3251 - _globals['_BM25']._serialized_start=3253 - _globals['_BM25']._serialized_end=3380 + _globals['_HYBRID']._serialized_end=1249 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1098 + 
_globals['_HYBRID_FUSIONTYPE']._serialized_end=1195 + _globals['_NEARVECTOR']._serialized_start=1252 + _globals['_NEARVECTOR']._serialized_end=1681 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1600 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1654 + _globals['_NEAROBJECT']._serialized_start=1684 + _globals['_NEAROBJECT']._serialized_end=1849 + _globals['_NEARTEXTSEARCH']._serialized_start=1852 + _globals['_NEARTEXTSEARCH']._serialized_end=2220 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2113 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2167 + _globals['_NEARIMAGESEARCH']._serialized_start=2223 + _globals['_NEARIMAGESEARCH']._serialized_end=2396 + _globals['_NEARAUDIOSEARCH']._serialized_start=2399 + _globals['_NEARAUDIOSEARCH']._serialized_end=2572 + _globals['_NEARVIDEOSEARCH']._serialized_start=2575 + _globals['_NEARVIDEOSEARCH']._serialized_end=2748 + _globals['_NEARDEPTHSEARCH']._serialized_start=2751 + _globals['_NEARDEPTHSEARCH']._serialized_end=2924 + _globals['_NEARTHERMALSEARCH']._serialized_start=2927 + _globals['_NEARTHERMALSEARCH']._serialized_end=3104 + _globals['_NEARIMUSEARCH']._serialized_start=3107 + _globals['_NEARIMUSEARCH']._serialized_end=3276 + _globals['_BM25']._serialized_start=3278 + _globals['_BM25']._serialized_end=3405 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi b/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi index ca6f234bc..b89f04bfe 100644 --- a/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi @@ -67,7 +67,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... 
class Hybrid(_message.Message): - __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "vector_distance", "vectors") + __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "use_alpha_param", "vector_distance", "vectors") class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = () FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -88,6 +88,7 @@ class Hybrid(_message.Message): TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] + USE_ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -102,9 +103,10 @@ class Hybrid(_message.Message): targets: Targets bm25_search_operator: SearchOperatorOptions alpha_param: float + use_alpha_param: bool vector_distance: float vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
+ def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., use_alpha_param: bool = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... class NearVector(_message.Message): __slots__ = ("vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors") diff --git a/weaviate/proto/v1/v6300/v1/base_search_pb2.py b/weaviate/proto/v1/v6300/v1/base_search_pb2.py index bc7884f55..9a19ea0ec 100644 --- a/weaviate/proto/v1/v6300/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v6300/v1/base_search_pb2.py @@ -25,7 +25,7 @@ from weaviate.proto.v1.v6300.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xfe\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 
\x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 
\x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 
\x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\x97\x05\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 
\x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x17\n\x0fuse_alpha_param\x18\r \x01(\x08\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 
\x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -69,8 +69,8 @@ _globals['_NEARTHERMALSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._loaded_options = None _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3383 - _globals['_COMBINATIONMETHOD']._serialized_end=3621 + 
_globals['_COMBINATIONMETHOD']._serialized_start=3408 + _globals['_COMBINATIONMETHOD']._serialized_end=3646 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -82,31 +82,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - _globals['_HYBRID']._serialized_end=1224 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1073 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1170 - _globals['_NEARVECTOR']._serialized_start=1227 - _globals['_NEARVECTOR']._serialized_end=1656 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1575 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1629 - _globals['_NEAROBJECT']._serialized_start=1659 - _globals['_NEAROBJECT']._serialized_end=1824 - _globals['_NEARTEXTSEARCH']._serialized_start=1827 - _globals['_NEARTEXTSEARCH']._serialized_end=2195 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2088 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2142 - _globals['_NEARIMAGESEARCH']._serialized_start=2198 - _globals['_NEARIMAGESEARCH']._serialized_end=2371 - _globals['_NEARAUDIOSEARCH']._serialized_start=2374 - _globals['_NEARAUDIOSEARCH']._serialized_end=2547 - _globals['_NEARVIDEOSEARCH']._serialized_start=2550 - _globals['_NEARVIDEOSEARCH']._serialized_end=2723 - _globals['_NEARDEPTHSEARCH']._serialized_start=2726 - _globals['_NEARDEPTHSEARCH']._serialized_end=2899 - _globals['_NEARTHERMALSEARCH']._serialized_start=2902 - _globals['_NEARTHERMALSEARCH']._serialized_end=3079 - _globals['_NEARIMUSEARCH']._serialized_start=3082 - _globals['_NEARIMUSEARCH']._serialized_end=3251 - _globals['_BM25']._serialized_start=3253 - _globals['_BM25']._serialized_end=3380 + _globals['_HYBRID']._serialized_end=1249 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1098 + 
_globals['_HYBRID_FUSIONTYPE']._serialized_end=1195 + _globals['_NEARVECTOR']._serialized_start=1252 + _globals['_NEARVECTOR']._serialized_end=1681 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1600 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1654 + _globals['_NEAROBJECT']._serialized_start=1684 + _globals['_NEAROBJECT']._serialized_end=1849 + _globals['_NEARTEXTSEARCH']._serialized_start=1852 + _globals['_NEARTEXTSEARCH']._serialized_end=2220 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2113 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2167 + _globals['_NEARIMAGESEARCH']._serialized_start=2223 + _globals['_NEARIMAGESEARCH']._serialized_end=2396 + _globals['_NEARAUDIOSEARCH']._serialized_start=2399 + _globals['_NEARAUDIOSEARCH']._serialized_end=2572 + _globals['_NEARVIDEOSEARCH']._serialized_start=2575 + _globals['_NEARVIDEOSEARCH']._serialized_end=2748 + _globals['_NEARDEPTHSEARCH']._serialized_start=2751 + _globals['_NEARDEPTHSEARCH']._serialized_end=2924 + _globals['_NEARTHERMALSEARCH']._serialized_start=2927 + _globals['_NEARTHERMALSEARCH']._serialized_end=3104 + _globals['_NEARIMUSEARCH']._serialized_start=3107 + _globals['_NEARIMUSEARCH']._serialized_end=3276 + _globals['_BM25']._serialized_start=3278 + _globals['_BM25']._serialized_end=3405 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi b/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi index 9ad165a42..67a53e25a 100644 --- a/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi @@ -68,7 +68,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... 
class Hybrid(_message.Message): - __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "vector_distance", "vectors") + __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "use_alpha_param", "vector_distance", "vectors") class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = () FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -89,6 +89,7 @@ class Hybrid(_message.Message): TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] + USE_ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -103,9 +104,10 @@ class Hybrid(_message.Message): targets: Targets bm25_search_operator: SearchOperatorOptions alpha_param: float + use_alpha_param: bool vector_distance: float vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
+ def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., use_alpha_param: bool = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... class NearVector(_message.Message): __slots__ = ("vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors") From 8e5d54d7cd09fc20bc4f1f87b7678b1cdafaa05c Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 12:14:23 +0000 Subject: [PATCH 04/99] Change formatting --- weaviate/collections/grpc/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 92af251c5..c5131c51e 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -692,7 +692,7 @@ def _parse_hybrid( query=query, alpha=None if self._weaviate_version.is_at_least(1, 36, 0) - else (alpha) + else alpha if alpha is not None else None, alpha_param=alpha if self._weaviate_version.is_at_least(1, 36, 0) else None, From 367ce7965ad5785f54ac647dc16a23fe5c4894cb Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 12:15:54 +0000 Subject: [PATCH 05/99] Tidy version check code --- weaviate/collections/grpc/shared.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/weaviate/collections/grpc/shared.py 
b/weaviate/collections/grpc/shared.py index c5131c51e..ff538020c 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -686,17 +686,14 @@ def _parse_hybrid( else: vector_bytes = vector_bytes_tmp + is_1_36 = self._weaviate_version.is_at_least(1, 36, 0) return ( base_search_pb2.Hybrid( properties=properties, query=query, - alpha=None - if self._weaviate_version.is_at_least(1, 36, 0) - else alpha - if alpha is not None - else None, - alpha_param=alpha if self._weaviate_version.is_at_least(1, 36, 0) else None, - use_alpha_param=self._weaviate_version.is_at_least(1, 36, 0), + alpha=None if is_1_36 else (alpha if alpha is not None else None), + alpha_param=alpha if is_1_36 else None, + use_alpha_param=is_1_36, fusion_type=( cast( base_search_pb2.Hybrid.FusionType, From 81414a11736c60f532fee516e9ad7a4999092265 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 12:17:52 +0000 Subject: [PATCH 06/99] Parse correct default for BC if server < 1.36 --- weaviate/collections/grpc/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index ff538020c..6a08e9406 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -691,7 +691,7 @@ def _parse_hybrid( base_search_pb2.Hybrid( properties=properties, query=query, - alpha=None if is_1_36 else (alpha if alpha is not None else None), + alpha=None if is_1_36 else (alpha if alpha is not None else 0.7), alpha_param=alpha if is_1_36 else None, use_alpha_param=is_1_36, fusion_type=( From a13da2373b9f26c5bb4bb9d136e5c53a81df1251 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 12:19:47 +0000 Subject: [PATCH 07/99] Update CI image --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 1fa7afb20..1edbe48cf 100644 --- 
a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,7 +28,7 @@ env: WEAVIATE_134: 1.34.5 WEAVIATE_135: 1.35.0 WEAVIATE_136: 1.36.0 - WEAVIATE_137: 1.37.0-dev-29d5c87.amd64 + WEAVIATE_137: 1.37.0-dev-8ff93a2.amd64 jobs: From 7abe7f534e5976c28c6fc489111d2080489612e0 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 15:00:29 +0000 Subject: [PATCH 08/99] Fix wrong version comparison --- weaviate/collections/grpc/shared.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 6a08e9406..cc3be7ec1 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -686,14 +686,14 @@ def _parse_hybrid( else: vector_bytes = vector_bytes_tmp - is_1_36 = self._weaviate_version.is_at_least(1, 36, 0) + is_1_37 = self._weaviate_version.is_at_least(1, 37, 0) return ( base_search_pb2.Hybrid( properties=properties, query=query, - alpha=None if is_1_36 else (alpha if alpha is not None else 0.7), - alpha_param=alpha if is_1_36 else None, - use_alpha_param=is_1_36, + alpha=None if is_1_37 else (alpha if alpha is not None else 0.7), + alpha_param=alpha if is_1_37 else None, + use_alpha_param=is_1_37, fusion_type=( cast( base_search_pb2.Hybrid.FusionType, From b5f5e559e73dbefcc5b9487aa045ade07eee0062 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 15:12:09 +0000 Subject: [PATCH 09/99] Fix typo in ci --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 1edbe48cf..cef92023c 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -310,7 +310,7 @@ jobs: $WEAVIATE_133, $WEAVIATE_134, $WEAVIATE_135, - $WEAVIATE_136 + $WEAVIATE_136, $WEAVIATE_137 ] steps: From 47841d4ed251bfd6b35da5a4030993d697a2598d Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 16:13:15 +0000 Subject: 
[PATCH 10/99] Update CI image --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index cef92023c..43253fbb4 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,7 +28,7 @@ env: WEAVIATE_134: 1.34.5 WEAVIATE_135: 1.35.0 WEAVIATE_136: 1.36.0 - WEAVIATE_137: 1.37.0-dev-8ff93a2.amd64 + WEAVIATE_137: 1.37.0-dev-4e2d51d.amd64 jobs: From e305b334a2e9b05908801d79c31c1a11f68785be Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 16 Mar 2026 16:14:59 +0000 Subject: [PATCH 11/99] Remove client-side default from aggregate queries --- weaviate/collections/aggregations/hybrid/async_.pyi | 6 +++--- weaviate/collections/aggregations/hybrid/executor.py | 8 ++++---- weaviate/collections/aggregations/hybrid/sync.pyi | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/weaviate/collections/aggregations/hybrid/async_.pyi b/weaviate/collections/aggregations/hybrid/async_.pyi index 8f217a5d4..f1e4b72ed 100644 --- a/weaviate/collections/aggregations/hybrid/async_.pyi +++ b/weaviate/collections/aggregations/hybrid/async_.pyi @@ -19,7 +19,7 @@ class _HybridAsync(_HybridExecutor[ConnectionAsync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -36,7 +36,7 @@ class _HybridAsync(_HybridExecutor[ConnectionAsync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -53,7 +53,7 @@ class _HybridAsync(_HybridExecutor[ConnectionAsync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: 
Optional[int] = None, diff --git a/weaviate/collections/aggregations/hybrid/executor.py b/weaviate/collections/aggregations/hybrid/executor.py index fae005c9c..c2c87a68f 100644 --- a/weaviate/collections/aggregations/hybrid/executor.py +++ b/weaviate/collections/aggregations/hybrid/executor.py @@ -22,7 +22,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -40,7 +40,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -58,7 +58,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -75,7 +75,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, diff --git a/weaviate/collections/aggregations/hybrid/sync.pyi b/weaviate/collections/aggregations/hybrid/sync.pyi index 81d632f2e..8656fb319 100644 --- a/weaviate/collections/aggregations/hybrid/sync.pyi +++ b/weaviate/collections/aggregations/hybrid/sync.pyi @@ -19,7 +19,7 @@ class _Hybrid(_HybridExecutor[ConnectionSync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -36,7 +36,7 @@ class _Hybrid(_HybridExecutor[ConnectionSync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: 
Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -53,7 +53,7 @@ class _Hybrid(_HybridExecutor[ConnectionSync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, From 8520a1c4f418f767732404fc4c6c44e9eef722df Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 11:58:41 +0000 Subject: [PATCH 12/99] Update ver check and CI tags --- .github/workflows/main.yaml | 15 +++++++-------- weaviate/collections/grpc/shared.py | 10 ++++++---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 43253fbb4..d884891c0 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -22,14 +22,13 @@ env: WEAVIATE_128: 1.28.16 WEAVIATE_129: 1.29.11 WEAVIATE_130: 1.30.22 - WEAVIATE_131: 1.31.20 - WEAVIATE_132: 1.32.23 - WEAVIATE_133: 1.33.10 - WEAVIATE_134: 1.34.5 - WEAVIATE_135: 1.35.0 - WEAVIATE_136: 1.36.0 - WEAVIATE_137: 1.37.0-dev-4e2d51d.amd64 - + WEAVIATE_131: 1.31.22 + WEAVIATE_132: 1.32.27 + WEAVIATE_133: 1.33.18 + WEAVIATE_134: 1.34.19 + WEAVIATE_135: 1.35.15 + WEAVIATE_136: 1.36.6-1abf310 + WEAVIATE_137: 1.37.0-dev-7eb294e jobs: lint-and-format: diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index cc3be7ec1..714db67cb 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -686,14 +686,16 @@ def _parse_hybrid( else: vector_bytes = vector_bytes_tmp - is_1_37 = self._weaviate_version.is_at_least(1, 37, 0) + use_alpha_param = self._weaviate_version.is_at_least( + 1, 36, 6 + ) # TODO: change to 1.36.7 once it's released return ( base_search_pb2.Hybrid( properties=properties, query=query, - alpha=None if is_1_37 else (alpha if alpha is not None else 0.7), - alpha_param=alpha if is_1_37 else None, - use_alpha_param=is_1_37, + 
alpha=None if use_alpha_param else (alpha if alpha is not None else 0.7), + alpha_param=alpha if use_alpha_param else None, + use_alpha_param=use_alpha_param, fusion_type=( cast( base_search_pb2.Hybrid.FusionType, From ed373755688b09d105a932d15326993ac54059f0 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 12:30:15 +0000 Subject: [PATCH 13/99] Remove test of lazy loading shards --- integration/test_client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/integration/test_client.py b/integration/test_client.py index 23ece9885..61d3eb64a 100644 --- a/integration/test_client.py +++ b/integration/test_client.py @@ -383,10 +383,7 @@ def test_client_cluster_without_lazy_shard_loading( ] assert nodes[0].shards[0].vector_queue_length == 0 assert nodes[0].shards[0].compressed is False - if collection._connection._weaviate_version.is_lower_than(1, 25, 0): - assert nodes[0].shards[0].loaded is True - else: - assert nodes[0].shards[0].loaded is False + assert nodes[0].shards[0].loaded is True finally: client.collections.delete(request.node.name) From 14339851c0c5fa9cd3a173369aeb65ccc3ca37f0 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 13:01:58 +0000 Subject: [PATCH 14/99] Refactor client test for new server lazy shard loading --- integration/test_client.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/integration/test_client.py b/integration/test_client.py index 61d3eb64a..4166cb726 100644 --- a/integration/test_client.py +++ b/integration/test_client.py @@ -355,7 +355,10 @@ def test_client_cluster_with_lazy_shard_loading( ] assert nodes[0].shards[0].vector_queue_length == 0 assert nodes[0].shards[0].compressed is False - assert nodes[0].shards[0].loaded is True + if collection._connection._weaviate_version.is_lower_than(1, 36, 0): + assert nodes[0].shards[0].loaded is False + else: + assert nodes[0].shards[0].loaded is True finally: client.collections.delete(request.node.name) @@ 
-383,7 +386,10 @@ def test_client_cluster_without_lazy_shard_loading( ] assert nodes[0].shards[0].vector_queue_length == 0 assert nodes[0].shards[0].compressed is False - assert nodes[0].shards[0].loaded is True + if collection._connection._weaviate_version.is_lower_than(1, 25, 0): + assert nodes[0].shards[0].loaded is True + else: + assert nodes[0].shards[0].loaded is False finally: client.collections.delete(request.node.name) From dd6c835f9d4f66409e3010840da76945ddc58e7f Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 13:18:16 +0000 Subject: [PATCH 15/99] Debug failing ci test --- integration/test_client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/integration/test_client.py b/integration/test_client.py index 4166cb726..23ece9885 100644 --- a/integration/test_client.py +++ b/integration/test_client.py @@ -355,10 +355,7 @@ def test_client_cluster_with_lazy_shard_loading( ] assert nodes[0].shards[0].vector_queue_length == 0 assert nodes[0].shards[0].compressed is False - if collection._connection._weaviate_version.is_lower_than(1, 36, 0): - assert nodes[0].shards[0].loaded is False - else: - assert nodes[0].shards[0].loaded is True + assert nodes[0].shards[0].loaded is True finally: client.collections.delete(request.node.name) From cf96ccb40309a1001abb6c533c6d9b8cec2e9723 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 13:41:55 +0000 Subject: [PATCH 16/99] Remove outdated lazy shard load test --- integration/test_client.py | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/integration/test_client.py b/integration/test_client.py index 23ece9885..3560cbbb7 100644 --- a/integration/test_client.py +++ b/integration/test_client.py @@ -334,7 +334,7 @@ def test_collection_name_capitalization( client.collections.delete(name_big) -def test_client_cluster_with_lazy_shard_loading( +def test_client_cluster_without_lazy_shard_loading( client: weaviate.WeaviateClient, 
request: SubRequest ) -> None: try: @@ -360,37 +360,6 @@ def test_client_cluster_with_lazy_shard_loading( client.collections.delete(request.node.name) -def test_client_cluster_without_lazy_shard_loading( - client_factory: ClientFactory, request: SubRequest -) -> None: - client = client_factory(8090, 50061) - - try: - collection = client.collections.create( - name=request.node.name, vectorizer_config=Configure.Vectorizer.none() - ) - - nodes = client.cluster.nodes(collection.name, output="verbose") - assert len(nodes) == 1 - assert len(nodes[0].shards) == 1 - assert nodes[0].shards[0].collection == collection.name - assert nodes[0].shards[0].object_count == 0 - assert nodes[0].shards[0].vector_indexing_status in [ - "READONLY", - "INDEXING", - "READY", - "LAZY_LOADING", - ] - assert nodes[0].shards[0].vector_queue_length == 0 - assert nodes[0].shards[0].compressed is False - if collection._connection._weaviate_version.is_lower_than(1, 25, 0): - assert nodes[0].shards[0].loaded is True - else: - assert nodes[0].shards[0].loaded is False - finally: - client.collections.delete(request.node.name) - - def test_client_cluster_multitenant(client: weaviate.WeaviateClient, request: SubRequest) -> None: try: collection = client.collections.create( From 4b669e10706fca04573f5f767d11c6e848b0b02d Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 15:21:09 +0000 Subject: [PATCH 17/99] Update CI images --- .github/workflows/main.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index d884891c0..7cc9934e7 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -27,8 +27,8 @@ env: WEAVIATE_133: 1.33.18 WEAVIATE_134: 1.34.19 WEAVIATE_135: 1.35.15 - WEAVIATE_136: 1.36.6-1abf310 - WEAVIATE_137: 1.37.0-dev-7eb294e + WEAVIATE_136: 1.36.6-21aaadc + WEAVIATE_137: 1.37.0-dev-8fb696b jobs: lint-and-format: From 890a414aa43eeb355f40c7ed9402a1edd45f40cd Mon Sep 17 00:00:00 
2001 From: Tommy Smith Date: Fri, 20 Mar 2026 15:59:52 +0000 Subject: [PATCH 18/99] Add per-test timeouts and stack dump on timeout --- integration/conftest.py | 63 ++++++++++++++++++++++++++++++++++++++++- pytest.ini | 4 ++- requirements-test.txt | 1 + 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/integration/conftest.py b/integration/conftest.py index 256517ea6..891c66307 100644 --- a/integration/conftest.py +++ b/integration/conftest.py @@ -1,5 +1,10 @@ import os import time +import sys +import threading +import traceback +import signal +import pytest from typing import ( Any, AsyncGenerator, @@ -14,7 +19,6 @@ ) from typing import Callable, TypeVar -import pytest import pytest_asyncio from _pytest.fixtures import SubRequest @@ -500,3 +504,60 @@ def retry_on_http_error( raise # This should never be reached, but satisfies the type checker raise last_exception # type: ignore + + +TIMEOUT_SECONDS = 30 + + +def dump_all_stacks(): + """Print stack traces for every live thread.""" + frames = sys._current_frames() + lines = ["\n===== DEADLOCK DETECTED — THREAD DUMP =====\n"] + for thread in threading.enumerate(): + frame = frames.get(thread.ident) # pyright: ignore + lines.append(f"\n--- Thread: {thread.name} (id={thread.ident}, daemon={thread.daemon}) ---") + if frame: + lines.append("".join(traceback.format_stack(frame))) + else: + lines.append(" (no frame available)\n") + lines.append("===========================================\n") + return "\n".join(lines) + + +class DeadlockWatchdog: + def __init__(self, timeout): + self.timeout = timeout + self._timer = None + self._test_name = None + + def start(self, test_name): + self._test_name = test_name + self._timer = threading.Timer(self.timeout, self._on_timeout) + self._timer.daemon = True + self._timer.start() + + def stop(self): + if self._timer: + self._timer.cancel() + self._timer = None + + def _on_timeout(self): + dump = dump_all_stacks() + # Write to stderr so it's always visible even if 
captured + sys.stderr.write(f"\n[WATCHDOG] Test '{self._test_name}' timed out after {self.timeout}s\n") + sys.stderr.write(dump) + sys.stderr.flush() + # Force-kill the process so CI doesn't hang forever + signal.raise_signal(signal.SIGTERM) + + +_watchdog = DeadlockWatchdog(TIMEOUT_SECONDS) + + +@pytest.hookimpl(hookwrapper=True) +def pytest_runtest_call(item): + _watchdog.start(item.nodeid) + try: + yield + finally: + _watchdog.stop() diff --git a/pytest.ini b/pytest.ini index 36321aa1d..09bfb6983 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,4 +2,6 @@ addopts = -m 'not profiling' --benchmark-skip -l markers = profiling: marks tests that can be profiled -asyncio_default_fixture_loop_scope = function \ No newline at end of file +asyncio_default_fixture_loop_scope = function +timeout = 600 # 10 minutes +timeout_method = thread \ No newline at end of file diff --git a/requirements-test.txt b/requirements-test.txt index 9817a6d5e..40121fcce 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,6 +3,7 @@ pytest-cov==6.2.1 pytest-asyncio==1.3.0 pytest-benchmark==5.1.0 pytest-profiling==1.8.1 +pytest-timeout==2.4.0 coverage==7.10.7 pytest-xdist==3.7.0 werkzeug==3.1.6 From 2893822041959749f6cd12554f304c114d4015e0 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 16:01:09 +0000 Subject: [PATCH 19/99] Reduce per-test timeout to 5 mins --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 09bfb6983..36de4d551 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,5 +3,5 @@ addopts = -m 'not profiling' --benchmark-skip -l markers = profiling: marks tests that can be profiled asyncio_default_fixture_loop_scope = function -timeout = 600 # 10 minutes +timeout = 300 # 5 minutes timeout_method = thread \ No newline at end of file From 6edd0e155630549f65ead7ff9a2c63baedca363d Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 16:03:32 +0000 Subject: [PATCH 20/99] Fix inc backups test 
--- integration/test_backup_v4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/test_backup_v4.py b/integration/test_backup_v4.py index 1204a1269..6da10bc7c 100644 --- a/integration/test_backup_v4.py +++ b/integration/test_backup_v4.py @@ -764,7 +764,7 @@ def test_incremental_backup(client: weaviate.WeaviateClient, request: SubRequest backend=BACKEND, include_collections=["Article"], wait_for_completion=True, - incremental_backup_base_id=base_backup_id, + incremental_base_backup_id=base_backup_id, ) # remove existing class From e7cb6979581e87a5836d6e1dbc983a989eed284b Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 16:04:07 +0000 Subject: [PATCH 21/99] Add server version check for incremental backups --- weaviate/backup/executor.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/weaviate/backup/executor.py b/weaviate/backup/executor.py index e8c52239d..09a47bd0a 100644 --- a/weaviate/backup/executor.py +++ b/weaviate/backup/executor.py @@ -88,6 +88,16 @@ def create( wait_for_completion=wait_for_completion, ) + if ( + incremental_base_backup_id is not None + and self._connection._weaviate_version.is_lower_than(1, 37, 0) + ): + raise WeaviateUnsupportedFeatureError( + "Incremental backups", + str(self._connection._weaviate_version), + "1.37.0", + ) + payload: dict = { "id": backup_id, "include": include_collections, From 12d064ff617a400c3348ad8eac42980efea99597 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 16:09:03 +0000 Subject: [PATCH 22/99] Remove comment --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 36de4d551..8b61c6947 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,5 +3,5 @@ addopts = -m 'not profiling' --benchmark-skip -l markers = profiling: marks tests that can be profiled asyncio_default_fixture_loop_scope = function -timeout = 300 # 5 minutes +timeout = 300 timeout_method = thread \ No newline at end of file 
From fe5c5223901ebcf67f7f693e72278d99a9bed5cc Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 16:34:16 +0000 Subject: [PATCH 23/99] Hard kill the process on timeout detection --- integration/conftest.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/integration/conftest.py b/integration/conftest.py index 891c66307..73a032f57 100644 --- a/integration/conftest.py +++ b/integration/conftest.py @@ -3,7 +3,6 @@ import sys import threading import traceback -import signal import pytest from typing import ( Any, @@ -506,20 +505,19 @@ def retry_on_http_error( raise last_exception # type: ignore -TIMEOUT_SECONDS = 30 +TIMEOUT_SECONDS = 300 def dump_all_stacks(): - """Print stack traces for every live thread.""" frames = sys._current_frames() lines = ["\n===== DEADLOCK DETECTED — THREAD DUMP =====\n"] for thread in threading.enumerate(): frame = frames.get(thread.ident) # pyright: ignore - lines.append(f"\n--- Thread: {thread.name} (id={thread.ident}, daemon={thread.daemon}) ---") + lines.append(f"\n--- Thread: {thread.name} (id={thread.ident}) ---") if frame: lines.append("".join(traceback.format_stack(frame))) else: - lines.append(" (no frame available)\n") + lines.append(" (no frame)\n") lines.append("===========================================\n") return "\n".join(lines) @@ -528,10 +526,9 @@ class DeadlockWatchdog: def __init__(self, timeout): self.timeout = timeout self._timer = None - self._test_name = None - def start(self, test_name): - self._test_name = test_name + def start(self, label): + self._label = label self._timer = threading.Timer(self.timeout, self._on_timeout) self._timer.daemon = True self._timer.start() @@ -542,22 +539,30 @@ def stop(self): self._timer = None def _on_timeout(self): - dump = dump_all_stacks() - # Write to stderr so it's always visible even if captured - sys.stderr.write(f"\n[WATCHDOG] Test '{self._test_name}' timed out after {self.timeout}s\n") - 
sys.stderr.write(dump) + sys.stderr.write(f"\n[WATCHDOG] Hung at: '{self._label}' after {self.timeout}s\n") + sys.stderr.write(dump_all_stacks()) sys.stderr.flush() - # Force-kill the process so CI doesn't hang forever - signal.raise_signal(signal.SIGTERM) + os._exit(1) # Hard kill — works reliably in xdist workers _watchdog = DeadlockWatchdog(TIMEOUT_SECONDS) +# Covers setup + call + teardown @pytest.hookimpl(hookwrapper=True) -def pytest_runtest_call(item): +def pytest_runtest_protocol(item, nextitem): _watchdog.start(item.nodeid) try: yield finally: _watchdog.stop() + + +# Separately watch session-scoped fixture setup +@pytest.hookimpl(hookwrapper=True) +def pytest_sessionstart(session): + _watchdog.start("session startup / session-scoped fixtures") + try: + yield + finally: + _watchdog.stop() From 2d961d18842bcf678f62d666b7a2a44388ea4dbc Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 16:58:40 +0000 Subject: [PATCH 24/99] Timeout putting the sentinel to avoid deadlocking --- weaviate/collections/batch/async_.py | 2 +- weaviate/collections/batch/sync.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/weaviate/collections/batch/async_.py b/weaviate/collections/batch/async_.py index b71f8be39..b86bc80d3 100644 --- a/weaviate/collections/batch/async_.py +++ b/weaviate/collections/batch/async_.py @@ -254,7 +254,7 @@ async def __loop(self) -> None: and not self.__is_shutting_down.is_set() and not self.__is_oom.is_set() ): - await self.__reqs.put(None) + await asyncio.wait_for(self.__reqs.put(None), timeout=60) self.__sent_sentinel.set() await asyncio.sleep(refresh_time) diff --git a/weaviate/collections/batch/sync.py b/weaviate/collections/batch/sync.py index f219de563..eaa96b1c5 100644 --- a/weaviate/collections/batch/sync.py +++ b/weaviate/collections/batch/sync.py @@ -206,7 +206,7 @@ def __loop(self) -> None: and not self.__is_shutting_down.is_set() and not self.__is_oom.is_set() ): - self.__reqs.put(None) + 
self.__reqs.put(None, timeout=60) self.__sent_sentinel.set() time.sleep(refresh_time) From fadcebb906644fdba18be771ebbb1d4e4fdeec13 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 20 Mar 2026 17:00:55 +0000 Subject: [PATCH 25/99] Handle the timeout of sentinel pushing gracefully --- weaviate/collections/batch/async_.py | 9 ++++++++- weaviate/collections/batch/sync.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/weaviate/collections/batch/async_.py b/weaviate/collections/batch/async_.py index b86bc80d3..cb7454b30 100644 --- a/weaviate/collections/batch/async_.py +++ b/weaviate/collections/batch/async_.py @@ -254,7 +254,14 @@ async def __loop(self) -> None: and not self.__is_shutting_down.is_set() and not self.__is_oom.is_set() ): - await asyncio.wait_for(self.__reqs.put(None), timeout=60) + try: + await asyncio.wait_for(self.__reqs.put(None), timeout=60) + except asyncio.TimeoutError as e: + logger.warning( + "Batch queue is blocked for more than 60 seconds while trying to send shutdown signal. Exiting the loop" + ) + self.__bg_exception = e + return self.__sent_sentinel.set() await asyncio.sleep(refresh_time) diff --git a/weaviate/collections/batch/sync.py b/weaviate/collections/batch/sync.py index eaa96b1c5..cf7ac2532 100644 --- a/weaviate/collections/batch/sync.py +++ b/weaviate/collections/batch/sync.py @@ -206,7 +206,14 @@ def __loop(self) -> None: and not self.__is_shutting_down.is_set() and not self.__is_oom.is_set() ): - self.__reqs.put(None, timeout=60) + try: + self.__reqs.put(None, timeout=60) + except Full as e: + logger.warning( + "Batch queue is blocked for more than 60 seconds while trying to send shutdown signal. 
Exiting the loop" + ) + self.__bg_exception = e + return self.__sent_sentinel.set() time.sleep(refresh_time) From 78ff5ecfdd24587f3c1dbc7d8b06b54ce4f164d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 Apr 2026 14:35:00 +0100 Subject: [PATCH 26/99] feat: add TextAnalyzerConfig for ASCII folding in text properties --- integration/test_collection_config.py | 73 +++++++++++++++++ test/collection/test_config.py | 80 +++++++++++++++++++ test/collection/test_config_methods.py | 80 ++++++++++++++++++- weaviate/classes/config.py | 2 + weaviate/collections/classes/config.py | 35 ++++++++ .../collections/classes/config_methods.py | 13 +++ 6 files changed, 282 insertions(+), 1 deletion(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 371405a1d..4ba57532c 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -30,6 +30,7 @@ PQEncoderType, PQEncoderDistribution, StopwordsPreset, + TextAnalyzerConfig, VectorDistances, VectorIndexType, Vectorizers, @@ -2196,3 +2197,75 @@ def test_delete_property_index( assert config.properties[0].index_range_filters is False assert config.properties[0].index_searchable is _index_searchable assert config.properties[0].index_filterable is _index_filterable + + +def test_property_text_analyzer_ascii_fold(collection_factory: CollectionFactory) -> None: + """Create a collection with ascii folding configured and verify it round-trips.""" + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig( + ascii_fold=True, ascii_fold_ignore=["é"] + ), + ), + Property( + name="body", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + ), + ], + ) + + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + body = next(p 
for p in config.properties if p.name == "body") + + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + + # Properties without a text_analyzer should not have one in the parsed config. + assert body.text_analyzer is None + + # Folding actually takes effect: 'école' is searchable as 'ecole' but 'é' is preserved. + collection.data.insert({"title": "école française", "body": "école française"}) + res = collection.query.bm25(query="ecole", query_properties=["title"]) + assert len(res.objects) == 1 + res = collection.query.bm25(query="ecole", query_properties=["body"]) + assert len(res.objects) == 0 + + +def test_property_text_analyzer_ascii_fold_in_nested_property( + collection_factory: CollectionFactory, +) -> None: + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="meta", + data_type=DataType.OBJECT, + nested_properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig( + ascii_fold=True, ascii_fold_ignore=["ñ"] + ), + ), + ], + ), + ], + ) + + config = collection.config.get() + meta = next(p for p in config.properties if p.name == "meta") + assert meta.nested_properties is not None + nested_title = next(np for np in meta.nested_properties if np.name == "title") + assert nested_title.text_analyzer is not None + assert nested_title.text_analyzer.ascii_fold is True + assert nested_title.text_analyzer.ascii_fold_ignore == ["ñ"] diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 523ddc980..bae46f6c1 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -12,6 +12,8 @@ Property, Reconfigure, ReferenceProperty, + TextAnalyzerConfig, + Tokenization, Vectorizers, _CollectionConfigCreate, _GenerativeProvider, @@ -3021,3 +3023,81 @@ def test_nested_property_with_id_name_is_allowed() 
-> None: ], ) assert prop.nestedProperties[0].name == "id" + + +class TestTextAnalyzerConfig: + def test_property_without_text_analyzer_omits_key(self) -> None: + prop = Property(name="title", data_type=DataType.TEXT) + assert "textAnalyzer" not in prop._to_dict() + + def test_property_with_ascii_fold_only(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + text_analyzer=TextAnalyzerConfig(ascii_fold=True), + ) + assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True} + + def test_property_with_ascii_fold_and_ignore(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig( + ascii_fold=True, ascii_fold_ignore=["é", "ñ"] + ), + ) + out = prop._to_dict() + assert out["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["é", "ñ"], + } + assert out["tokenization"] == "word" + + def test_text_analyzer_default_omits_unset_fields(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + text_analyzer=TextAnalyzerConfig(), + ) + # exclude_none drops both unset fields, leaving an empty dict + assert prop._to_dict()["textAnalyzer"] == {} + + def test_text_analyzer_only_ignore_list(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + text_analyzer=TextAnalyzerConfig(ascii_fold_ignore=["é"]), + ) + assert prop._to_dict()["textAnalyzer"] == {"asciiFoldIgnore": ["é"]} + + def test_nested_property_with_text_analyzer(self) -> None: + prop = Property( + name="meta", + data_type=DataType.OBJECT, + nested_properties=[ + Property( + name="title", + data_type=DataType.TEXT, + text_analyzer=TextAnalyzerConfig( + ascii_fold=True, ascii_fold_ignore=["ñ"] + ), + ), + ], + ) + out = prop._to_dict() + assert out["nestedProperties"][0]["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["ñ"], + } + + def test_text_analyzer_accepts_snake_case_alias(self) -> None: + ta = TextAnalyzerConfig(ascii_fold=True, 
ascii_fold_ignore=["é"]) + assert ta.asciiFold is True + assert ta.asciiFoldIgnore == ["é"] + + def test_text_analyzer_rejects_wrong_types(self) -> None: + with pytest.raises(ValidationError): + TextAnalyzerConfig(ascii_fold="yes") # type: ignore[arg-type] + with pytest.raises(ValidationError): + TextAnalyzerConfig(ascii_fold_ignore="é") # type: ignore[arg-type] diff --git a/test/collection/test_config_methods.py b/test/collection/test_config_methods.py index fbc33b702..d934053c9 100644 --- a/test/collection/test_config_methods.py +++ b/test/collection/test_config_methods.py @@ -1,4 +1,8 @@ -from weaviate.collections.classes.config_methods import _collection_configs_simple_from_json +from weaviate.collections.classes.config_methods import ( + _collection_configs_simple_from_json, + _nested_properties_from_config, + _properties_from_config, +) def test_collection_config_simple_from_json_with_none_vectorizer_config() -> None: @@ -68,3 +72,77 @@ def test_collection_config_simple_from_json_with_none_vectorizer_config() -> Non assert "default" in vec_config assert vec_config["default"].vectorizer.model == {} assert vec_config["default"].vectorizer.source_properties is None + + +def _make_text_prop(name: str, **extra) -> dict: + base = { + "name": name, + "dataType": ["text"], + "indexFilterable": True, + "indexSearchable": True, + "indexRangeFilters": False, + "tokenization": "word", + } + base.update(extra) + return base + + +def test_properties_from_config_parses_text_analyzer() -> None: + schema = { + "vectorizer": "none", + "properties": [ + _make_text_prop( + "title", + textAnalyzer={"asciiFold": True, "asciiFoldIgnore": ["é"]}, + ), + _make_text_prop("body"), + ], + } + props = _properties_from_config(schema) + title = next(p for p in props if p.name == "title") + body = next(p for p in props if p.name == "body") + + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + + 
assert body.text_analyzer is None + + # The dataclass round-trips back to the wire format. + assert title.to_dict()["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["é"], + } + assert "textAnalyzer" not in body.to_dict() + + +def test_properties_from_config_text_analyzer_defaults_when_partial() -> None: + schema = { + "vectorizer": "none", + "properties": [ + _make_text_prop("title", textAnalyzer={"asciiFoldIgnore": ["é"]}), + ], + } + title = _properties_from_config(schema)[0] + assert title.text_analyzer is not None + # asciiFold defaults to False when omitted from the server response + assert title.text_analyzer.ascii_fold is False + assert title.text_analyzer.ascii_fold_ignore == ["é"] + + +def test_nested_properties_from_config_parses_text_analyzer() -> None: + nested = _nested_properties_from_config( + [ + _make_text_prop( + "title", + textAnalyzer={"asciiFold": True, "asciiFoldIgnore": ["ñ"]}, + ), + ] + ) + assert nested[0].text_analyzer is not None + assert nested[0].text_analyzer.ascii_fold is True + assert nested[0].text_analyzer.ascii_fold_ignore == ["ñ"] + assert nested[0].to_dict()["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["ñ"], + } diff --git a/weaviate/classes/config.py b/weaviate/classes/config.py index ce1faf993..868cd1c79 100644 --- a/weaviate/classes/config.py +++ b/weaviate/classes/config.py @@ -12,6 +12,7 @@ ReplicationDeletionStrategy, Rerankers, StopwordsPreset, + TextAnalyzerConfig, Tokenization, VectorDistances, ) @@ -39,6 +40,7 @@ "ReferenceProperty", "Rerankers", "StopwordsPreset", + "TextAnalyzerConfig", "Tokenization", "Vectorizers", "VectorDistances", diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 2a8b8d600..415d7cc47 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1671,6 +1671,12 @@ class _PropertyVectorizerConfig: PropertyVectorizerConfig = _PropertyVectorizerConfig +@dataclass +class 
_TextAnalyzerConfig(_ConfigBase): + ascii_fold: bool + ascii_fold_ignore: Optional[List[str]] + + @dataclass class _NestedProperty(_ConfigBase): data_type: DataType @@ -1679,6 +1685,7 @@ class _NestedProperty(_ConfigBase): index_searchable: bool name: str nested_properties: Optional[List["NestedProperty"]] + text_analyzer: Optional[_TextAnalyzerConfig] tokenization: Optional[Tokenization] def to_dict(self) -> Dict[str, Any]: @@ -1712,6 +1719,7 @@ class _Property(_PropertyBase): index_range_filters: bool index_searchable: bool nested_properties: Optional[List[NestedProperty]] + text_analyzer: Optional[_TextAnalyzerConfig] tokenization: Optional[Tokenization] vectorizer_config: Optional[PropertyVectorizerConfig] vectorizer: Optional[str] @@ -1724,6 +1732,8 @@ def to_dict(self) -> Dict[str, Any]: out["indexSearchable"] = self.index_searchable out["indexRangeFilters"] = self.index_range_filters out["tokenization"] = self.tokenization.value if self.tokenization else None + if self.text_analyzer is not None: + out["textAnalyzer"] = self.text_analyzer.to_dict() if self.nested_properties is not None and len(self.nested_properties) > 0: out["nestedProperties"] = [np.to_dict() for np in self.nested_properties] module_config: Dict[str, Any] = {} @@ -2161,6 +2171,27 @@ class _ShardStatus: ShardStatus = _ShardStatus +class TextAnalyzerConfig(_ConfigCreateModel): + """Text analysis options for a property. + + Configures ASCII folding behavior for `text` and `text[]` properties that use an + inverted index (searchable or filterable). When enabled, accent/diacritic marks are + folded to their base characters during indexing and search (e.g. 'école' matches + 'ecole'). + + Attributes: + ascii_fold: If True, accent/diacritic marks are folded to their base characters + during indexing and search. Defaults to False. + ascii_fold_ignore: Optional list of characters that should be excluded from + ASCII folding (e.g. ['é'] keeps 'é' from being folded to 'e'). 
+ + Both settings are immutable after the property is created. + """ + + asciiFold: Optional[bool] = Field(default=None, alias="ascii_fold") + asciiFoldIgnore: Optional[List[str]] = Field(default=None, alias="ascii_fold_ignore") + + class Property(_ConfigCreateModel): """This class defines the structure of a data property that a collection can have within Weaviate. @@ -2173,6 +2204,9 @@ class Property(_ConfigCreateModel): index_searchable: Whether the property should be searchable in the inverted index. nested_properties: nested properties for data type OBJECT and OBJECT_ARRAY`. skip_vectorization: Whether to skip vectorization of the property. Defaults to `False`. + text_analyzer: Text analysis options for the property. Configures ASCII folding + behavior for text and text[] properties using an inverted index. Immutable + after the property is created. tokenization: The tokenization method to use for the inverted index. Defaults to `None`. vectorize_property_name: Whether to vectorize the property name. Defaults to `True`. 
""" @@ -2187,6 +2221,7 @@ class Property(_ConfigCreateModel): default=None, alias="nested_properties" ) skip_vectorization: bool = Field(default=False) + textAnalyzer: Optional[TextAnalyzerConfig] = Field(default=None, alias="text_analyzer") tokenization: Optional[Tokenization] = Field(default=None) vectorize_property_name: bool = Field(default=True) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index c150394f1..4bb8a53b9 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -39,6 +39,7 @@ _ShardingConfig, _SQConfig, _StopwordsConfig, + _TextAnalyzerConfig, _VectorIndexConfigDynamic, _VectorIndexConfigFlat, _VectorIndexConfigHFresh, @@ -462,6 +463,16 @@ def _collection_configs_simple_from_json( return dict(sorted(configs.items())) +def _text_analyzer_from_config(prop: Dict[str, Any]) -> Optional[_TextAnalyzerConfig]: + ta = prop.get("textAnalyzer") + if ta is None: + return None + return _TextAnalyzerConfig( + ascii_fold=ta.get("asciiFold", False), + ascii_fold_ignore=ta.get("asciiFoldIgnore"), + ) + + def _nested_properties_from_config(props: List[Dict[str, Any]]) -> List[_NestedProperty]: return [ _NestedProperty( @@ -475,6 +486,7 @@ def _nested_properties_from_config(props: List[Dict[str, Any]]) -> List[_NestedP if prop.get("nestedProperties") is not None else None ), + text_analyzer=_text_analyzer_from_config(prop), tokenization=( Tokenization(prop["tokenization"]) if prop.get("tokenization") is not None else None ), @@ -497,6 +509,7 @@ def _properties_from_config(schema: Dict[str, Any]) -> List[_Property]: if prop.get("nestedProperties") is not None else None ), + text_analyzer=_text_analyzer_from_config(prop), tokenization=( Tokenization(prop["tokenization"]) if prop.get("tokenization") is not None else None ), From 6931a6f180ebb6955cb2ad799b7c84fdd5141b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 
Apr 2026 14:38:13 +0100 Subject: [PATCH 27/99] refactor: ruff format --- integration/test_collection_config.py | 8 ++------ test/collection/test_config.py | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 4ba57532c..99305adb7 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2208,9 +2208,7 @@ def test_property_text_analyzer_ascii_fold(collection_factory: CollectionFactory name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig( - ascii_fold=True, ascii_fold_ignore=["é"] - ), + text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), ), Property( name="body", @@ -2253,9 +2251,7 @@ def test_property_text_analyzer_ascii_fold_in_nested_property( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig( - ascii_fold=True, ascii_fold_ignore=["ñ"] - ), + text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["ñ"]), ), ], ), diff --git a/test/collection/test_config.py b/test/collection/test_config.py index bae46f6c1..4693a44aa 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -3043,9 +3043,7 @@ def test_property_with_ascii_fold_and_ignore(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig( - ascii_fold=True, ascii_fold_ignore=["é", "ñ"] - ), + text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]), ) out = prop._to_dict() assert out["textAnalyzer"] == { @@ -3079,9 +3077,7 @@ def test_nested_property_with_text_analyzer(self) -> None: Property( name="title", data_type=DataType.TEXT, - text_analyzer=TextAnalyzerConfig( - ascii_fold=True, ascii_fold_ignore=["ñ"] - ), + text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["ñ"]), ), ], ) From 
bda3008db236c891d6f67094ca2bdd70ba00ab9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 Apr 2026 14:46:58 +0100 Subject: [PATCH 28/99] feat: add min version check --- integration/test_collection_config.py | 36 +++++++++++++++++++- weaviate/collections/collections/executor.py | 22 +++++++++++- weaviate/collections/config/executor.py | 20 +++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 99305adb7..ef1705b39 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -43,7 +43,11 @@ IndexName, ) from weaviate.collections.classes.tenants import Tenant -from weaviate.exceptions import UnexpectedStatusCodeError, WeaviateInvalidInputError +from weaviate.exceptions import ( + UnexpectedStatusCodeError, + WeaviateInvalidInputError, + WeaviateUnsupportedFeatureError, +) from integration.conftest import retry_on_http_error @@ -2201,6 +2205,10 @@ def test_delete_property_index( def test_property_text_analyzer_ascii_fold(collection_factory: CollectionFactory) -> None: """Create a collection with ascii folding configured and verify it round-trips.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("Property text_analyzer (asciiFold) requires Weaviate >= 1.37.0") + collection = collection_factory( vectorizer_config=Configure.Vectorizer.none(), properties=[ @@ -2240,6 +2248,10 @@ def test_property_text_analyzer_ascii_fold(collection_factory: CollectionFactory def test_property_text_analyzer_ascii_fold_in_nested_property( collection_factory: CollectionFactory, ) -> None: + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("Property text_analyzer (asciiFold) requires Weaviate >= 1.37.0") + collection = collection_factory( vectorizer_config=Configure.Vectorizer.none(), 
properties=[ @@ -2265,3 +2277,25 @@ def test_property_text_analyzer_ascii_fold_in_nested_property( assert nested_title.text_analyzer is not None assert nested_title.text_analyzer.ascii_fold is True assert nested_title.text_analyzer.ascii_fold_ignore == ["ñ"] + + +def test_property_text_analyzer_ascii_fold_version_gate( + collection_factory: CollectionFactory, +) -> None: + """On Weaviate < 1.37 the client must raise before sending the request.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_at_least(1, 37, 0): + pytest.skip("Version gate only applies to Weaviate < 1.37.0") + + with pytest.raises(WeaviateUnsupportedFeatureError): + collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(ascii_fold=True), + ), + ], + ) diff --git a/weaviate/collections/collections/executor.py b/weaviate/collections/collections/executor.py index 8497cdf51..eb90df5d7 100644 --- a/weaviate/collections/collections/executor.py +++ b/weaviate/collections/collections/executor.py @@ -50,7 +50,7 @@ ConnectionType, _ExpectedStatusCodes, ) -from weaviate.exceptions import WeaviateInvalidInputError +from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError from weaviate.util import _capitalize_first_letter, _decode_json_response_dict from weaviate.validator import _validate_input, _ValidateArgument from weaviate.warnings import _Warnings @@ -58,6 +58,19 @@ CollectionType = TypeVar("CollectionType", Collection, CollectionAsync) +def _any_property_has_text_analyzer(properties: Sequence[Property]) -> bool: + for prop in properties: + if prop.textAnalyzer is not None: + return True + nested = prop.nestedProperties + if nested is None: + continue + nested_list = nested if isinstance(nested, list) else [nested] + if _any_property_has_text_analyzer(nested_list): + return True + return 
False + + class _CollectionsExecutor(Generic[ConnectionType]): def __init__(self, connection: ConnectionType): self._connection = connection @@ -213,6 +226,13 @@ def create( _Warnings.vectorizer_config_in_config_create() if vector_index_config is not None: _Warnings.vector_index_config_in_config_create() + if properties is not None and _any_property_has_text_analyzer(properties): + if not self._connection._weaviate_version.is_at_least(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Property text_analyzer (asciiFold)", + str(self._connection._weaviate_version), + "1.37.0", + ) try: config = _CollectionConfigCreate( description=description, diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index c95cba5a3..92dc1d792 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -53,6 +53,7 @@ from weaviate.connect.v4 import ConnectionAsync, ConnectionType, _ExpectedStatusCodes from weaviate.exceptions import ( WeaviateInvalidInputError, + WeaviateUnsupportedFeatureError, ) from weaviate.util import ( _capitalize_first_letter, @@ -63,6 +64,16 @@ from weaviate.warnings import _Warnings +def _property_has_text_analyzer(prop: Property) -> bool: + if prop.textAnalyzer is not None: + return True + nested = prop.nestedProperties + if nested is None: + return False + nested_list = nested if isinstance(nested, list) else [nested] + return any(_property_has_text_analyzer(np) for np in nested_list) + + class _ConfigCollectionExecutor(Generic[ConnectionType]): def __init__( self, @@ -244,6 +255,15 @@ async def _execute() -> None: return executor.result(resp(schema)) def __add_property(self, additional_property: PropertyType) -> executor.Result[None]: + if isinstance(additional_property, Property) and _property_has_text_analyzer( + additional_property + ): + if not self._connection._weaviate_version.is_at_least(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Property text_analyzer 
(asciiFold)", + str(self._connection._weaviate_version), + "1.37.0", + ) path = f"/schema/{self._name}/properties" obj = additional_property._to_dict() From 77fc0ffc9c57d6a83a52a8ffa313c4ff9b3f0079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 Apr 2026 21:50:42 +0100 Subject: [PATCH 29/99] feat: update TextAnalyzerConfig docstring for ascii_fold attributes --- weaviate/collections/classes/config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 415d7cc47..e15f33dc6 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2181,9 +2181,11 @@ class TextAnalyzerConfig(_ConfigCreateModel): Attributes: ascii_fold: If True, accent/diacritic marks are folded to their base characters - during indexing and search. Defaults to False. + during indexing and search. If omitted, the field is not sent to the server + and the server default (False) applies. ascii_fold_ignore: Optional list of characters that should be excluded from - ASCII folding (e.g. ['é'] keeps 'é' from being folded to 'e'). + ASCII folding (e.g. ['é'] keeps 'é' from being folded to 'e'). If omitted, + the field is not sent to the server. Both settings are immutable after the property is created. 
""" From a8d6927dd79953dc4800461ab163cd823b5b39a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 Apr 2026 21:50:51 +0100 Subject: [PATCH 30/99] feat: add asciiFold check in _text_analyzer_from_config function --- weaviate/collections/classes/config_methods.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index 4bb8a53b9..e79ff45e9 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -467,6 +467,8 @@ def _text_analyzer_from_config(prop: Dict[str, Any]) -> Optional[_TextAnalyzerCo ta = prop.get("textAnalyzer") if ta is None: return None + if "asciiFold" not in ta: + return None return _TextAnalyzerConfig( ascii_fold=ta.get("asciiFold", False), ascii_fold_ignore=ta.get("asciiFoldIgnore"), From e8919a3a9c2778ced02bda5927443aa7dc34aa8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 Apr 2026 21:51:07 +0100 Subject: [PATCH 31/99] test: fix ASCII folding tests --- integration/test_collection_config.py | 65 ++++++++++++++++++++------ test/collection/test_config_methods.py | 9 ++-- 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index ef1705b39..6a4cd70b0 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2212,14 +2212,23 @@ def test_property_text_analyzer_ascii_fold(collection_factory: CollectionFactory collection = collection_factory( vectorizer_config=Configure.Vectorizer.none(), properties=[ + # Folds all accents. Property( - name="title", + name="folded", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(ascii_fold=True), + ), + # Folds all accents EXCEPT 'é'. 
+ Property( + name="folded_except_e_acute", data_type=DataType.TEXT, tokenization=Tokenization.WORD, text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), ), + # No folding at all. Property( - name="body", + name="plain", data_type=DataType.TEXT, tokenization=Tokenization.WORD, ), @@ -2227,21 +2236,51 @@ def test_property_text_analyzer_ascii_fold(collection_factory: CollectionFactory ) config = collection.config.get() - title = next(p for p in config.properties if p.name == "title") - body = next(p for p in config.properties if p.name == "body") + folded = next(p for p in config.properties if p.name == "folded") + folded_ignore = next(p for p in config.properties if p.name == "folded_except_e_acute") + plain = next(p for p in config.properties if p.name == "plain") + + assert folded.text_analyzer is not None + assert folded.text_analyzer.ascii_fold is True + assert folded.text_analyzer.ascii_fold_ignore is None + + assert folded_ignore.text_analyzer is not None + assert folded_ignore.text_analyzer.ascii_fold is True + assert folded_ignore.text_analyzer.ascii_fold_ignore == ["é"] + + assert plain.text_analyzer is None - assert title.text_analyzer is not None - assert title.text_analyzer.ascii_fold is True - assert title.text_analyzer.ascii_fold_ignore == ["é"] + # Insert one doc that contains both an ignored accent ('é') and a non-ignored + # accent ('ç') so the test can distinguish "folded" from "ignored" behavior. + collection.data.insert( + { + "folded": "école française", + "folded_except_e_acute": "école française", + "plain": "école française", + } + ) - # Properties without a text_analyzer should not have one in the parsed config. - assert body.text_analyzer is None + # `folded` folds everything → both 'ecole' and 'francaise' match. 
+ res = collection.query.bm25(query="ecole", query_properties=["folded"]) + assert len(res.objects) == 1 + res = collection.query.bm25(query="francaise", query_properties=["folded"]) + assert len(res.objects) == 1 - # Folding actually takes effect: 'école' is searchable as 'ecole' but 'é' is preserved. - collection.data.insert({"title": "école française", "body": "école française"}) - res = collection.query.bm25(query="ecole", query_properties=["title"]) + # `folded_except_e_acute` preserves 'é' but still folds 'ç' → 'c'. + # Querying 'ecole' must NOT match because 'é' is in the ignore list. + res = collection.query.bm25(query="ecole", query_properties=["folded_except_e_acute"]) + assert len(res.objects) == 0 + # Querying the original 'école' still matches. + res = collection.query.bm25(query="école", query_properties=["folded_except_e_acute"]) assert len(res.objects) == 1 - res = collection.query.bm25(query="ecole", query_properties=["body"]) + # 'ç' is not in the ignore list, so it still folds and 'francaise' matches. + res = collection.query.bm25(query="francaise", query_properties=["folded_except_e_acute"]) + assert len(res.objects) == 1 + + # `plain` does no folding at all → ASCII queries don't match accented tokens. 
+ res = collection.query.bm25(query="ecole", query_properties=["plain"]) + assert len(res.objects) == 0 + res = collection.query.bm25(query="francaise", query_properties=["plain"]) assert len(res.objects) == 0 diff --git a/test/collection/test_config_methods.py b/test/collection/test_config_methods.py index d934053c9..472213ff9 100644 --- a/test/collection/test_config_methods.py +++ b/test/collection/test_config_methods.py @@ -116,18 +116,17 @@ def test_properties_from_config_parses_text_analyzer() -> None: assert "textAnalyzer" not in body.to_dict() -def test_properties_from_config_text_analyzer_defaults_when_partial() -> None: +def test_properties_from_config_text_analyzer_omitted_when_no_ascii_fold() -> None: + """If the server response omits asciiFold, the client treats text_analyzer as unset.""" schema = { "vectorizer": "none", "properties": [ + # Server response with textAnalyzer present but no asciiFold key _make_text_prop("title", textAnalyzer={"asciiFoldIgnore": ["é"]}), ], } title = _properties_from_config(schema)[0] - assert title.text_analyzer is not None - # asciiFold defaults to False when omitted from the server response - assert title.text_analyzer.ascii_fold is False - assert title.text_analyzer.ascii_fold_ignore == ["é"] + assert title.text_analyzer is None def test_nested_properties_from_config_parses_text_analyzer() -> None: From 3cc63063de362d03a9a8cb5b40c3496ebc6c62ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 Apr 2026 22:09:42 +0100 Subject: [PATCH 32/99] feat: add support for stopword presets in inverted index configuration and text analyzer --- weaviate/collections/classes/config.py | 56 +++++++++++++++---- weaviate/collections/classes/config_base.py | 2 +- .../collections/classes/config_methods.py | 6 +- weaviate/collections/collections/executor.py | 12 +++- weaviate/collections/config/executor.py | 10 ++++ 5 files changed, 73 insertions(+), 13 deletions(-) diff --git 
a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index e15f33dc6..3d837b81c 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -380,12 +380,14 @@ class _InvertedIndexConfigCreate(_ConfigCreateModel): indexPropertyLength: Optional[bool] indexNullState: Optional[bool] stopwords: _StopwordsCreate + stopwordPresets: Optional[Dict[str, List[str]]] = None class _InvertedIndexConfigUpdate(_ConfigUpdateModel): bm25: Optional[_BM25ConfigUpdate] cleanupIntervalSeconds: Optional[int] stopwords: Optional[_StopwordsUpdate] + stopwordPresets: Optional[Dict[str, List[str]]] = None class _MultiTenancyConfigCreate(_ConfigCreateModel): @@ -1647,6 +1649,7 @@ class _InvertedIndexConfig(_ConfigBase): index_property_length: bool index_timestamps: bool stopwords: StopwordsConfig + stopword_presets: Optional[Dict[str, List[str]]] = None InvertedIndexConfig = _InvertedIndexConfig @@ -1675,6 +1678,7 @@ class _PropertyVectorizerConfig: class _TextAnalyzerConfig(_ConfigBase): ascii_fold: bool ascii_fold_ignore: Optional[List[str]] + stopword_preset: Optional[str] @dataclass @@ -2174,24 +2178,42 @@ class _ShardStatus: class TextAnalyzerConfig(_ConfigCreateModel): """Text analysis options for a property. - Configures ASCII folding behavior for `text` and `text[]` properties that use an - inverted index (searchable or filterable). When enabled, accent/diacritic marks are - folded to their base characters during indexing and search (e.g. 'école' matches - 'ecole'). + Configures per-property text analysis for `text` and `text[]` properties that use an + inverted index (searchable or filterable). Supports ASCII folding (accent/diacritic + handling) and selecting a stopword preset that overrides the collection-level + `invertedIndexConfig.stopwords` setting for this property only. Attributes: ascii_fold: If True, accent/diacritic marks are folded to their base characters - during indexing and search. 
If omitted, the field is not sent to the server - and the server default (False) applies. + during indexing and search (e.g. 'école' matches 'ecole'). If omitted, the + field is not sent to the server and the server default (False) applies. ascii_fold_ignore: Optional list of characters that should be excluded from ASCII folding (e.g. ['é'] keeps 'é' from being folded to 'e'). If omitted, the field is not sent to the server. - - Both settings are immutable after the property is created. + stopword_preset: Stopword preset name. Overrides the collection-level + `invertedIndexConfig.stopwords` for this property. Only applies to + properties using `Tokenization.WORD`. Accepts a built-in preset + (`StopwordsPreset.EN` or `StopwordsPreset.NONE`) or the name of a + user-defined preset declared in + `Configure.inverted_index(stopword_presets=...)`. + + All settings are immutable after the property is created. """ asciiFold: Optional[bool] = Field(default=None, alias="ascii_fold") asciiFoldIgnore: Optional[List[str]] = Field(default=None, alias="ascii_fold_ignore") + stopwordPreset: Optional[Union[StopwordsPreset, str]] = Field( + default=None, alias="stopword_preset" + ) + + @field_validator("stopwordPreset", mode="before") + @classmethod + def _coerce_stopword_preset(cls, v: Any) -> Any: + # Pydantic preserves the StopwordsPreset enum instance through model_dump, + # but the wire format must be a plain string. Coerce at construction time. + if isinstance(v, StopwordsPreset): + return v.value + return v class Property(_ConfigCreateModel): @@ -2615,11 +2637,17 @@ def inverted_index( stopwords_preset: Optional[StopwordsPreset] = None, stopwords_additions: Optional[List[str]] = None, stopwords_removals: Optional[List[str]] = None, + stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> _InvertedIndexConfigCreate: """Create an `InvertedIndexConfigCreate` object to be used when defining the configuration of the keyword searching algorithm of Weaviate. 
Args: - See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for details! + stopword_presets: User-defined named stopword lists keyed by preset name. Each value + is a flat list of stopword strings. A preset can be referenced from a property's + `text_analyzer.stopword_preset` to override the collection-level stopwords for + that property only. Requires Weaviate >= 1.37.0. + + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for details on the other parameters. """ # noqa: D417 (missing argument descriptions in the docstring) if bm25_b is None and bm25_k1 is not None or bm25_k1 is None and bm25_b is not None: raise ValueError("bm25_b and bm25_k1 must be specified together") @@ -2639,6 +2667,7 @@ def inverted_index( additions=stopwords_additions, removals=stopwords_removals, ), + stopwordPresets=stopword_presets, ) @staticmethod @@ -2913,13 +2942,19 @@ def inverted_index( stopwords_additions: Optional[List[str]] = None, stopwords_preset: Optional[StopwordsPreset] = None, stopwords_removals: Optional[List[str]] = None, + stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> _InvertedIndexConfigUpdate: """Create an `InvertedIndexConfigUpdate` object. Use this method when defining the `inverted_index_config` argument in `collection.update()`. Args: - See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for a more detailed view! + stopword_presets: User-defined named stopword lists keyed by preset name. Each value + is a flat list of stopword strings. Passing this replaces the entire user-defined + stopword preset map for the collection. Removing a preset still referenced by a + property is rejected by the server. Requires Weaviate >= 1.37.0. + + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for details on the other parameters. 
""" # noqa: D417 (missing argument descriptions in the docstring) return _InvertedIndexConfigUpdate( bm25=_BM25ConfigUpdate(b=bm25_b, k1=bm25_k1), @@ -2929,6 +2964,7 @@ def inverted_index( additions=stopwords_additions, removals=stopwords_removals, ), + stopwordPresets=stopword_presets, ) @staticmethod diff --git a/weaviate/collections/classes/config_base.py b/weaviate/collections/classes/config_base.py index fc696fdfb..aa572795e 100644 --- a/weaviate/collections/classes/config_base.py +++ b/weaviate/collections/classes/config_base.py @@ -29,7 +29,7 @@ def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]: continue if isinstance(val, Enum): schema[cls_field] = str(val.value) - elif isinstance(val, (int, float, bool, str, list)): + elif isinstance(val, (int, float, bool, str, list, dict)): schema[cls_field] = val elif isinstance(val, _QuantizerConfigUpdate): quantizers = ["pq", "bq", "sq"] diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index e79ff45e9..691cf208d 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -357,6 +357,7 @@ def _collection_config_from_json(schema: Dict[str, Any]) -> _CollectionConfig: additions=schema["invertedIndexConfig"]["stopwords"]["additions"], removals=schema["invertedIndexConfig"]["stopwords"]["removals"], ), + stopword_presets=schema["invertedIndexConfig"].get("stopwordPresets"), ), multi_tenancy_config=_MultiTenancyConfig( enabled=schema.get("multiTenancyConfig", {}).get("enabled", False), @@ -467,11 +468,14 @@ def _text_analyzer_from_config(prop: Dict[str, Any]) -> Optional[_TextAnalyzerCo ta = prop.get("textAnalyzer") if ta is None: return None - if "asciiFold" not in ta: + # The server normalizes an empty TextAnalyzer to nil (see usecases/schema/validation.go), + # so the only meaningful signal is the presence of one of the configured fields. 
+ if "asciiFold" not in ta and "stopwordPreset" not in ta: return None return _TextAnalyzerConfig( ascii_fold=ta.get("asciiFold", False), ascii_fold_ignore=ta.get("asciiFoldIgnore"), + stopword_preset=ta.get("stopwordPreset"), ) diff --git a/weaviate/collections/collections/executor.py b/weaviate/collections/collections/executor.py index eb90df5d7..cdd46fda9 100644 --- a/weaviate/collections/collections/executor.py +++ b/weaviate/collections/collections/executor.py @@ -229,10 +229,20 @@ def create( if properties is not None and _any_property_has_text_analyzer(properties): if not self._connection._weaviate_version.is_at_least(1, 37, 0): raise WeaviateUnsupportedFeatureError( - "Property text_analyzer (asciiFold)", + "Property text_analyzer (asciiFold / stopword_preset)", str(self._connection._weaviate_version), "1.37.0", ) + if ( + inverted_index_config is not None + and inverted_index_config.stopwordPresets is not None + and not self._connection._weaviate_version.is_at_least(1, 37, 0) + ): + raise WeaviateUnsupportedFeatureError( + "InvertedIndexConfig stopword_presets", + str(self._connection._weaviate_version), + "1.37.0", + ) try: config = _CollectionConfigCreate( description=description, diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index 92dc1d792..ca431198e 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -210,6 +210,16 @@ def update( ), ): _Warnings.vectorizer_config_in_config_update() + if ( + inverted_index_config is not None + and inverted_index_config.stopwordPresets is not None + and not self._connection._weaviate_version.is_at_least(1, 37, 0) + ): + raise WeaviateUnsupportedFeatureError( + "InvertedIndexConfig stopword_presets", + str(self._connection._weaviate_version), + "1.37.0", + ) try: config = _CollectionConfigUpdate( description=description, From ef04dea6478be2c17635a750038b340219eb5b34 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 Apr 2026 22:10:04 +0100 Subject: [PATCH 33/99] test: added live and config tests --- integration/test_collection_config.py | 569 +++++++++++++++++++++++++ test/collection/test_config.py | 98 +++++ test/collection/test_config_methods.py | 100 +++++ 3 files changed, 767 insertions(+) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 6a4cd70b0..f53667b3e 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -42,6 +42,7 @@ _VectorizerConfigCreate, IndexName, ) +from weaviate.collections.classes.filters import Filter from weaviate.collections.classes.tenants import Tenant from weaviate.exceptions import ( UnexpectedStatusCodeError, @@ -2338,3 +2339,571 @@ def test_property_text_analyzer_ascii_fold_version_gate( ), ], ) + + +def test_collection_stopword_presets(collection_factory: CollectionFactory) -> None: + """User-defined stopword presets defined at collection level apply to properties + that reference them via text_analyzer.stopword_preset, and built-in presets can + coexist with user-defined ones.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + # User-defined French preset. + Property( + name="title_fr", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + ), + # Built-in English preset, set per property. + Property( + name="title_en", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset=StopwordsPreset.EN), + ), + # No stopword override → uses the collection-level default. 
+ Property( + name="plain", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + ), + ], + ) + + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + + title_fr = next(p for p in config.properties if p.name == "title_fr") + title_en = next(p for p in config.properties if p.name == "title_en") + plain = next(p for p in config.properties if p.name == "plain") + assert title_fr.text_analyzer is not None + assert title_fr.text_analyzer.stopword_preset == "fr" + assert title_en.text_analyzer is not None + assert title_en.text_analyzer.stopword_preset == "en" + assert plain.text_analyzer is None + + collection.data.insert( + { + "title_fr": "le chat noir", + "title_en": "the black cat", + "plain": "the black cat", + } + ) + + # title_fr filters 'le' (in user preset) → BM25 for 'le' yields no match, + # but 'chat' still matches. + res = collection.query.bm25(query="le", query_properties=["title_fr"]) + assert len(res.objects) == 0 + res = collection.query.bm25(query="chat", query_properties=["title_fr"]) + assert len(res.objects) == 1 + + # title_en filters 'the' (built-in en) but matches 'cat'. 
+ res = collection.query.bm25(query="the", query_properties=["title_en"]) + assert len(res.objects) == 0 + res = collection.query.bm25(query="cat", query_properties=["title_en"]) + assert len(res.objects) == 1 + + +def test_collection_stopword_presets_update(collection_factory: CollectionFactory) -> None: + """Updating the contents of an existing user-defined stopword preset takes effect + immediately for queries against properties that reference it.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le"]}, + ), + properties=[ + Property( + name="title_fr", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + ), + ], + ) + + collection.data.insert({"title_fr": "le chat et la souris"}) + + # Baseline: 'le' is filtered, 'la' is not (it isn't in the preset yet). + res = collection.query.bm25(query="le", query_properties=["title_fr"]) + assert len(res.objects) == 0 + res = collection.query.bm25(query="la", query_properties=["title_fr"]) + assert len(res.objects) == 1 + + # Replace the preset contents: drop 'le', add 'la'. + collection.config.update( + inverted_index_config=Reconfigure.inverted_index( + stopword_presets={"fr": ["la"]}, + ), + ) + + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["la"]} + + # After update: 'le' now passes through, 'la' is filtered. 
+ res = collection.query.bm25(query="le", query_properties=["title_fr"]) + assert len(res.objects) == 1 + res = collection.query.bm25(query="la", query_properties=["title_fr"]) + assert len(res.objects) == 0 + + +def test_collection_stopword_presets_remove_in_use_is_rejected( + collection_factory: CollectionFactory, +) -> None: + """The server rejects removing a stopword preset still referenced by a property.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + Property( + name="title_fr", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + ), + ], + ) + + with pytest.raises(UnexpectedStatusCodeError): + collection.config.update( + inverted_index_config=Reconfigure.inverted_index(stopword_presets={}), + ) + + # The original preset must still be present after the rejected update. 
+ config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + + +def test_inverted_index_stopword_presets_version_gate( + collection_factory: CollectionFactory, +) -> None: + """On Weaviate < 1.37 the client must raise before sending the request.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_at_least(1, 37, 0): + pytest.skip("Version gate only applies to Weaviate < 1.37.0") + + with pytest.raises(WeaviateUnsupportedFeatureError): + collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la"]}, + ), + ) + + +def test_collection_stopword_presets_remove_unused_is_allowed( + collection_factory: CollectionFactory, +) -> None: + """Removing a preset that no property references must succeed.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={ + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + }, + ), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + ), + ], + ) + + # Drop only 'es' (unused). 'fr' is still referenced by title. 
+ collection.config.update( + inverted_index_config=Reconfigure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + ) + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + + +def test_collection_stopword_presets_remove_referenced_by_nested_property_is_rejected( + collection_factory: CollectionFactory, +) -> None: + """A removed preset still referenced by a nested property must be rejected by the server.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + Property( + name="doc", + data_type=DataType.OBJECT, + nested_properties=[ + Property( + name="body", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + ), + ], + ), + ], + ) + + with pytest.raises(UnexpectedStatusCodeError): + collection.config.update( + inverted_index_config=Reconfigure.inverted_index(stopword_presets={}), + ) + + # The original preset must still be present after the rejected update. 
+ config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + + +def test_collection_user_defined_stopword_preset_overrides_builtin( + collection_factory: CollectionFactory, +) -> None: + """A user-defined preset named 'en' replaces the built-in 'en' for properties of this collection.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + # Shadow the built-in 'en' with a user-defined preset that filters + # 'hello' but NOT 'the'. + stopword_presets={"en": ["hello"]}, + ), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset="en"), + ), + ], + ) + + collection.data.insert({"title": "the quick hello world"}) + + # 'the' is no longer filtered (built-in en is overridden) → matches. + res = collection.query.bm25(query="the", query_properties=["title"]) + assert len(res.objects) == 1 + # 'hello' was added by the override → filtered. + res = collection.query.bm25(query="hello", query_properties=["title"]) + assert len(res.objects) == 0 + # 'quick' is in neither list → matches. 
+ res = collection.query.bm25(query="quick", query_properties=["title"]) + assert len(res.objects) == 1 + + +def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset( + collection_factory: CollectionFactory, +) -> None: + """A single property may combine ascii_fold and stopword_preset.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig( + ascii_fold=True, + stopword_preset=StopwordsPreset.EN, + ), + ), + ], + ) + + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.stopword_preset == "en" + + collection.data.insert({"title": "The école française"}) + + # 'the' is filtered by built-in en → no match. + res = collection.query.bm25(query="the", query_properties=["title"]) + assert len(res.objects) == 0 + # 'ecole' matches because ascii_fold folds 'école'. + res = collection.query.bm25(query="ecole", query_properties=["title"]) + assert len(res.objects) == 1 + # 'francaise' matches because ascii_fold folds 'française'. 
+ res = collection.query.bm25(query="francaise", query_properties=["title"]) + assert len(res.objects) == 1 + + +def test_property_text_analyzer_ascii_fold_immutable( + collection_factory: CollectionFactory, +) -> None: + """The asciiFold setting is immutable; updating it via add_property on a renamed + property is the only way to change behavior, but the original property cannot + have its analyzer mutated.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), + ), + ], + ) + + # The config exposes the original ignore list and there's no client API + # surface to mutate text_analyzer on an existing property — it can only be + # set at create time. + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold_ignore == ["é"] + + # Adding a *new* property with a different analyzer is allowed. + collection.config.add_property( + Property( + name="title2", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["ñ"]), + ), + ) + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + title2 = next(p for p in config.properties if p.name == "title2") + # Original property's analyzer is unchanged. + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold_ignore == ["é"] + # New property has its own analyzer. 
+ assert title2.text_analyzer is not None + assert title2.text_analyzer.ascii_fold_ignore == ["ñ"] + + +@pytest.mark.parametrize( + "tokenization,query_match,query_no_match", + [ + (Tokenization.WORD, "école", "ecole"), + (Tokenization.LOWERCASE, "l'école", "l'ecole"), + (Tokenization.WHITESPACE, "L'école", "L'ecole"), + (Tokenization.FIELD, "L'école est fermée", "L'ecole est fermee"), + (Tokenization.TRIGRAM, "éco", "eco"), + ], + ids=["word", "lowercase", "whitespace", "field", "trigram"], +) +def test_property_ascii_fold_across_tokenizations( + collection_factory: CollectionFactory, + tokenization: Tokenization, + query_match: str, + query_no_match: str, +) -> None: + """ascii_fold + ignore list ['é'] preserves é under every supported tokenization.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="prop", + data_type=DataType.TEXT, + tokenization=tokenization, + text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), + ), + Property( + name="prop_no_ignore", + data_type=DataType.TEXT, + tokenization=tokenization, + text_analyzer=TextAnalyzerConfig(ascii_fold=True), + ), + ], + ) + collection.data.insert( + { + "prop": "L'école est fermée", + "prop_no_ignore": "L'école est fermée", + } + ) + + # On the property with the ignore list, the accented form matches but the + # ASCII-folded form does not. + res = collection.query.bm25(query=query_match, query_properties=["prop"]) + assert len(res.objects) == 1 + res = collection.query.bm25(query=query_no_match, query_properties=["prop"]) + assert len(res.objects) == 0 + + # On the property without the ignore list, both forms match. 
+ res = collection.query.bm25(query=query_match, query_properties=["prop_no_ignore"]) + assert len(res.objects) == 1 + res = collection.query.bm25(query=query_no_match, query_properties=["prop_no_ignore"]) + assert len(res.objects) == 1 + + +def test_property_ascii_fold_multi_char_ignore( + collection_factory: CollectionFactory, +) -> None: + """An ignore list with multiple characters preserves all of them while still + folding the rest.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="multi_ignore", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig( + ascii_fold=True, + ascii_fold_ignore=["é", "ü", "ñ", "ø"], + ), + ), + ], + ) + collection.data.insert_many( + [ + {"multi_ignore": "résumé"}, + {"multi_ignore": "über"}, + {"multi_ignore": "El Niño"}, + {"multi_ignore": "Ørsted"}, + {"multi_ignore": "São Paulo"}, + {"multi_ignore": "naïve café"}, + ] + ) + + # All four ignored chars are preserved → ASCII-folded form does not match, + # but the accented form does. + for accented, folded in [ + ("résumé", "resume"), + ("über", "uber"), + ("niño", "nino"), + ("ørsted", "orsted"), + ]: + match = collection.query.bm25(query=accented, query_properties=["multi_ignore"]) + no_match = collection.query.bm25(query=folded, query_properties=["multi_ignore"]) + assert len(match.objects) == 1, f"{accented} should match" + assert len(no_match.objects) == 0, f"{folded} should not match (char in ignore list)" + + # Non-ignored accents (ã, ï) still fold normally. 
+ res = collection.query.bm25(query="sao", query_properties=["multi_ignore"]) + assert len(res.objects) == 1 + res = collection.query.bm25(query="naive", query_properties=["multi_ignore"]) + assert len(res.objects) == 1 + + # The "naïve café" doc has both an ignored char (é) and a non-ignored one (ï): + # 'naive' matches (ï folded), 'cafe' does not (é preserved). + res = collection.query.fetch_objects( + filters=Filter.by_property("multi_ignore").equal("naive café"), + limit=10, + ) + assert len(res.objects) == 1 + + +def test_property_ascii_fold_with_filters( + collection_factory: CollectionFactory, +) -> None: + """Equal and Like filters respect ascii_fold and ascii_fold_ignore.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + # Folds everything. + Property( + name="body", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(ascii_fold=True), + ), + # Folds everything except 'é'. + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), + ), + ], + ) + collection.data.insert_many( + [ + {"title": "L'école est fermée", "body": "L'école est fermée"}, + {"title": "cafe résumé", "body": "cafe résumé"}, + ] + ) + + # Equal on body (full fold): 'ecole' matches. + res = collection.query.fetch_objects( + filters=Filter.by_property("body").equal("ecole"), + limit=5, + ) + assert len(res.objects) == 1 + + # Equal on title (é preserved): 'ecole' does not, but 'école' does. 
+ res = collection.query.fetch_objects( + filters=Filter.by_property("title").equal("école"), + limit=5, + ) + assert len(res.objects) == 1 + res = collection.query.fetch_objects( + filters=Filter.by_property("title").equal("ecole"), + limit=5, + ) + assert len(res.objects) == 0 + + # Like on body (full fold): 'ecol*' matches. + res = collection.query.fetch_objects( + filters=Filter.by_property("body").like("ecol*"), + limit=5, + ) + assert len(res.objects) == 1 + + # Like on title (é preserved): 'écol*' matches, 'ecol*' does not. + res = collection.query.fetch_objects( + filters=Filter.by_property("title").like("écol*"), + limit=5, + ) + assert len(res.objects) == 1 + res = collection.query.fetch_objects( + filters=Filter.by_property("title").like("ecol*"), + limit=5, + ) + assert len(res.objects) == 0 diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 4693a44aa..fcae98d74 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -12,6 +12,7 @@ Property, Reconfigure, ReferenceProperty, + StopwordsPreset, TextAnalyzerConfig, Tokenization, Vectorizers, @@ -3097,3 +3098,100 @@ def test_text_analyzer_rejects_wrong_types(self) -> None: TextAnalyzerConfig(ascii_fold="yes") # type: ignore[arg-type] with pytest.raises(ValidationError): TextAnalyzerConfig(ascii_fold_ignore="é") # type: ignore[arg-type] + + def test_text_analyzer_stopword_preset_builtin_enum(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset=StopwordsPreset.EN), + ) + assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "en"} + + def test_text_analyzer_stopword_preset_user_defined_string(self) -> None: + prop = Property( + name="title_fr", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + ) + assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "fr"} + + 
def test_text_analyzer_combined_ascii_fold_and_stopword_preset(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig( + ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr" + ), + ) + assert prop._to_dict()["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["é"], + "stopwordPreset": "fr", + } + + def test_text_analyzer_stopword_preset_only_omits_other_keys(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + ) + out = prop._to_dict() + assert "asciiFold" not in out["textAnalyzer"] + assert "asciiFoldIgnore" not in out["textAnalyzer"] + + +class TestInvertedIndexStopwordPresets: + def test_configure_inverted_index_with_stopword_presets(self) -> None: + ic = Configure.inverted_index( + stopword_presets={ + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + }, + ) + out = ic._to_dict() + assert out["stopwordPresets"] == { + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + } + + def test_configure_inverted_index_without_stopword_presets_omits_key(self) -> None: + ic = Configure.inverted_index() + assert "stopwordPresets" not in ic._to_dict() + + def test_reconfigure_inverted_index_merges_stopword_presets(self) -> None: + rc = Reconfigure.inverted_index(stopword_presets={"fr": ["le", "la"]}) + existing = { + "stopwords": {"preset": "en", "additions": None, "removals": None}, + "bm25": {"b": 0.75, "k1": 1.2}, + "cleanupIntervalSeconds": 60, + } + merged = rc.merge_with_existing(existing) + assert merged["stopwordPresets"] == {"fr": ["le", "la"]} + # other fields untouched + assert merged["stopwords"]["preset"] == "en" + assert merged["bm25"]["b"] == 0.75 + + def test_reconfigure_inverted_index_replaces_existing_stopword_presets(self) -> None: + rc = Reconfigure.inverted_index(stopword_presets={"fr": ["le"]}) + existing = { + 
"stopwords": {"preset": "en", "additions": None, "removals": None}, + "stopwordPresets": {"fr": ["le", "la", "les"], "es": ["el"]}, + } + merged = rc.merge_with_existing(existing) + # The new value fully replaces the prior dict (this matches the server-side + # PUT semantics — see test_tokenize.py::test_remove_unused_preset_is_allowed). + assert merged["stopwordPresets"] == {"fr": ["le"]} + + def test_reconfigure_inverted_index_without_stopword_presets_leaves_existing(self) -> None: + rc = Reconfigure.inverted_index(bm25_b=0.7, bm25_k1=1.1) + existing = { + "stopwords": {"preset": "en", "additions": None, "removals": None}, + "bm25": {"b": 0.75, "k1": 1.2}, + "stopwordPresets": {"fr": ["le", "la"]}, + } + merged = rc.merge_with_existing(existing) + assert merged["stopwordPresets"] == {"fr": ["le", "la"]} diff --git a/test/collection/test_config_methods.py b/test/collection/test_config_methods.py index 472213ff9..2e40acacc 100644 --- a/test/collection/test_config_methods.py +++ b/test/collection/test_config_methods.py @@ -1,4 +1,5 @@ from weaviate.collections.classes.config_methods import ( + _collection_config_from_json, _collection_configs_simple_from_json, _nested_properties_from_config, _properties_from_config, @@ -145,3 +146,102 @@ def test_nested_properties_from_config_parses_text_analyzer() -> None: "asciiFold": True, "asciiFoldIgnore": ["ñ"], } + + +def test_properties_from_config_parses_stopword_preset_only() -> None: + """A property with only stopwordPreset (no asciiFold) must still produce a text_analyzer.""" + schema = { + "vectorizer": "none", + "properties": [ + _make_text_prop("title", textAnalyzer={"stopwordPreset": "fr"}), + ], + } + title = _properties_from_config(schema)[0] + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is False + assert title.text_analyzer.ascii_fold_ignore is None + assert title.text_analyzer.stopword_preset == "fr" + + +def test_properties_from_config_parses_combined_text_analyzer() -> None: + 
schema = { + "vectorizer": "none", + "properties": [ + _make_text_prop( + "title", + textAnalyzer={ + "asciiFold": True, + "asciiFoldIgnore": ["é"], + "stopwordPreset": "fr", + }, + ), + ], + } + title = _properties_from_config(schema)[0] + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + assert title.text_analyzer.stopword_preset == "fr" + + +def _full_schema(class_name: str, **inverted_overrides) -> dict: + inverted = { + "bm25": {"b": 0.75, "k1": 1.2}, + "cleanupIntervalSeconds": 60, + "stopwords": {"preset": "en", "additions": None, "removals": None}, + } + inverted.update(inverted_overrides) + return { + "class": class_name, + "vectorizer": "none", + "properties": [], + "invertedIndexConfig": inverted, + "replicationConfig": {"factor": 1, "deletionStrategy": "NoAutomatedResolution"}, + "shardingConfig": { + "virtualPerPhysical": 128, + "desiredCount": 1, + "actualCount": 1, + "desiredVirtualCount": 128, + "actualVirtualCount": 128, + "key": "_id", + "strategy": "hash", + "function": "murmur3", + }, + "vectorIndexType": "hnsw", + "vectorIndexConfig": { + "skip": False, + "cleanupIntervalSeconds": 300, + "maxConnections": 64, + "efConstruction": 128, + "ef": -1, + "dynamicEfMin": 100, + "dynamicEfMax": 500, + "dynamicEfFactor": 8, + "vectorCacheMaxObjects": 1000000000000, + "flatSearchCutoff": 40000, + "distance": "cosine", + }, + } + + +def test_collection_config_parses_stopword_presets() -> None: + """The inverted index config exposes stopwordPresets when present in the schema.""" + schema = _full_schema( + "TestStopwordPresets", + stopwordPresets={ + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + }, + ) + full = _collection_config_from_json(schema) + assert full.inverted_index_config.stopword_presets == { + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + } + + +def test_collection_config_stopword_presets_absent() -> None: + """If the server 
response omits stopwordPresets, the parsed value is None.""" + schema = _full_schema("TestNoStopwordPresets") + full = _collection_config_from_json(schema) + assert full.inverted_index_config.stopword_presets is None From 8f1b33b8fe5868860258f5516cf92f7420ae34c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Thu, 9 Apr 2026 22:13:19 +0100 Subject: [PATCH 34/99] refactor: improve docstrings for stopword presets and asciiFold tests --- integration/test_collection_config.py | 29 ++++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index f53667b3e..b5ff89b93 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2342,9 +2342,11 @@ def test_property_text_analyzer_ascii_fold_version_gate( def test_collection_stopword_presets(collection_factory: CollectionFactory) -> None: - """User-defined stopword presets defined at collection level apply to properties - that reference them via text_analyzer.stopword_preset, and built-in presets can - coexist with user-defined ones.""" + """User-defined stopword presets apply to properties that reference them. + + Properties can reference user-defined presets via text_analyzer.stopword_preset, + and built-in presets can coexist with user-defined ones. + """ dummy = collection_factory("dummy") if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): pytest.skip("stopword_presets requires Weaviate >= 1.37.0") @@ -2413,8 +2415,11 @@ def test_collection_stopword_presets(collection_factory: CollectionFactory) -> N def test_collection_stopword_presets_update(collection_factory: CollectionFactory) -> None: - """Updating the contents of an existing user-defined stopword preset takes effect - immediately for queries against properties that reference it.""" + """Updating the contents of an existing stopword preset takes effect immediately. 
+ + Queries against properties that reference the preset see the new contents on the + next request. + """ dummy = collection_factory("dummy") if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): pytest.skip("stopword_presets requires Weaviate >= 1.37.0") @@ -2667,9 +2672,11 @@ def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset( def test_property_text_analyzer_ascii_fold_immutable( collection_factory: CollectionFactory, ) -> None: - """The asciiFold setting is immutable; updating it via add_property on a renamed - property is the only way to change behavior, but the original property cannot - have its analyzer mutated.""" + """The asciiFold setting is immutable on an existing property. + + Adding a new property via add_property is the only way to introduce a different + analyzer; the original property's analyzer cannot be mutated. + """ dummy = collection_factory("dummy") if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): pytest.skip("text_analyzer requires Weaviate >= 1.37.0") @@ -2777,8 +2784,10 @@ def test_property_ascii_fold_across_tokenizations( def test_property_ascii_fold_multi_char_ignore( collection_factory: CollectionFactory, ) -> None: - """An ignore list with multiple characters preserves all of them while still - folding the rest.""" + """An ignore list with multiple characters preserves all of them. + + Characters not in the ignore list are still folded normally. 
+ """ dummy = collection_factory("dummy") if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): pytest.skip("text_analyzer requires Weaviate >= 1.37.0") From 03d6ff44172d81111b6cfc1601f867d36540fd46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 13 Apr 2026 14:14:09 +0100 Subject: [PATCH 35/99] refactor: simplify _any_property_has_text_analyzer function using _property_has_text_analyzer --- weaviate/collections/collections/executor.py | 14 +------------- weaviate/collections/config/executor.py | 5 +++++ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/weaviate/collections/collections/executor.py b/weaviate/collections/collections/executor.py index eb90df5d7..6b73cb53a 100644 --- a/weaviate/collections/collections/executor.py +++ b/weaviate/collections/collections/executor.py @@ -50,6 +50,7 @@ ConnectionType, _ExpectedStatusCodes, ) +from weaviate.collections.config.executor import _any_property_has_text_analyzer from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError from weaviate.util import _capitalize_first_letter, _decode_json_response_dict from weaviate.validator import _validate_input, _ValidateArgument @@ -58,19 +59,6 @@ CollectionType = TypeVar("CollectionType", Collection, CollectionAsync) -def _any_property_has_text_analyzer(properties: Sequence[Property]) -> bool: - for prop in properties: - if prop.textAnalyzer is not None: - return True - nested = prop.nestedProperties - if nested is None: - continue - nested_list = nested if isinstance(nested, list) else [nested] - if _any_property_has_text_analyzer(nested_list): - return True - return False - - class _CollectionsExecutor(Generic[ConnectionType]): def __init__(self, connection: ConnectionType): self._connection = connection diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index 92dc1d792..51adbd8a0 100644 --- a/weaviate/collections/config/executor.py +++ 
b/weaviate/collections/config/executor.py @@ -5,6 +5,7 @@ Generic, List, Literal, + Sequence, Optional, Tuple, Union, @@ -64,6 +65,10 @@ from weaviate.warnings import _Warnings +def _any_property_has_text_analyzer(properties: Sequence[Property]) -> bool: + return any(_property_has_text_analyzer(p) for p in properties) + + def _property_has_text_analyzer(prop: Property) -> bool: if prop.textAnalyzer is not None: return True From 1342204274cc29829e115660d279b2dbd987cba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 13 Apr 2026 14:26:27 +0100 Subject: [PATCH 36/99] test: remove redundant insertion ascii fold tests from test_collection_config --- integration/test_collection_config.py | 115 -------------------------- 1 file changed, 115 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 6a4cd70b0..93ac923a0 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2203,121 +2203,6 @@ def test_delete_property_index( assert config.properties[0].index_filterable is _index_filterable -def test_property_text_analyzer_ascii_fold(collection_factory: CollectionFactory) -> None: - """Create a collection with ascii folding configured and verify it round-trips.""" - dummy = collection_factory("dummy") - if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): - pytest.skip("Property text_analyzer (asciiFold) requires Weaviate >= 1.37.0") - - collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), - properties=[ - # Folds all accents. - Property( - name="folded", - data_type=DataType.TEXT, - tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True), - ), - # Folds all accents EXCEPT 'é'. 
- Property( - name="folded_except_e_acute", - data_type=DataType.TEXT, - tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), - ), - # No folding at all. - Property( - name="plain", - data_type=DataType.TEXT, - tokenization=Tokenization.WORD, - ), - ], - ) - - config = collection.config.get() - folded = next(p for p in config.properties if p.name == "folded") - folded_ignore = next(p for p in config.properties if p.name == "folded_except_e_acute") - plain = next(p for p in config.properties if p.name == "plain") - - assert folded.text_analyzer is not None - assert folded.text_analyzer.ascii_fold is True - assert folded.text_analyzer.ascii_fold_ignore is None - - assert folded_ignore.text_analyzer is not None - assert folded_ignore.text_analyzer.ascii_fold is True - assert folded_ignore.text_analyzer.ascii_fold_ignore == ["é"] - - assert plain.text_analyzer is None - - # Insert one doc that contains both an ignored accent ('é') and a non-ignored - # accent ('ç') so the test can distinguish "folded" from "ignored" behavior. - collection.data.insert( - { - "folded": "école française", - "folded_except_e_acute": "école française", - "plain": "école française", - } - ) - - # `folded` folds everything → both 'ecole' and 'francaise' match. - res = collection.query.bm25(query="ecole", query_properties=["folded"]) - assert len(res.objects) == 1 - res = collection.query.bm25(query="francaise", query_properties=["folded"]) - assert len(res.objects) == 1 - - # `folded_except_e_acute` preserves 'é' but still folds 'ç' → 'c'. - # Querying 'ecole' must NOT match because 'é' is in the ignore list. - res = collection.query.bm25(query="ecole", query_properties=["folded_except_e_acute"]) - assert len(res.objects) == 0 - # Querying the original 'école' still matches. 
- res = collection.query.bm25(query="école", query_properties=["folded_except_e_acute"]) - assert len(res.objects) == 1 - # 'ç' is not in the ignore list, so it still folds and 'francaise' matches. - res = collection.query.bm25(query="francaise", query_properties=["folded_except_e_acute"]) - assert len(res.objects) == 1 - - # `plain` does no folding at all → ASCII queries don't match accented tokens. - res = collection.query.bm25(query="ecole", query_properties=["plain"]) - assert len(res.objects) == 0 - res = collection.query.bm25(query="francaise", query_properties=["plain"]) - assert len(res.objects) == 0 - - -def test_property_text_analyzer_ascii_fold_in_nested_property( - collection_factory: CollectionFactory, -) -> None: - dummy = collection_factory("dummy") - if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): - pytest.skip("Property text_analyzer (asciiFold) requires Weaviate >= 1.37.0") - - collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), - properties=[ - Property( - name="meta", - data_type=DataType.OBJECT, - nested_properties=[ - Property( - name="title", - data_type=DataType.TEXT, - tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["ñ"]), - ), - ], - ), - ], - ) - - config = collection.config.get() - meta = next(p for p in config.properties if p.name == "meta") - assert meta.nested_properties is not None - nested_title = next(np for np in meta.nested_properties if np.name == "title") - assert nested_title.text_analyzer is not None - assert nested_title.text_analyzer.ascii_fold is True - assert nested_title.text_analyzer.ascii_fold_ignore == ["ñ"] - - def test_property_text_analyzer_ascii_fold_version_gate( collection_factory: CollectionFactory, ) -> None: From cb53d6a0e630b1f32694063272c0c36934a2ef1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 13 Apr 2026 14:31:02 +0100 Subject: [PATCH 37/99] test: add stopwords roundtrip 
test for collection configuration --- integration/test_collection_config.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 93ac923a0..43b3f6ee1 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2223,3 +2223,26 @@ def test_property_text_analyzer_ascii_fold_version_gate( ), ], ) + + +def test_stopwords_roundtrip_from_dict(collection_factory: CollectionFactory) -> None: + collection = collection_factory( + inverted_index_config=Configure.inverted_index( + stopwords_additions=["a"], + stopwords_preset=StopwordsPreset.EN, + stopwords_removals=["the"], + ), + ) + config = collection.config.get() + assert config.inverted_index_config.stopwords.preset == StopwordsPreset.EN + assert config.inverted_index_config.stopwords.removals == ["the"] + + name = f"TestStopwordsRoundtrip{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_dict(config.to_dict()) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) From 9de03f3d62f75e5f8ed02fc2f7939904bbc049ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 13 Apr 2026 14:32:25 +0100 Subject: [PATCH 38/99] feat: add model validator to enforce asciiFoldIgnore constraints in TextAnalyzerConfig --- weaviate/collections/classes/config.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index e15f33dc6..33d0d7858 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -15,7 +15,7 @@ ) from deprecation import deprecated as docstring_deprecated -from pydantic import AnyHttpUrl, Field, TypeAdapter, ValidationInfo, 
field_validator +from pydantic import AnyHttpUrl, Field, TypeAdapter, ValidationInfo, field_validator, model_validator from typing_extensions import TypeAlias from typing_extensions import deprecated as typing_deprecated @@ -2193,6 +2193,12 @@ class TextAnalyzerConfig(_ConfigCreateModel): asciiFold: Optional[bool] = Field(default=None, alias="ascii_fold") asciiFoldIgnore: Optional[List[str]] = Field(default=None, alias="ascii_fold_ignore") + @model_validator(mode="after") + def _validate_ascii_fold_ignore(self) -> "TextAnalyzerConfig": + if self.asciiFold is not True and self.asciiFoldIgnore is not None: + raise ValueError("asciiFoldIgnore cannot be set when asciiFold is not enabled") + return self + class Property(_ConfigCreateModel): """This class defines the structure of a data property that a collection can have within Weaviate. From 7018927e01cc177ce10fc1d1c360d93cd4f1a8c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 13 Apr 2026 14:39:55 +0100 Subject: [PATCH 39/99] feat: add factory class for text analyzer configurations with ASCII folding support --- weaviate/collections/classes/config.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 33d0d7858..677258860 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2200,6 +2200,25 @@ def _validate_ascii_fold_ignore(self) -> "TextAnalyzerConfig": return self +class _TextAnalyzer: + """Factory class for creating text analyzer configurations. + + Use ``Configure.TextAnalyzer`` to access these methods. + """ + + @staticmethod + def ascii_fold( + ignore: Optional[List[str]] = None, + ) -> _TextAnalyzerConfig: + """Create a text analyzer config with ASCII folding enabled. + + Args: + ignore: Optional list of characters that should be excluded from + ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to 'e'). 
+ """ + return _TextAnalyzerConfig(asciiFold=True, asciiFoldIgnore=ignore) + + class Property(_ConfigCreateModel): """This class defines the structure of a data property that a collection can have within Weaviate. @@ -2609,6 +2628,7 @@ class Configure: MultiVectors = _MultiVectors ObjectTTL = _ObjectTTL Replication = _Replication + TextAnalyzer = _TextAnalyzer @staticmethod def inverted_index( From 8e919846e3703abbed7ea8249e8f16cb46deb74e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 13 Apr 2026 17:44:03 +0100 Subject: [PATCH 40/99] refactor: update TextAnalyzerConfig usage to new Configure class methods --- integration/test_collection_config.py | 3 +- test/collection/test_config.py | 44 ++++++++------------------ weaviate/collections/classes/config.py | 22 +++++++++---- 3 files changed, 30 insertions(+), 39 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 43b3f6ee1..ddd059b45 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -30,7 +30,6 @@ PQEncoderType, PQEncoderDistribution, StopwordsPreset, - TextAnalyzerConfig, VectorDistances, VectorIndexType, Vectorizers, @@ -2219,7 +2218,7 @@ def test_property_text_analyzer_ascii_fold_version_gate( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True), + text_analyzer=Configure.TextAnalyzer.ascii_fold(), ), ], ) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 4693a44aa..4acf6bcd3 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -4,20 +4,20 @@ from pydantic import ValidationError from weaviate.collections.classes.config import ( - _AsyncReplicationConfig, - _ReplicationConfig, - _ReplicationConfigUpdate, Configure, DataType, Property, Reconfigure, ReferenceProperty, - TextAnalyzerConfig, Tokenization, Vectorizers, + _AsyncReplicationConfig, 
_CollectionConfigCreate, _GenerativeProvider, + _ReplicationConfig, + _ReplicationConfigUpdate, _RerankerProvider, + _TextAnalyzerConfigCreate, _VectorizerConfigCreate, _ReplicationConfigCreate, ReplicationDeletionStrategy, @@ -3025,7 +3025,7 @@ def test_nested_property_with_id_name_is_allowed() -> None: assert prop.nestedProperties[0].name == "id" -class TestTextAnalyzerConfig: +class Test_TextAnalyzerConfigCreate: def test_property_without_text_analyzer_omits_key(self) -> None: prop = Property(name="title", data_type=DataType.TEXT) assert "textAnalyzer" not in prop._to_dict() @@ -3034,7 +3034,7 @@ def test_property_with_ascii_fold_only(self) -> None: prop = Property( name="title", data_type=DataType.TEXT, - text_analyzer=TextAnalyzerConfig(ascii_fold=True), + text_analyzer=Configure.TextAnalyzer.ascii_fold(), ) assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True} @@ -3043,7 +3043,7 @@ def test_property_with_ascii_fold_and_ignore(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]), + text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["é", "ñ"]), ) out = prop._to_dict() assert out["textAnalyzer"] == { @@ -3052,22 +3052,9 @@ def test_property_with_ascii_fold_and_ignore(self) -> None: } assert out["tokenization"] == "word" - def test_text_analyzer_default_omits_unset_fields(self) -> None: - prop = Property( - name="title", - data_type=DataType.TEXT, - text_analyzer=TextAnalyzerConfig(), - ) - # exclude_none drops both unset fields, leaving an empty dict - assert prop._to_dict()["textAnalyzer"] == {} - - def test_text_analyzer_only_ignore_list(self) -> None: - prop = Property( - name="title", - data_type=DataType.TEXT, - text_analyzer=TextAnalyzerConfig(ascii_fold_ignore=["é"]), - ) - assert prop._to_dict()["textAnalyzer"] == {"asciiFoldIgnore": ["é"]} + def test_text_analyzer_rejects_ignore_without_ascii_fold(self) -> None: + with 
pytest.raises(ValidationError): + _TextAnalyzerConfigCreate(ascii_fold_ignore=["é"]) def test_nested_property_with_text_analyzer(self) -> None: prop = Property( @@ -3077,7 +3064,7 @@ def test_nested_property_with_text_analyzer(self) -> None: Property( name="title", data_type=DataType.TEXT, - text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["ñ"]), + text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["ñ"]), ), ], ) @@ -3087,13 +3074,8 @@ def test_nested_property_with_text_analyzer(self) -> None: "asciiFoldIgnore": ["ñ"], } - def test_text_analyzer_accepts_snake_case_alias(self) -> None: - ta = TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]) - assert ta.asciiFold is True - assert ta.asciiFoldIgnore == ["é"] - def test_text_analyzer_rejects_wrong_types(self) -> None: with pytest.raises(ValidationError): - TextAnalyzerConfig(ascii_fold="yes") # type: ignore[arg-type] + _TextAnalyzerConfigCreate(ascii_fold="yes") # type: ignore[arg-type] with pytest.raises(ValidationError): - TextAnalyzerConfig(ascii_fold_ignore="é") # type: ignore[arg-type] + _TextAnalyzerConfigCreate(ascii_fold_ignore="é") # type: ignore[arg-type] diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 677258860..6d2dd42cd 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -15,7 +15,14 @@ ) from deprecation import deprecated as docstring_deprecated -from pydantic import AnyHttpUrl, Field, TypeAdapter, ValidationInfo, field_validator, model_validator +from pydantic import ( + AnyHttpUrl, + Field, + TypeAdapter, + ValidationInfo, + field_validator, + model_validator, +) from typing_extensions import TypeAlias from typing_extensions import deprecated as typing_deprecated @@ -1677,6 +1684,9 @@ class _TextAnalyzerConfig(_ConfigBase): ascii_fold_ignore: Optional[List[str]] +TextAnalyzerConfig = _TextAnalyzerConfig + + @dataclass class _NestedProperty(_ConfigBase): data_type: 
DataType @@ -2171,7 +2181,7 @@ class _ShardStatus: ShardStatus = _ShardStatus -class TextAnalyzerConfig(_ConfigCreateModel): +class _TextAnalyzerConfigCreate(_ConfigCreateModel): """Text analysis options for a property. Configures ASCII folding behavior for `text` and `text[]` properties that use an @@ -2194,7 +2204,7 @@ class TextAnalyzerConfig(_ConfigCreateModel): asciiFoldIgnore: Optional[List[str]] = Field(default=None, alias="ascii_fold_ignore") @model_validator(mode="after") - def _validate_ascii_fold_ignore(self) -> "TextAnalyzerConfig": + def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate": if self.asciiFold is not True and self.asciiFoldIgnore is not None: raise ValueError("asciiFoldIgnore cannot be set when asciiFold is not enabled") return self @@ -2209,14 +2219,14 @@ class _TextAnalyzer: @staticmethod def ascii_fold( ignore: Optional[List[str]] = None, - ) -> _TextAnalyzerConfig: + ) -> _TextAnalyzerConfigCreate: """Create a text analyzer config with ASCII folding enabled. Args: ignore: Optional list of characters that should be excluded from ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to 'e'). 
""" - return _TextAnalyzerConfig(asciiFold=True, asciiFoldIgnore=ignore) + return _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=ignore) class Property(_ConfigCreateModel): @@ -2248,7 +2258,7 @@ class Property(_ConfigCreateModel): default=None, alias="nested_properties" ) skip_vectorization: bool = Field(default=False) - textAnalyzer: Optional[TextAnalyzerConfig] = Field(default=None, alias="text_analyzer") + textAnalyzer: Optional[_TextAnalyzerConfigCreate] = Field(default=None, alias="text_analyzer") tokenization: Optional[Tokenization] = Field(default=None) vectorize_property_name: bool = Field(default=True) From db3009c935b8da9c62071bc175771d52577c89d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 13 Apr 2026 22:20:45 +0100 Subject: [PATCH 41/99] test: remove redundant line in stopword presets merge test --- test/collection/test_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 3d271693a..6d2bb0e74 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -3178,4 +3178,3 @@ def test_reconfigure_inverted_index_without_stopword_presets_leaves_existing(sel } merged = rc.merge_with_existing(existing) assert merged["stopwordPresets"] == {"fr": ["le", "la"]} - From 50f7768224ca402b5dd340f0f7e6969df0788748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 13 Apr 2026 22:29:05 +0100 Subject: [PATCH 42/99] refactor: use factory pattern --- integration/test_collection_config.py | 32 +++++++++++++------------- test/collection/test_config.py | 15 ++++++------ weaviate/collections/classes/config.py | 25 +++++++++++++++----- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index fd8208ca2..a125a3ef2 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2219,7 
+2219,7 @@ def test_property_text_analyzer_ascii_fold_version_gate( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.ascii_fold(), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True), ), ], ) @@ -2246,14 +2246,14 @@ def test_collection_stopword_presets(collection_factory: CollectionFactory) -> N name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), ), # Built-in English preset, set per property. Property( name="title_en", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset=StopwordsPreset.EN), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset=StopwordsPreset.EN), ), # No stopword override → uses the collection-level default. Property( @@ -2318,7 +2318,7 @@ def test_collection_stopword_presets_update(collection_factory: CollectionFactor name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), ), ], ) @@ -2366,7 +2366,7 @@ def test_collection_stopword_presets_remove_in_use_is_rejected( name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), ), ], ) @@ -2419,7 +2419,7 @@ def test_collection_stopword_presets_remove_unused_is_allowed( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), ), ], ) @@ -2456,7 +2456,7 @@ def test_collection_stopword_presets_remove_referenced_by_nested_property_is_rej name="body", data_type=DataType.TEXT, 
tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), ), ], ), @@ -2493,7 +2493,7 @@ def test_collection_user_defined_stopword_preset_overrides_builtin( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset="en"), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="en"), ), ], ) @@ -2526,7 +2526,7 @@ def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig( + text_analyzer=Configure.TextAnalyzer.custom( ascii_fold=True, stopword_preset=StopwordsPreset.EN, ), @@ -2572,7 +2572,7 @@ def test_property_text_analyzer_ascii_fold_immutable( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["é"]), ), ], ) @@ -2591,7 +2591,7 @@ def test_property_text_analyzer_ascii_fold_immutable( name="title2", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["ñ"]), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["ñ"]), ), ) config = collection.config.get() @@ -2634,13 +2634,13 @@ def test_property_ascii_fold_across_tokenizations( name="prop", data_type=DataType.TEXT, tokenization=tokenization, - text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["é"]), ), Property( name="prop_no_ignore", data_type=DataType.TEXT, tokenization=tokenization, - text_analyzer=TextAnalyzerConfig(ascii_fold=True), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True), ), ], ) @@ -2683,7 +2683,7 @@ def 
test_property_ascii_fold_multi_char_ignore( name="multi_ignore", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig( + text_analyzer=Configure.TextAnalyzer.custom( ascii_fold=True, ascii_fold_ignore=["é", "ü", "ñ", "ø"], ), @@ -2745,14 +2745,14 @@ def test_property_ascii_fold_with_filters( name="body", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True), ), # Folds everything except 'é'. Property( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(ascii_fold=True, ascii_fold_ignore=["é"]), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["é"]), ), ], ) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 6d2bb0e74..67e4f6da3 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -10,7 +10,6 @@ Reconfigure, ReferenceProperty, StopwordsPreset, - TextAnalyzerConfig, Tokenization, Vectorizers, _AsyncReplicationConfig, @@ -3036,7 +3035,7 @@ def test_property_with_ascii_fold_only(self) -> None: prop = Property( name="title", data_type=DataType.TEXT, - text_analyzer=Configure.TextAnalyzer.ascii_fold(), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True), ) assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True} @@ -3045,7 +3044,7 @@ def test_property_with_ascii_fold_and_ignore(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["é", "ñ"]), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]), ) out = prop._to_dict() assert out["textAnalyzer"] == { @@ -3066,7 +3065,7 @@ def test_nested_property_with_text_analyzer(self) -> None: Property( name="title", data_type=DataType.TEXT, - 
text_analyzer=Configure.TextAnalyzer.ascii_fold(ignore=["ñ"]), + text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["ñ"]), ), ], ) @@ -3087,7 +3086,7 @@ def test_text_analyzer_stopword_preset_builtin_enum(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset=StopwordsPreset.EN), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset=StopwordsPreset.EN), ) assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "en"} @@ -3096,7 +3095,7 @@ def test_text_analyzer_stopword_preset_user_defined_string(self) -> None: name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), ) assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "fr"} @@ -3105,7 +3104,7 @@ def test_text_analyzer_combined_ascii_fold_and_stopword_preset(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig( + text_analyzer=Configure.TextAnalyzer.custom( ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr" ), ) @@ -3120,7 +3119,7 @@ def test_text_analyzer_stopword_preset_only_omits_other_keys(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=TextAnalyzerConfig(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), ) out = prop._to_dict() assert "asciiFold" not in out["textAnalyzer"] diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 7658f40cc..3e6867985 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2239,16 +2239,29 @@ class _TextAnalyzer: """ @staticmethod - def ascii_fold( - ignore: Optional[List[str]] = None, + def custom( + ascii_fold: Optional[bool] = None, + 
ascii_fold_ignore: Optional[List[str]] = None, + stopword_preset: Optional[Union[StopwordsPreset, str]] = None, ) -> _TextAnalyzerConfigCreate: - """Create a text analyzer config with ASCII folding enabled. + """Create a text analyzer config with custom settings. Args: - ignore: Optional list of characters that should be excluded from - ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to 'e'). + ascii_fold: If True, accent/diacritic marks are folded to their base + characters during indexing and search (e.g. 'école' matches 'ecole'). + ascii_fold_ignore: Optional list of characters that should be excluded + from ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to + 'e'). Requires ``ascii_fold=True``. + stopword_preset: Stopword preset name to override the collection-level + stopwords for this property. Accepts a ``StopwordsPreset`` or a + user-defined preset name. """ - return _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=ignore) + return _TextAnalyzerConfigCreate( + ascii_fold=ascii_fold, + ascii_fold_ignore=ascii_fold_ignore, + stopword_preset=stopword_preset, + ) + class Property(_ConfigCreateModel): From 6a1b0bc88b5daa97b219a2eabcd3a7dc3c3ec35b Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Tue, 14 Apr 2026 09:06:26 +0200 Subject: [PATCH 43/99] Add MCP permission --- integration/test_rbac.py | 38 ++++++++++++++++++++++++++++++++++++++ weaviate/rbac/models.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/integration/test_rbac.py b/integration/test_rbac.py index d98d238a7..53206abac 100644 --- a/integration/test_rbac.py +++ b/integration/test_rbac.py @@ -14,6 +14,7 @@ CollectionsPermissionOutput, DataPermissionOutput, GroupsPermissionOutput, + MCPPermissionOutput, NodesPermissionOutput, Role, ReplicatePermissionOutput, @@ -44,6 +45,7 @@ backups_permissions=[ BackupsPermissionOutput(collection="Test", actions={Actions.Backups.MANAGE}) ], + 
mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -62,6 +64,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -84,6 +87,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -104,6 +108,7 @@ DataPermissionOutput(collection="*", tenant="*", actions={Actions.Data.CREATE}) ], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -137,6 +142,7 @@ ), ], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -155,6 +161,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[ NodesPermissionOutput( verbosity="verbose", actions={Actions.Nodes.READ}, collection="Test" @@ -177,6 +184,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[ NodesPermissionOutput( verbosity="minimal", actions={Actions.Nodes.READ}, collection="*" @@ -203,6 +211,7 @@ ], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -221,6 +230,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[ TenantsPermissionOutput( @@ -247,6 +257,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[ TenantsPermissionOutput( @@ -290,6 +301,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -310,6 +322,7 @@ roles_permissions=[], 
data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[ @@ -355,6 +368,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -379,6 +393,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -403,6 +418,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -410,6 +426,27 @@ ), 32, # Minimum version for alias permissions ), + ( + Permissions.mcp(manage=True), + Role( + name="ManageMCP", + alias_permissions=[], + cluster_permissions=[], + users_permissions=[], + collections_permissions=[], + roles_permissions=[], + data_permissions=[], + backups_permissions=[], + mcp_permissions=[ + MCPPermissionOutput(actions={Actions.MCP.MANAGE}) + ], + nodes_permissions=[], + tenants_permissions=[], + replicate_permissions=[], + groups_permissions=[], + ), + 37, # Minimum version for MCP permissions + ), ( Permissions.Groups.oidc(group="MyGroup", read=True), Role( @@ -421,6 +458,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], diff --git a/weaviate/rbac/models.py b/weaviate/rbac/models.py index df5a230a5..dfdbc48f4 100644 --- a/weaviate/rbac/models.py +++ b/weaviate/rbac/models.py @@ -252,6 +252,14 @@ def values() -> List[str]: return [action.value for action in BackupsAction] +class MCPAction(str, _Action, Enum): + MANAGE = "manage_mcp" + + @staticmethod + def values() -> List[str]: + return [action.value for action in MCPAction] + + class ReplicateAction(str, _Action, Enum): CREATE = "create_replicate" READ = "read_replicate" @@ -407,6 +415,16 @@ 
def _to_weaviate(self) -> List[WeaviatePermission]: ] +class _MCPPermission(_Permission[MCPAction]): + def _to_weaviate(self) -> List[WeaviatePermission]: + return [ + { + "action": action, + } + for action in self.actions + ] + + class _ClusterPermission(_Permission[ClusterAction]): def _to_weaviate(self) -> List[WeaviatePermission]: return [ @@ -470,6 +488,10 @@ class BackupsPermissionOutput(_BackupsPermission): pass +class MCPPermissionOutput(_MCPPermission): + pass + + class NodesPermissionOutput(_NodesPermission): pass @@ -486,6 +508,7 @@ class TenantsPermissionOutput(_TenantsPermission): RolesPermissionOutput, UsersPermissionOutput, BackupsPermissionOutput, + MCPPermissionOutput, NodesPermissionOutput, TenantsPermissionOutput, ReplicatePermissionOutput, @@ -507,6 +530,7 @@ class Role(RoleBase): roles_permissions: List[RolesPermissionOutput] users_permissions: List[UsersPermissionOutput] backups_permissions: List[BackupsPermissionOutput] + mcp_permissions: List[MCPPermissionOutput] nodes_permissions: List[NodesPermissionOutput] tenants_permissions: List[TenantsPermissionOutput] replicate_permissions: List[ReplicatePermissionOutput] @@ -522,6 +546,7 @@ def permissions(self) -> List[PermissionsOutputType]: permissions.extend(self.roles_permissions) permissions.extend(self.users_permissions) permissions.extend(self.backups_permissions) + permissions.extend(self.mcp_permissions) permissions.extend(self.nodes_permissions) permissions.extend(self.tenants_permissions) permissions.extend(self.replicate_permissions) @@ -537,6 +562,7 @@ def _from_weaviate_role(cls, role: WeaviateRole) -> "Role": roles_permissions: List[RolesPermissionOutput] = [] data_permissions: List[DataPermissionOutput] = [] backups_permissions: List[BackupsPermissionOutput] = [] + mcp_permissions: List[MCPPermissionOutput] = [] nodes_permissions: List[NodesPermissionOutput] = [] tenants_permissions: List[TenantsPermissionOutput] = [] replicate_permissions: List[ReplicatePermissionOutput] = [] @@ 
-605,6 +631,10 @@ def _from_weaviate_role(cls, role: WeaviateRole) -> "Role": actions={BackupsAction(permission["action"])}, ) ) + elif permission["action"] in MCPAction.values(): + mcp_permissions.append( + MCPPermissionOutput(actions={MCPAction(permission["action"])}) + ) elif permission["action"] in NodesAction.values(): nodes = permission.get("nodes") if nodes is not None: @@ -658,6 +688,7 @@ def _from_weaviate_role(cls, role: WeaviateRole) -> "Role": groups_permissions=_join_permissions(groups_permissions), data_permissions=_join_permissions(data_permissions), backups_permissions=_join_permissions(backups_permissions), + mcp_permissions=_join_permissions(mcp_permissions), nodes_permissions=_join_permissions(nodes_permissions), tenants_permissions=_join_permissions(tenants_permissions), replicate_permissions=_join_permissions(replicate_permissions), @@ -710,6 +741,7 @@ class Actions: Cluster = ClusterAction Nodes = NodesAction Backups = BackupsAction + MCP = MCPAction Tenants = TenantsAction Users = UsersAction Replicate = ReplicateAction @@ -1020,6 +1052,12 @@ def backup( permissions.append(permission) return permissions + @staticmethod + def mcp(*, manage: bool = False) -> PermissionsCreateType: + if manage: + return [_MCPPermission(actions={MCPAction.MANAGE})] + return [] + @staticmethod def cluster(*, read: bool = False) -> PermissionsCreateType: if read: From a0efe43887b305c5f96f6d2f9c24d7701c803a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 08:18:25 +0100 Subject: [PATCH 44/99] refactor: format text analyzer configuration for better readability --- integration/test_collection_config.py | 12 +++++++++--- test/collection/test_config.py | 8 ++++++-- weaviate/collections/classes/config.py | 1 - 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index a125a3ef2..38ed3f141 100644 --- a/integration/test_collection_config.py 
+++ b/integration/test_collection_config.py @@ -2572,7 +2572,9 @@ def test_property_text_analyzer_ascii_fold_immutable( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["é"]), + text_analyzer=Configure.TextAnalyzer.custom( + ascii_fold=True, ascii_fold_ignore=["é"] + ), ), ], ) @@ -2634,7 +2636,9 @@ def test_property_ascii_fold_across_tokenizations( name="prop", data_type=DataType.TEXT, tokenization=tokenization, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["é"]), + text_analyzer=Configure.TextAnalyzer.custom( + ascii_fold=True, ascii_fold_ignore=["é"] + ), ), Property( name="prop_no_ignore", @@ -2752,7 +2756,9 @@ def test_property_ascii_fold_with_filters( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["é"]), + text_analyzer=Configure.TextAnalyzer.custom( + ascii_fold=True, ascii_fold_ignore=["é"] + ), ), ], ) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 67e4f6da3..5c468d5ea 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -3044,7 +3044,9 @@ def test_property_with_ascii_fold_and_ignore(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]), + text_analyzer=Configure.TextAnalyzer.custom( + ascii_fold=True, ascii_fold_ignore=["é", "ñ"] + ), ) out = prop._to_dict() assert out["textAnalyzer"] == { @@ -3065,7 +3067,9 @@ def test_nested_property_with_text_analyzer(self) -> None: Property( name="title", data_type=DataType.TEXT, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["ñ"]), + text_analyzer=Configure.TextAnalyzer.custom( + ascii_fold=True, ascii_fold_ignore=["ñ"] + ), ), ], ) diff --git 
a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 3e6867985..a4af8aadd 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2263,7 +2263,6 @@ def custom( ) - class Property(_ConfigCreateModel): """This class defines the structure of a data property that a collection can have within Weaviate. From fa92fc28f23078baced7fb01f8cbb49e9ecf69da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 08:22:26 +0100 Subject: [PATCH 45/99] refactor: remove server side behavior tests --- integration/test_collection_config.py | 277 +------------------------- 1 file changed, 9 insertions(+), 268 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 38ed3f141..285c09fe2 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -41,7 +41,6 @@ _VectorizerConfigCreate, IndexName, ) -from weaviate.collections.classes.filters import Filter from weaviate.collections.classes.tenants import Tenant from weaviate.exceptions import ( UnexpectedStatusCodeError, @@ -2276,34 +2275,9 @@ def test_collection_stopword_presets(collection_factory: CollectionFactory) -> N assert title_en.text_analyzer.stopword_preset == "en" assert plain.text_analyzer is None - collection.data.insert( - { - "title_fr": "le chat noir", - "title_en": "the black cat", - "plain": "the black cat", - } - ) - - # title_fr filters 'le' (in user preset) → BM25 for 'le' yields no match, - # but 'chat' still matches. - res = collection.query.bm25(query="le", query_properties=["title_fr"]) - assert len(res.objects) == 0 - res = collection.query.bm25(query="chat", query_properties=["title_fr"]) - assert len(res.objects) == 1 - - # title_en filters 'the' (built-in en) but matches 'cat'. 
- res = collection.query.bm25(query="the", query_properties=["title_en"]) - assert len(res.objects) == 0 - res = collection.query.bm25(query="cat", query_properties=["title_en"]) - assert len(res.objects) == 1 - def test_collection_stopword_presets_update(collection_factory: CollectionFactory) -> None: - """Updating the contents of an existing stopword preset takes effect immediately. - - Queries against properties that reference the preset see the new contents on the - next request. - """ + """Updating a stopword preset is reflected in the config.""" dummy = collection_factory("dummy") if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): pytest.skip("stopword_presets requires Weaviate >= 1.37.0") @@ -2323,15 +2297,9 @@ def test_collection_stopword_presets_update(collection_factory: CollectionFactor ], ) - collection.data.insert({"title_fr": "le chat et la souris"}) - - # Baseline: 'le' is filtered, 'la' is not (it isn't in the preset yet). - res = collection.query.bm25(query="le", query_properties=["title_fr"]) - assert len(res.objects) == 0 - res = collection.query.bm25(query="la", query_properties=["title_fr"]) - assert len(res.objects) == 1 + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le"]} - # Replace the preset contents: drop 'le', add 'la'. collection.config.update( inverted_index_config=Reconfigure.inverted_index( stopword_presets={"fr": ["la"]}, @@ -2341,12 +2309,6 @@ def test_collection_stopword_presets_update(collection_factory: CollectionFactor config = collection.config.get() assert config.inverted_index_config.stopword_presets == {"fr": ["la"]} - # After update: 'le' now passes through, 'la' is filtered. 
- res = collection.query.bm25(query="le", query_properties=["title_fr"]) - assert len(res.objects) == 1 - res = collection.query.bm25(query="la", query_properties=["title_fr"]) - assert len(res.objects) == 0 - def test_collection_stopword_presets_remove_in_use_is_rejected( collection_factory: CollectionFactory, @@ -2476,7 +2438,7 @@ def test_collection_stopword_presets_remove_referenced_by_nested_property_is_rej def test_collection_user_defined_stopword_preset_overrides_builtin( collection_factory: CollectionFactory, ) -> None: - """A user-defined preset named 'en' replaces the built-in 'en' for properties of this collection.""" + """A user-defined preset named 'en' is accepted and reflected in the config.""" dummy = collection_factory("dummy") if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): pytest.skip("stopword_presets requires Weaviate >= 1.37.0") @@ -2484,8 +2446,6 @@ def test_collection_user_defined_stopword_preset_overrides_builtin( collection = collection_factory( vectorizer_config=Configure.Vectorizer.none(), inverted_index_config=Configure.inverted_index( - # Shadow the built-in 'en' with a user-defined preset that filters - # 'hello' but NOT 'the'. stopword_presets={"en": ["hello"]}, ), properties=[ @@ -2498,17 +2458,11 @@ def test_collection_user_defined_stopword_preset_overrides_builtin( ], ) - collection.data.insert({"title": "the quick hello world"}) - - # 'the' is no longer filtered (built-in en is overridden) → matches. - res = collection.query.bm25(query="the", query_properties=["title"]) - assert len(res.objects) == 1 - # 'hello' was added by the override → filtered. - res = collection.query.bm25(query="hello", query_properties=["title"]) - assert len(res.objects) == 0 - # 'quick' is in neither list → matches. 
- res = collection.query.bm25(query="quick", query_properties=["title"]) - assert len(res.objects) == 1 + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"en": ["hello"]} + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.stopword_preset == "en" def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset( @@ -2540,18 +2494,6 @@ def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset( assert title.text_analyzer.ascii_fold is True assert title.text_analyzer.stopword_preset == "en" - collection.data.insert({"title": "The école française"}) - - # 'the' is filtered by built-in en → no match. - res = collection.query.bm25(query="the", query_properties=["title"]) - assert len(res.objects) == 0 - # 'ecole' matches because ascii_fold folds 'école'. - res = collection.query.bm25(query="ecole", query_properties=["title"]) - assert len(res.objects) == 1 - # 'francaise' matches because ascii_fold folds 'française'. 
- res = collection.query.bm25(query="francaise", query_properties=["title"]) - assert len(res.objects) == 1 - def test_property_text_analyzer_ascii_fold_immutable( collection_factory: CollectionFactory, @@ -2607,207 +2549,6 @@ def test_property_text_analyzer_ascii_fold_immutable( assert title2.text_analyzer.ascii_fold_ignore == ["ñ"] -@pytest.mark.parametrize( - "tokenization,query_match,query_no_match", - [ - (Tokenization.WORD, "école", "ecole"), - (Tokenization.LOWERCASE, "l'école", "l'ecole"), - (Tokenization.WHITESPACE, "L'école", "L'ecole"), - (Tokenization.FIELD, "L'école est fermée", "L'ecole est fermee"), - (Tokenization.TRIGRAM, "éco", "eco"), - ], - ids=["word", "lowercase", "whitespace", "field", "trigram"], -) -def test_property_ascii_fold_across_tokenizations( - collection_factory: CollectionFactory, - tokenization: Tokenization, - query_match: str, - query_no_match: str, -) -> None: - """ascii_fold + ignore list ['é'] preserves é under every supported tokenization.""" - dummy = collection_factory("dummy") - if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): - pytest.skip("text_analyzer requires Weaviate >= 1.37.0") - - collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), - properties=[ - Property( - name="prop", - data_type=DataType.TEXT, - tokenization=tokenization, - text_analyzer=Configure.TextAnalyzer.custom( - ascii_fold=True, ascii_fold_ignore=["é"] - ), - ), - Property( - name="prop_no_ignore", - data_type=DataType.TEXT, - tokenization=tokenization, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True), - ), - ], - ) - collection.data.insert( - { - "prop": "L'école est fermée", - "prop_no_ignore": "L'école est fermée", - } - ) - - # On the property with the ignore list, the accented form matches but the - # ASCII-folded form does not. 
- res = collection.query.bm25(query=query_match, query_properties=["prop"]) - assert len(res.objects) == 1 - res = collection.query.bm25(query=query_no_match, query_properties=["prop"]) - assert len(res.objects) == 0 - - # On the property without the ignore list, both forms match. - res = collection.query.bm25(query=query_match, query_properties=["prop_no_ignore"]) - assert len(res.objects) == 1 - res = collection.query.bm25(query=query_no_match, query_properties=["prop_no_ignore"]) - assert len(res.objects) == 1 - - -def test_property_ascii_fold_multi_char_ignore( - collection_factory: CollectionFactory, -) -> None: - """An ignore list with multiple characters preserves all of them. - - Characters not in the ignore list are still folded normally. - """ - dummy = collection_factory("dummy") - if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): - pytest.skip("text_analyzer requires Weaviate >= 1.37.0") - - collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), - properties=[ - Property( - name="multi_ignore", - data_type=DataType.TEXT, - tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom( - ascii_fold=True, - ascii_fold_ignore=["é", "ü", "ñ", "ø"], - ), - ), - ], - ) - collection.data.insert_many( - [ - {"multi_ignore": "résumé"}, - {"multi_ignore": "über"}, - {"multi_ignore": "El Niño"}, - {"multi_ignore": "Ørsted"}, - {"multi_ignore": "São Paulo"}, - {"multi_ignore": "naïve café"}, - ] - ) - - # All four ignored chars are preserved → ASCII-folded form does not match, - # but the accented form does. 
- for accented, folded in [ - ("résumé", "resume"), - ("über", "uber"), - ("niño", "nino"), - ("ørsted", "orsted"), - ]: - match = collection.query.bm25(query=accented, query_properties=["multi_ignore"]) - no_match = collection.query.bm25(query=folded, query_properties=["multi_ignore"]) - assert len(match.objects) == 1, f"{accented} should match" - assert len(no_match.objects) == 0, f"{folded} should not match (char in ignore list)" - - # Non-ignored accents (ã, ï) still fold normally. - res = collection.query.bm25(query="sao", query_properties=["multi_ignore"]) - assert len(res.objects) == 1 - res = collection.query.bm25(query="naive", query_properties=["multi_ignore"]) - assert len(res.objects) == 1 - - # The "naïve café" doc has both an ignored char (é) and a non-ignored one (ï): - # 'naive' matches (ï folded), 'cafe' does not (é preserved). - res = collection.query.fetch_objects( - filters=Filter.by_property("multi_ignore").equal("naive café"), - limit=10, - ) - assert len(res.objects) == 1 - - -def test_property_ascii_fold_with_filters( - collection_factory: CollectionFactory, -) -> None: - """Equal and Like filters respect ascii_fold and ascii_fold_ignore.""" - dummy = collection_factory("dummy") - if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): - pytest.skip("text_analyzer requires Weaviate >= 1.37.0") - - collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), - properties=[ - # Folds everything. - Property( - name="body", - data_type=DataType.TEXT, - tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True), - ), - # Folds everything except 'é'. 
- Property( - name="title", - data_type=DataType.TEXT, - tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom( - ascii_fold=True, ascii_fold_ignore=["é"] - ), - ), - ], - ) - collection.data.insert_many( - [ - {"title": "L'école est fermée", "body": "L'école est fermée"}, - {"title": "cafe résumé", "body": "cafe résumé"}, - ] - ) - - # Equal on body (full fold): 'ecole' matches. - res = collection.query.fetch_objects( - filters=Filter.by_property("body").equal("ecole"), - limit=5, - ) - assert len(res.objects) == 1 - - # Equal on title (é preserved): 'ecole' does not, but 'école' does. - res = collection.query.fetch_objects( - filters=Filter.by_property("title").equal("école"), - limit=5, - ) - assert len(res.objects) == 1 - res = collection.query.fetch_objects( - filters=Filter.by_property("title").equal("ecole"), - limit=5, - ) - assert len(res.objects) == 0 - - # Like on body (full fold): 'ecol*' matches. - res = collection.query.fetch_objects( - filters=Filter.by_property("body").like("ecol*"), - limit=5, - ) - assert len(res.objects) == 1 - - # Like on title (é preserved): 'écol*' matches, 'ecol*' does not. 
- res = collection.query.fetch_objects( - filters=Filter.by_property("title").like("écol*"), - limit=5, - ) - assert len(res.objects) == 1 - res = collection.query.fetch_objects( - filters=Filter.by_property("title").like("ecol*"), - limit=5, - ) - assert len(res.objects) == 0 - - def test_stopwords_roundtrip_from_dict(collection_factory: CollectionFactory) -> None: collection = collection_factory( inverted_index_config=Configure.inverted_index( From 27cd0a4ddd1a5dd34c1629083b7461930abf7c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 08:25:19 +0100 Subject: [PATCH 46/99] test: add stopword presets roundtrip tests for Weaviate collections --- integration/test_collection_config.py | 80 +++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 285c09fe2..39c40a366 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2570,3 +2570,83 @@ def test_stopwords_roundtrip_from_dict(collection_factory: CollectionFactory) -> assert config == new assert config.to_dict() == new.to_dict() client.collections.delete(name) + + +def test_stopword_presets_roundtrip_from_dict( + collection_factory: CollectionFactory, +) -> None: + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + ), + ], + ) + + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + title = next(p 
for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.stopword_preset == "fr" + + name = f"TestPresetRoundtrip{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_dict(config.to_dict()) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) + + +def test_text_analyzer_roundtrip_from_dict( + collection_factory: CollectionFactory, +) -> None: + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.TextAnalyzer.custom( + ascii_fold=True, + ascii_fold_ignore=["é"], + stopword_preset=StopwordsPreset.EN, + ), + ), + ], + ) + + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + assert title.text_analyzer.stopword_preset == "en" + + name = f"TestAnalyzerRoundtrip{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_dict(config.to_dict()) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) From a241d8c6343b246b93f76b0913ade5b934b78df4 Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Tue, 14 Apr 2026 09:30:52 +0200 Subject: [PATCH 47/99] Fix formatting --- 
integration/test_rbac.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/integration/test_rbac.py b/integration/test_rbac.py index 53206abac..86719b6ce 100644 --- a/integration/test_rbac.py +++ b/integration/test_rbac.py @@ -437,9 +437,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], - mcp_permissions=[ - MCPPermissionOutput(actions={Actions.MCP.MANAGE}) - ], + mcp_permissions=[MCPPermissionOutput(actions={Actions.MCP.MANAGE})], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], From 83c2431e151bed0d8225aaf9bf7b553650c3a502 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 08:31:59 +0100 Subject: [PATCH 48/99] refactor: remove unnecessary stopword preset coercion from _TextAnalyzerConfigCreate --- weaviate/collections/classes/config.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index a4af8aadd..476fb302e 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2216,15 +2216,6 @@ class _TextAnalyzerConfigCreate(_ConfigCreateModel): default=None, alias="stopword_preset" ) - @field_validator("stopwordPreset", mode="before") - @classmethod - def _coerce_stopword_preset(cls, v: Any) -> Any: - # Pydantic preserves the StopwordsPreset enum instance through model_dump, - # but the wire format must be a plain string. Coerce at construction time. 
- if isinstance(v, StopwordsPreset): - return v.value - return v - @model_validator(mode="after") def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate": if self.asciiFold is not True and self.asciiFoldIgnore is not None: @@ -2324,6 +2315,8 @@ def _to_dict( if isinstance(self.nestedProperties, list) else [self.nestedProperties._to_dict()] ) + if self.textAnalyzer is not None: + ret_dict["textAnalyzer"] = self.textAnalyzer._to_dict() return ret_dict From 4e0a0f20503dcfead5417e1fbc6686070b860c41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 09:11:04 +0100 Subject: [PATCH 49/99] refactor: replace custom text analyzer method with a direct function call --- integration/test_collection_config.py | 28 +++++++------- test/collection/test_config.py | 18 ++++----- weaviate/collections/classes/config.py | 51 +++++++++++--------------- 3 files changed, 42 insertions(+), 55 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 39c40a366..527bad130 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2218,7 +2218,7 @@ def test_property_text_analyzer_ascii_fold_version_gate( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True), + text_analyzer=Configure.TextAnalyzer(ascii_fold=True), ), ], ) @@ -2245,14 +2245,14 @@ def test_collection_stopword_presets(collection_factory: CollectionFactory) -> N name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), ), # Built-in English preset, set per property. 
Property( name="title_en", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset=StopwordsPreset.EN), + text_analyzer=Configure.TextAnalyzer(stopword_preset=StopwordsPreset.EN), ), # No stopword override → uses the collection-level default. Property( @@ -2292,7 +2292,7 @@ def test_collection_stopword_presets_update(collection_factory: CollectionFactor name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), ), ], ) @@ -2328,7 +2328,7 @@ def test_collection_stopword_presets_remove_in_use_is_rejected( name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), ), ], ) @@ -2381,7 +2381,7 @@ def test_collection_stopword_presets_remove_unused_is_allowed( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), ), ], ) @@ -2418,7 +2418,7 @@ def test_collection_stopword_presets_remove_referenced_by_nested_property_is_rej name="body", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), ), ], ), @@ -2453,7 +2453,7 @@ def test_collection_user_defined_stopword_preset_overrides_builtin( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="en"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="en"), ), ], ) @@ -2480,7 +2480,7 @@ def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset( name="title", data_type=DataType.TEXT, 
tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom( + text_analyzer=Configure.TextAnalyzer( ascii_fold=True, stopword_preset=StopwordsPreset.EN, ), @@ -2514,9 +2514,7 @@ def test_property_text_analyzer_ascii_fold_immutable( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom( - ascii_fold=True, ascii_fold_ignore=["é"] - ), + text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["é"]), ), ], ) @@ -2535,7 +2533,7 @@ def test_property_text_analyzer_ascii_fold_immutable( name="title2", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True, ascii_fold_ignore=["ñ"]), + text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]), ), ) config = collection.config.get() @@ -2589,7 +2587,7 @@ def test_stopword_presets_roundtrip_from_dict( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), ), ], ) @@ -2625,7 +2623,7 @@ def test_text_analyzer_roundtrip_from_dict( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom( + text_analyzer=Configure.TextAnalyzer( ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN, diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 5c468d5ea..f06144ddb 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -3035,7 +3035,7 @@ def test_property_with_ascii_fold_only(self) -> None: prop = Property( name="title", data_type=DataType.TEXT, - text_analyzer=Configure.TextAnalyzer.custom(ascii_fold=True), + text_analyzer=Configure.TextAnalyzer(ascii_fold=True), ) assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True} @@ -3044,9 +3044,7 @@ def 
test_property_with_ascii_fold_and_ignore(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom( - ascii_fold=True, ascii_fold_ignore=["é", "ñ"] - ), + text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]), ) out = prop._to_dict() assert out["textAnalyzer"] == { @@ -3067,9 +3065,7 @@ def test_nested_property_with_text_analyzer(self) -> None: Property( name="title", data_type=DataType.TEXT, - text_analyzer=Configure.TextAnalyzer.custom( - ascii_fold=True, ascii_fold_ignore=["ñ"] - ), + text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]), ), ], ) @@ -3090,7 +3086,7 @@ def test_text_analyzer_stopword_preset_builtin_enum(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset=StopwordsPreset.EN), + text_analyzer=Configure.TextAnalyzer(stopword_preset=StopwordsPreset.EN), ) assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "en"} @@ -3099,7 +3095,7 @@ def test_text_analyzer_stopword_preset_user_defined_string(self) -> None: name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), ) assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "fr"} @@ -3108,7 +3104,7 @@ def test_text_analyzer_combined_ascii_fold_and_stopword_preset(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer.custom( + text_analyzer=Configure.TextAnalyzer( ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr" ), ) @@ -3123,7 +3119,7 @@ def test_text_analyzer_stopword_preset_only_omits_other_keys(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - 
text_analyzer=Configure.TextAnalyzer.custom(stopword_preset="fr"), + text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), ) out = prop._to_dict() assert "asciiFold" not in out["textAnalyzer"] diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 476fb302e..d8c06cdd1 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2223,35 +2223,28 @@ def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate": return self -class _TextAnalyzer: - """Factory class for creating text analyzer configurations. - - Use ``Configure.TextAnalyzer`` to access these methods. +def _text_analyzer( + ascii_fold: Optional[bool] = None, + ascii_fold_ignore: Optional[List[str]] = None, + stopword_preset: Optional[Union[StopwordsPreset, str]] = None, +) -> _TextAnalyzerConfigCreate: + """Create a text analyzer config for a property. + + Args: + ascii_fold: If True, accent/diacritic marks are folded to their base + characters during indexing and search (e.g. 'école' matches 'ecole'). + ascii_fold_ignore: Optional list of characters that should be excluded + from ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to + 'e'). Requires ``ascii_fold=True``. + stopword_preset: Stopword preset name to override the collection-level + stopwords for this property. Accepts a ``StopwordsPreset`` or a + user-defined preset name. """ - - @staticmethod - def custom( - ascii_fold: Optional[bool] = None, - ascii_fold_ignore: Optional[List[str]] = None, - stopword_preset: Optional[Union[StopwordsPreset, str]] = None, - ) -> _TextAnalyzerConfigCreate: - """Create a text analyzer config with custom settings. - - Args: - ascii_fold: If True, accent/diacritic marks are folded to their base - characters during indexing and search (e.g. 'école' matches 'ecole'). - ascii_fold_ignore: Optional list of characters that should be excluded - from ASCII folding (e.g. 
``['é']`` keeps 'é' from being folded to - 'e'). Requires ``ascii_fold=True``. - stopword_preset: Stopword preset name to override the collection-level - stopwords for this property. Accepts a ``StopwordsPreset`` or a - user-defined preset name. - """ - return _TextAnalyzerConfigCreate( - ascii_fold=ascii_fold, - ascii_fold_ignore=ascii_fold_ignore, - stopword_preset=stopword_preset, - ) + return _TextAnalyzerConfigCreate( + ascii_fold=ascii_fold, + ascii_fold_ignore=ascii_fold_ignore, + stopword_preset=stopword_preset, + ) class Property(_ConfigCreateModel): @@ -2665,7 +2658,7 @@ class Configure: MultiVectors = _MultiVectors ObjectTTL = _ObjectTTL Replication = _Replication - TextAnalyzer = _TextAnalyzer + TextAnalyzer = staticmethod(_text_analyzer) @staticmethod def inverted_index( From 38c7f44c9febfb7aaabe6c04ad4181079a44ce77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 09:45:19 +0100 Subject: [PATCH 50/99] chore: remove unused deprecated import from config.py --- weaviate/collections/classes/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 624baf9dd..5ea64e183 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -14,7 +14,6 @@ cast, ) -from deprecation import deprecated as docstring_deprecated from pydantic import ( AnyHttpUrl, Field, From b3eb0ac219d38e0bc3419194840e1016ab169a6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 12:24:10 +0100 Subject: [PATCH 51/99] chore: update WEAVIATE_137 version to 1.37.0-rc.1-578c4eb in workflow --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 8ad6083a0..d3dfd1fa4 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,7 +28,7 @@ env: WEAVIATE_134: 1.34.19 
WEAVIATE_135: 1.35.16-efdedfa WEAVIATE_136: 1.36.9-d905e6c - WEAVIATE_137: 1.37.0-rc.0-b313954.amd64 + WEAVIATE_137: 1.37.0-rc.1-578c4eb jobs: lint-and-format: From ceef2712fe2e1b2e637129b4ef45fbe0ecafd6c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 12:27:22 +0100 Subject: [PATCH 52/99] refactor: update text analyzer method to use new static method in Configure --- integration/test_collection_config.py | 26 +++++++------- test/collection/test_config.py | 14 ++++---- weaviate/collections/classes/config.py | 49 +++++++++++++------------- 3 files changed, 44 insertions(+), 45 deletions(-) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 25f04e9ae..45fa6ab17 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2222,7 +2222,7 @@ def test_property_text_analyzer_ascii_fold_version_gate( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(ascii_fold=True), + text_analyzer=Configure.text_analyzer(ascii_fold=True), ), ], ) @@ -2249,14 +2249,14 @@ def test_collection_stopword_presets(collection_factory: CollectionFactory) -> N name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), ), # Built-in English preset, set per property. Property( name="title_en", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset=StopwordsPreset.EN), + text_analyzer=Configure.text_analyzer(stopword_preset=StopwordsPreset.EN), ), # No stopword override → uses the collection-level default. 
Property( @@ -2296,7 +2296,7 @@ def test_collection_stopword_presets_update(collection_factory: CollectionFactor name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), ), ], ) @@ -2332,7 +2332,7 @@ def test_collection_stopword_presets_remove_in_use_is_rejected( name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), ), ], ) @@ -2385,7 +2385,7 @@ def test_collection_stopword_presets_remove_unused_is_allowed( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), ), ], ) @@ -2422,7 +2422,7 @@ def test_collection_stopword_presets_remove_referenced_by_nested_property_is_rej name="body", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), ), ], ), @@ -2457,7 +2457,7 @@ def test_collection_user_defined_stopword_preset_overrides_builtin( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="en"), + text_analyzer=Configure.text_analyzer(stopword_preset="en"), ), ], ) @@ -2484,7 +2484,7 @@ def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer( + text_analyzer=Configure.text_analyzer( ascii_fold=True, stopword_preset=StopwordsPreset.EN, ), @@ -2518,7 +2518,7 @@ def test_property_text_analyzer_ascii_fold_immutable( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - 
text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["é"]), + text_analyzer=Configure.text_analyzer(ascii_fold=True, ascii_fold_ignore=["é"]), ), ], ) @@ -2537,7 +2537,7 @@ def test_property_text_analyzer_ascii_fold_immutable( name="title2", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]), + text_analyzer=Configure.text_analyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]), ), ) config = collection.config.get() @@ -2591,7 +2591,7 @@ def test_stopword_presets_roundtrip_from_dict( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), ), ], ) @@ -2627,7 +2627,7 @@ def test_text_analyzer_roundtrip_from_dict( name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer( + text_analyzer=Configure.text_analyzer( ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN, diff --git a/test/collection/test_config.py b/test/collection/test_config.py index f06144ddb..5a69b21c5 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -3035,7 +3035,7 @@ def test_property_with_ascii_fold_only(self) -> None: prop = Property( name="title", data_type=DataType.TEXT, - text_analyzer=Configure.TextAnalyzer(ascii_fold=True), + text_analyzer=Configure.text_analyzer(ascii_fold=True), ) assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True} @@ -3044,7 +3044,7 @@ def test_property_with_ascii_fold_and_ignore(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]), + text_analyzer=Configure.text_analyzer(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]), ) out = prop._to_dict() assert out["textAnalyzer"] == { @@ -3065,7 +3065,7 @@ def 
test_nested_property_with_text_analyzer(self) -> None: Property( name="title", data_type=DataType.TEXT, - text_analyzer=Configure.TextAnalyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]), + text_analyzer=Configure.text_analyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]), ), ], ) @@ -3086,7 +3086,7 @@ def test_text_analyzer_stopword_preset_builtin_enum(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset=StopwordsPreset.EN), + text_analyzer=Configure.text_analyzer(stopword_preset=StopwordsPreset.EN), ) assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "en"} @@ -3095,7 +3095,7 @@ def test_text_analyzer_stopword_preset_user_defined_string(self) -> None: name="title_fr", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), ) assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "fr"} @@ -3104,7 +3104,7 @@ def test_text_analyzer_combined_ascii_fold_and_stopword_preset(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer( + text_analyzer=Configure.text_analyzer( ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr" ), ) @@ -3119,7 +3119,7 @@ def test_text_analyzer_stopword_preset_only_omits_other_keys(self) -> None: name="title", data_type=DataType.TEXT, tokenization=Tokenization.WORD, - text_analyzer=Configure.TextAnalyzer(stopword_preset="fr"), + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), ) out = prop._to_dict() assert "asciiFold" not in out["textAnalyzer"] diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index f746dfd03..7874cd8e2 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -2222,30 +2222,6 @@ def _validate_ascii_fold_ignore(self) -> 
"_TextAnalyzerConfigCreate": return self -def _text_analyzer( - ascii_fold: Optional[bool] = None, - ascii_fold_ignore: Optional[List[str]] = None, - stopword_preset: Optional[Union[StopwordsPreset, str]] = None, -) -> _TextAnalyzerConfigCreate: - """Create a text analyzer config for a property. - - Args: - ascii_fold: If True, accent/diacritic marks are folded to their base - characters during indexing and search (e.g. 'école' matches 'ecole'). - ascii_fold_ignore: Optional list of characters that should be excluded - from ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to - 'e'). Requires ``ascii_fold=True``. - stopword_preset: Stopword preset name to override the collection-level - stopwords for this property. Accepts a ``StopwordsPreset`` or a - user-defined preset name. - """ - return _TextAnalyzerConfigCreate( - ascii_fold=ascii_fold, - ascii_fold_ignore=ascii_fold_ignore, - stopword_preset=stopword_preset, - ) - - class Property(_ConfigCreateModel): """This class defines the structure of a data property that a collection can have within Weaviate. @@ -2657,7 +2633,30 @@ class Configure: MultiVectors = _MultiVectors ObjectTTL = _ObjectTTL Replication = _Replication - TextAnalyzer = staticmethod(_text_analyzer) + + @staticmethod + def text_analyzer( + ascii_fold: Optional[bool] = None, + ascii_fold_ignore: Optional[List[str]] = None, + stopword_preset: Optional[Union[StopwordsPreset, str]] = None, + ) -> _TextAnalyzerConfigCreate: + """Create a text analyzer config for a property. + + Args: + ascii_fold: If True, accent/diacritic marks are folded to their base + characters during indexing and search (e.g. 'école' matches 'ecole'). + ascii_fold_ignore: Optional list of characters that should be excluded + from ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to + 'e'). Requires ``ascii_fold=True``. + stopword_preset: Stopword preset name to override the collection-level + stopwords for this property. 
Accepts a ``StopwordsPreset`` or a + user-defined preset name. + """ + return _TextAnalyzerConfigCreate( + ascii_fold=ascii_fold, + ascii_fold_ignore=ascii_fold_ignore, + stopword_preset=stopword_preset, + ) @staticmethod def inverted_index( From 5e751bfbbffaa236c083ec1cfac00815f9962701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 12:29:50 +0100 Subject: [PATCH 53/99] test: add stopwords roundtrip test with ASCII folding configuration --- integration/test_collection_config.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 45fa6ab17..3f33a30b7 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -2552,16 +2552,38 @@ def test_property_text_analyzer_ascii_fold_immutable( def test_stopwords_roundtrip_from_dict(collection_factory: CollectionFactory) -> None: + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), inverted_index_config=Configure.inverted_index( stopwords_additions=["a"], stopwords_preset=StopwordsPreset.EN, stopwords_removals=["the"], + stopword_presets={"fr": ["le", "la", "les"]}, ), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer( + ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr" + ), + ), + ], ) config = collection.config.get() assert config.inverted_index_config.stopwords.preset == StopwordsPreset.EN assert config.inverted_index_config.stopwords.removals == ["the"] + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + 
assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + assert title.text_analyzer.stopword_preset == "fr" name = f"TestStopwordsRoundtrip{collection.name}" config.name = name From 9c4295b5402c17ffad6d954a125225c3ac8f4370 Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Tue, 14 Apr 2026 16:01:06 +0200 Subject: [PATCH 54/99] Add query profiling --- integration/test_collection_query_profile.py | 217 ++++++++++++++++++ weaviate/collections/classes/grpc.py | 4 + weaviate/collections/classes/internal.py | 29 +++ weaviate/collections/grpc/query.py | 1 + weaviate/collections/queries/base_executor.py | 42 +++- weaviate/outputs/query.py | 6 + weaviate/proto/v1/v4216/v1/search_get_pb2.py | 62 +++-- weaviate/proto/v1/v4216/v1/search_get_pb2.pyi | 46 +++- weaviate/proto/v1/v5261/v1/search_get_pb2.py | 62 +++-- weaviate/proto/v1/v5261/v1/search_get_pb2.pyi | 46 +++- weaviate/proto/v1/v6300/v1/search_get_pb2.py | 62 +++-- weaviate/proto/v1/v6300/v1/search_get_pb2.pyi | 46 +++- 12 files changed, 536 insertions(+), 87 deletions(-) create mode 100644 integration/test_collection_query_profile.py diff --git a/integration/test_collection_query_profile.py b/integration/test_collection_query_profile.py new file mode 100644 index 000000000..021c83802 --- /dev/null +++ b/integration/test_collection_query_profile.py @@ -0,0 +1,217 @@ +import re + +import pytest + +import weaviate +from weaviate.collections.classes.config import Configure, DataType, Property +from weaviate.collections.classes.data import DataObject +from weaviate.collections.classes.grpc import MetadataQuery +from weaviate.collections.classes.internal import SearchProfileReturn + +GO_DURATION_RE = re.compile(r"[\d.]+(ns|µs|ms|s|m|h)") + + +def assert_go_duration(value: str, label: str = "") -> None: + """Assert that a string looks like a Go duration (e.g. 
'1.234ms', '5.458µs').""" + assert GO_DURATION_RE.fullmatch( + value + ), f"Expected Go duration format for {label!r}, got {value!r}" + + +def assert_common_profile(profile: SearchProfileReturn) -> None: + """Assertions shared by every search profile regardless of type.""" + assert len(profile.details) > 0, "Profile details should not be empty" + assert "total_took" in profile.details + assert_go_duration(profile.details["total_took"], "total_took") + for key, value in profile.details.items(): + assert isinstance(key, str) and key != "" + assert isinstance(value, str) and value != "" + + +@pytest.fixture(scope="module") +def client(): + client = weaviate.connect_to_local() + yield client + client.close() + + +@pytest.fixture(scope="module") +def collection_with_data(client: weaviate.WeaviateClient): + name = "TestQueryProfile" + client.collections.delete(name) + collection = client.collections.create( + name=name, + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property(name="text", data_type=DataType.TEXT), + ], + ) + collection.data.insert_many( + [ + DataObject(properties={"text": "hello world"}, vector=[1.0, 0.0, 0.0]), + DataObject(properties={"text": "goodbye world"}, vector=[0.0, 1.0, 0.0]), + DataObject(properties={"text": "foo bar baz"}, vector=[0.0, 0.0, 1.0]), + ] + ) + yield collection + client.collections.delete(name) + + +def test_fetch_objects_with_query_profile(collection_with_data): + """Test that query profiling works with fetch_objects (object lookup).""" + result = collection_with_data.query.fetch_objects( + return_metadata=MetadataQuery(query_profile=True), + ) + assert len(result.objects) == 3 + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert shard.name != "" + assert shard.node != "" + + assert "object" in shard.searches + assert_common_profile(shard.searches["object"]) + + +def test_near_vector_with_query_profile(collection_with_data): 
+ """Test that query profiling works with near_vector search.""" + result = collection_with_data.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery(query_profile=True, distance=True), + limit=2, + ) + assert len(result.objects) == 2 + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "vector" in shard.searches + vector_profile = shard.searches["vector"] + assert_common_profile(vector_profile) + + assert "vector_search_took" in vector_profile.details + assert_go_duration( + vector_profile.details["vector_search_took"], "vector_search_took" + ) + + assert "hnsw_flat_search" in vector_profile.details + assert vector_profile.details["hnsw_flat_search"] in ("true", "false") + + layer_keys = [ + k for k in vector_profile.details if k.startswith("knn_search_layer_") + ] + assert len(layer_keys) > 0, "Expected at least one knn_search_layer_*_took key" + for k in layer_keys: + assert_go_duration(vector_profile.details[k], k) + + assert "objects_took" in vector_profile.details + assert_go_duration(vector_profile.details["objects_took"], "objects_took") + + +def test_bm25_with_query_profile(collection_with_data): + """Test that query profiling works with BM25 keyword search.""" + result = collection_with_data.query.bm25( + query="hello", + return_metadata=MetadataQuery(query_profile=True, score=True), + ) + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "keyword" in shard.searches + keyword_profile = shard.searches["keyword"] + assert_common_profile(keyword_profile) + + assert "kwd_method" in keyword_profile.details + assert keyword_profile.details["kwd_method"] != "" + + assert "kwd_time" in keyword_profile.details + assert_go_duration(keyword_profile.details["kwd_time"], "kwd_time") + + assert "kwd_1_tok_time" in keyword_profile.details + 
assert_go_duration(keyword_profile.details["kwd_1_tok_time"], "kwd_1_tok_time") + + assert "kwd_6_res_count" in keyword_profile.details + assert keyword_profile.details["kwd_6_res_count"].isdigit() + assert int(keyword_profile.details["kwd_6_res_count"]) >= 0 + + +def test_hybrid_with_query_profile(collection_with_data): + """Test that query profiling works with hybrid search (both vector and keyword).""" + result = collection_with_data.query.hybrid( + query="hello", + vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery(query_profile=True), + limit=2, + ) + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "vector" in shard.searches, "Hybrid should produce a 'vector' profile" + assert "keyword" in shard.searches, "Hybrid should produce a 'keyword' profile" + + assert_common_profile(shard.searches["vector"]) + assert "vector_search_took" in shard.searches["vector"].details + + assert_common_profile(shard.searches["keyword"]) + assert "kwd_method" in shard.searches["keyword"].details + + +def test_near_vector_group_by_with_query_profile(collection_with_data): + """Test that query profiling works with group_by (mirrors C# QueryProfiling_NearText_GroupBy_Returns_Profile).""" + from weaviate.collections.classes.grpc import GroupBy + + result = collection_with_data.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery(query_profile=True), + group_by=GroupBy(prop="text", objects_per_group=1, number_of_groups=3), + ) + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "vector" in shard.searches + assert_common_profile(shard.searches["vector"]) + + +def test_no_query_profile_when_not_requested(collection_with_data): + """Test that query_profile is None when not requested.""" + result = collection_with_data.query.fetch_objects( + 
return_metadata=MetadataQuery(distance=True), + ) + assert result.query_profile is None + + +def test_query_profile_with_metadata_list(collection_with_data): + """Test that query profiling works when using list-style metadata.""" + result = collection_with_data.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=["query_profile", "distance"], + limit=2, + ) + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "vector" in shard.searches + assert_common_profile(shard.searches["vector"]) + + +def test_query_profile_details_are_strings(collection_with_data): + """Test that all detail keys and values are non-empty strings.""" + result = collection_with_data.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery(query_profile=True), + limit=1, + ) + assert result.query_profile is not None + for shard in result.query_profile.shards: + assert len(shard.searches) > 0, "Shard should have at least one search profile" + for search_type, profile in shard.searches.items(): + assert isinstance(search_type, str) and search_type != "" + assert len(profile.details) > 0 + for key, value in profile.details.items(): + assert isinstance(key, str) and key != "" + assert isinstance(value, str) and value != "" diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index bff0e35ca..20ece3bc0 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -90,6 +90,7 @@ class MetadataQuery(_WeaviateInput): score: bool = Field(default=False) explain_score: bool = Field(default=False) is_consistent: bool = Field(default=False) + query_profile: bool = Field(default=False) @classmethod def full(cls) -> "MetadataQuery": @@ -117,6 +118,7 @@ class _MetadataQuery: explain_score: bool = False is_consistent: bool = False vectors: Optional[List[str]] = None + query_profile: bool = False @classmethod def 
from_public( @@ -138,6 +140,7 @@ def from_public( score=public.score, explain_score=public.explain_score, is_consistent=public.is_consistent, + query_profile=public.query_profile, ) ) @@ -152,6 +155,7 @@ def from_public( "score", "explain_score", "is_consistent", + "query_profile", ] ], MetadataQuery, diff --git a/weaviate/collections/classes/internal.py b/weaviate/collections/classes/internal.py index f4d542f66..55a69c5cd 100644 --- a/weaviate/collections/classes/internal.py +++ b/weaviate/collections/classes/internal.py @@ -90,6 +90,29 @@ def _is_empty(self) -> bool: ) +@dataclass +class SearchProfileReturn: + """Profiling details for a single search type within a shard.""" + + details: Dict[str, str] + + +@dataclass +class ShardProfileReturn: + """Profiling data for a single shard.""" + + name: str + node: str + searches: Dict[str, SearchProfileReturn] + + +@dataclass +class QueryProfileReturn: + """Per-shard query profiling data returned when `query_profile=True` is set in metadata.""" + + shards: List[ShardProfileReturn] + + @dataclass class GroupByMetadataReturn: """Metadata of an object returned by a group by query.""" @@ -210,6 +233,7 @@ class GenerativeReturn(Generic[P, R]): __generated: Optional[str] objects: List[GenerativeObject[P, R]] generative: Optional[GenerativeGrouped] + query_profile: Optional[QueryProfileReturn] # init required because of nuances of dataclass when defining @property generated and private var __generated def __init__( @@ -217,10 +241,12 @@ def __init__( generated: Optional[str], objects: List[GenerativeObject[P, R]], generative: Optional[GenerativeGrouped], + query_profile: Optional[QueryProfileReturn] = None, ) -> None: self.__generated = generated self.objects = objects self.generative = generative + self.query_profile = query_profile @property @deprecated( @@ -257,6 +283,7 @@ class GenerativeGroupByReturn(Generic[P, R]): objects: List[GroupByObject[P, R]] groups: Dict[str, GenerativeGroup[P, R]] generated: Optional[str] + 
query_profile: Optional[QueryProfileReturn] = None @dataclass @@ -265,6 +292,7 @@ class GroupByReturn(Generic[P, R]): objects: List[GroupByObject[P, R]] groups: Dict[str, Group[P, R]] + query_profile: Optional[QueryProfileReturn] = None @dataclass @@ -272,6 +300,7 @@ class QueryReturn(Generic[P, R]): """The return type of a query within the `.query` namespace of a collection.""" objects: List[Object[P, R]] + query_profile: Optional[QueryProfileReturn] = None _GQLEntryReturnType: TypeAlias = Dict[str, List[Dict[str, Any]]] diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index 1ac014ada..d635a3f2f 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -521,6 +521,7 @@ def _metadata_to_grpc(self, metadata: _MetadataQuery) -> search_get_pb2.Metadata score=metadata.score, is_consistent=metadata.is_consistent, vectors=metadata.vectors, + query_profile=metadata.query_profile, ) def __resolve_property(self, prop: QueryNested) -> search_get_pb2.ObjectPropertiesRequest: diff --git a/weaviate/collections/queries/base_executor.py b/weaviate/collections/queries/base_executor.py index e721d9edb..f3eb36399 100644 --- a/weaviate/collections/queries/base_executor.py +++ b/weaviate/collections/queries/base_executor.py @@ -40,9 +40,12 @@ GroupByReturn, MetadataReturn, Object, + QueryProfileReturn, QueryReturn, ReturnProperties, ReturnReferences, + SearchProfileReturn, + ShardProfileReturn, WeaviateProperties, _CrossReference, _extract_properties_from_data_model, @@ -53,7 +56,7 @@ from weaviate.collections.grpc.query import _QueryGRPC from weaviate.collections.grpc.shared import _ByteOps, _Unpack from weaviate.connect.v4 import ConnectionType -from weaviate.exceptions import WeaviateInvalidInputError +from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError from weaviate.proto.v1 import base_pb2, generative_pb2, properties_pb2, search_get_pb2 from weaviate.types import 
INCLUDE_VECTOR from weaviate.util import ( @@ -452,6 +455,25 @@ def __result_to_group_by_object( belongs_to_group=group_name, ) + def __extract_query_profile( + self, res: search_get_pb2.SearchReply + ) -> Optional[QueryProfileReturn]: + if not res.HasField("query_profile"): + return None + return QueryProfileReturn( + shards=[ + ShardProfileReturn( + name=shard.name, + node=shard.node, + searches={ + key: SearchProfileReturn(details=dict(profile.details)) + for key, profile in shard.searches.items() + }, + ) + for shard in res.query_profile.shards + ] + ) + def _result_to_query_return( self, res: search_get_pb2.SearchReply, @@ -461,7 +483,8 @@ def _result_to_query_return( objects=[ self.__result_to_query_object(obj.properties, obj.metadata, options) for obj in res.results - ] + ], + query_profile=self.__extract_query_profile(res), ) def _result_to_generative_query_return( @@ -480,6 +503,7 @@ def _result_to_generative_query_return( generative=self.__extract_generative_grouped_from_generative( res.generative_grouped_results ), + query_profile=self.__extract_query_profile(res), ) def _result_to_generative_return( @@ -507,7 +531,11 @@ def _result_to_groupby_return( objects_group_by: List[GroupByObject] = [ obj for group in groups.values() for obj in group.objects ] - return GroupByReturn(objects=objects_group_by, groups=groups) + return GroupByReturn( + objects=objects_group_by, + groups=groups, + query_profile=self.__extract_query_profile(res), + ) def _result_to_generative_groupby_return( self, @@ -537,6 +565,7 @@ def _result_to_generative_groupby_return( generated=( res.generative_grouped_result if res.generative_grouped_result != "" else None ), + query_profile=self.__extract_query_profile(res), ) def _result_to_query_or_groupby_return( @@ -615,6 +644,13 @@ def _parse_return_metadata( ret_md = cast(MetadataQuery, return_metadata) else: ret_md = MetadataQuery(**{str(prop): True for prop in return_metadata}) + + if ret_md is not None and ret_md.query_profile: + if 
self._connection._weaviate_version.is_lower_than(1, 36, 9): + raise WeaviateUnsupportedFeatureError( + "Query profiling", str(self._connection._weaviate_version), "1.36.9" + ) + return _MetadataQuery.from_public(ret_md, include_vector) def _parse_return_references( diff --git a/weaviate/outputs/query.py b/weaviate/outputs/query.py index 42ede14b3..625d669d8 100644 --- a/weaviate/outputs/query.py +++ b/weaviate/outputs/query.py @@ -32,11 +32,14 @@ Object, ObjectSingleReturn, QueryNearMediaReturnType, + QueryProfileReturn, QueryReturn, QueryReturnType, QuerySingleReturn, ReferenceInput, ReferenceInputs, + SearchProfileReturn, + ShardProfileReturn, ) from weaviate.collections.classes.types import ( GeoCoordinate, @@ -75,11 +78,14 @@ "GenerativeGroup", "PhoneNumberType", "QueryNearMediaReturnType", + "QueryProfileReturn", "QueryReturnType", "QueryReturn", "QuerySingleReturn", "ReferenceInput", "ReferenceInputs", + "SearchProfileReturn", + "ShardProfileReturn", "Sorting", "TargetVectorJoinType", "WeaviateField", diff --git a/weaviate/proto/v1/v4216/v1/search_get_pb2.py b/weaviate/proto/v1/v4216/v1/search_get_pb2.py index 7885e57d6..78f20076e 100644 --- a/weaviate/proto/v1/v4216/v1/search_get_pb2.py +++ b/weaviate/proto/v1/v4216/v1/search_get_pb2.py @@ -17,7 +17,7 @@ from weaviate.proto.v1.v4216.v1 import properties_pb2 as v1_dot_properties__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 
\x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 
\x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 
\x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t 
\x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 
\x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. \x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 
\x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xf4\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\x12\x15\n\rquery_profile\x18\x0b \x01(\x08\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 
\x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xf7\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x12\x35\n\rquery_profile\x18\x06 \x01(\x0b\x32\x19.weaviate.v1.QueryProfileH\x02\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_resultsB\x10\n\x0e_query_profile\"\x9e\x03\n\x0cQueryProfile\x12\x36\n\x06shards\x18\x01 \x03(\x0b\x32&.weaviate.v1.QueryProfile.ShardProfile\x1a\x86\x01\n\rSearchProfile\x12\x45\n\x07\x64\x65tails\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.SearchProfile.DetailsEntry\x1a.\n\x0c\x44\x65tailsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xcc\x01\n\x0cShardProfile\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04node\x18\x02 \x01(\t\x12\x46\n\x08searches\x18\x03 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.ShardProfile.SearchesEntry\x1aX\n\rSearchesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x36\n\x05value\x18\x02 \x01(\x0b\x32\'.weaviate.v1.QueryProfile.SearchProfile:\x02\x38\x01\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 
\x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c 
\x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -31,6 +31,10 @@ _SEARCHREQUEST.fields_by_name['uses_125_api']._serialized_options = b'\030\001' _SEARCHREPLY.fields_by_name['generative_grouped_result']._options = None _SEARCHREPLY.fields_by_name['generative_grouped_result']._serialized_options = b'\030\001' + _QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY._options = None + _QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY._serialized_options = b'8\001' + _QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY._options = None + _QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY._serialized_options = b'8\001' _GROUPBYRESULT.fields_by_name['generative']._options = None _GROUPBYRESULT.fields_by_name['generative']._serialized_options = b'\030\001' _METADATARESULT.fields_by_name['vector']._options = None @@ -46,27 +50,37 @@ _globals['_SORTBY']._serialized_start=1632 _globals['_SORTBY']._serialized_end=1673 _globals['_METADATAREQUEST']._serialized_start=1676 - _globals['_METADATAREQUEST']._serialized_end=1897 - _globals['_PROPERTIESREQUEST']._serialized_start=1900 - _globals['_PROPERTIESREQUEST']._serialized_end=2109 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2112 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2251 - _globals['_REFPROPERTIESREQUEST']._serialized_start=2254 - _globals['_REFPROPERTIESREQUEST']._serialized_end=2431 - _globals['_RERANK']._serialized_start=2433 - _globals['_RERANK']._serialized_end=2489 - _globals['_SEARCHREPLY']._serialized_start=2492 - _globals['_SEARCHREPLY']._serialized_end=2794 - 
_globals['_RERANKREPLY']._serialized_start=2796 - _globals['_RERANKREPLY']._serialized_end=2824 - _globals['_GROUPBYRESULT']._serialized_start=2827 - _globals['_GROUPBYRESULT']._serialized_end=3188 - _globals['_SEARCHRESULT']._serialized_start=3191 - _globals['_SEARCHRESULT']._serialized_end=3374 - _globals['_METADATARESULT']._serialized_start=3377 - _globals['_METADATARESULT']._serialized_end=4008 - _globals['_PROPERTIESRESULT']._serialized_start=4011 - _globals['_PROPERTIESRESULT']._serialized_end=4275 - _globals['_REFPROPERTIESRESULT']._serialized_start=4277 - _globals['_REFPROPERTIESRESULT']._serialized_end=4368 + _globals['_METADATAREQUEST']._serialized_end=1920 + _globals['_PROPERTIESREQUEST']._serialized_start=1923 + _globals['_PROPERTIESREQUEST']._serialized_end=2132 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2135 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2274 + _globals['_REFPROPERTIESREQUEST']._serialized_start=2277 + _globals['_REFPROPERTIESREQUEST']._serialized_end=2454 + _globals['_RERANK']._serialized_start=2456 + _globals['_RERANK']._serialized_end=2512 + _globals['_SEARCHREPLY']._serialized_start=2515 + _globals['_SEARCHREPLY']._serialized_end=2890 + _globals['_QUERYPROFILE']._serialized_start=2893 + _globals['_QUERYPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_start=2966 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_end=3100 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_start=3054 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_end=3100 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_start=3103 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_start=3219 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_end=3307 + _globals['_RERANKREPLY']._serialized_start=3309 + _globals['_RERANKREPLY']._serialized_end=3337 + 
_globals['_GROUPBYRESULT']._serialized_start=3340 + _globals['_GROUPBYRESULT']._serialized_end=3701 + _globals['_SEARCHRESULT']._serialized_start=3704 + _globals['_SEARCHRESULT']._serialized_end=3887 + _globals['_METADATARESULT']._serialized_start=3890 + _globals['_METADATARESULT']._serialized_end=4521 + _globals['_PROPERTIESRESULT']._serialized_start=4524 + _globals['_PROPERTIESRESULT']._serialized_end=4788 + _globals['_REFPROPERTIESRESULT']._serialized_start=4790 + _globals['_REFPROPERTIESRESULT']._serialized_end=4881 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v4216/v1/search_get_pb2.pyi b/weaviate/proto/v1/v4216/v1/search_get_pb2.pyi index 9dd1ee0d0..a0300e672 100644 --- a/weaviate/proto/v1/v4216/v1/search_get_pb2.pyi +++ b/weaviate/proto/v1/v4216/v1/search_get_pb2.pyi @@ -88,7 +88,7 @@ class SortBy(_message.Message): def __init__(self, ascending: bool = ..., path: _Optional[_Iterable[str]] = ...) -> None: ... class MetadataRequest(_message.Message): - __slots__ = ["uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors"] + __slots__ = ["uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors", "query_profile"] UUID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] CREATION_TIME_UNIX_FIELD_NUMBER: _ClassVar[int] @@ -99,6 +99,7 @@ class MetadataRequest(_message.Message): EXPLAIN_SCORE_FIELD_NUMBER: _ClassVar[int] IS_CONSISTENT_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] uuid: bool vector: bool creation_time_unix: bool @@ -109,7 +110,8 @@ class MetadataRequest(_message.Message): explain_score: bool is_consistent: bool vectors: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: 
bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ...) -> None: ... + query_profile: bool + def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ..., query_profile: bool = ...) -> None: ... class PropertiesRequest(_message.Message): __slots__ = ["non_ref_properties", "ref_properties", "object_properties", "return_all_nonref_properties"] @@ -154,18 +156,54 @@ class Rerank(_message.Message): def __init__(self, property: _Optional[str] = ..., query: _Optional[str] = ...) -> None: ... class SearchReply(_message.Message): - __slots__ = ["took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results"] + __slots__ = ["took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results", "query_profile"] TOOK_FIELD_NUMBER: _ClassVar[int] RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULT_FIELD_NUMBER: _ClassVar[int] GROUP_BY_RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULTS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] took: float results: _containers.RepeatedCompositeFieldContainer[SearchResult] generative_grouped_result: str group_by_results: _containers.RepeatedCompositeFieldContainer[GroupByResult] generative_grouped_results: _generative_pb2.GenerativeResult - def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ...) -> None: ... 
+ query_profile: QueryProfile + def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ..., query_profile: _Optional[_Union[QueryProfile, _Mapping]] = ...) -> None: ... + +class QueryProfile(_message.Message): + __slots__ = ["shards"] + class SearchProfile(_message.Message): + __slots__ = ["details"] + class DetailsEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ... + DETAILS_FIELD_NUMBER: _ClassVar[int] + details: _containers.ScalarMap[str, str] + def __init__(self, details: _Optional[_Mapping[str, str]] = ...) -> None: ... + class ShardProfile(_message.Message): + __slots__ = ["name", "node", "searches"] + class SearchesEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: QueryProfile.SearchProfile + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[QueryProfile.SearchProfile, _Mapping]] = ...) -> None: ... + NAME_FIELD_NUMBER: _ClassVar[int] + NODE_FIELD_NUMBER: _ClassVar[int] + SEARCHES_FIELD_NUMBER: _ClassVar[int] + name: str + node: str + searches: _containers.MessageMap[str, QueryProfile.SearchProfile] + def __init__(self, name: _Optional[str] = ..., node: _Optional[str] = ..., searches: _Optional[_Mapping[str, QueryProfile.SearchProfile]] = ...) -> None: ... + SHARDS_FIELD_NUMBER: _ClassVar[int] + shards: _containers.RepeatedCompositeFieldContainer[QueryProfile.ShardProfile] + def __init__(self, shards: _Optional[_Iterable[_Union[QueryProfile.ShardProfile, _Mapping]]] = ...) 
-> None: ... class RerankReply(_message.Message): __slots__ = ["score"] diff --git a/weaviate/proto/v1/v5261/v1/search_get_pb2.py b/weaviate/proto/v1/v5261/v1/search_get_pb2.py index 04356c19f..7686973af 100644 --- a/weaviate/proto/v1/v5261/v1/search_get_pb2.py +++ b/weaviate/proto/v1/v5261/v1/search_get_pb2.py @@ -18,7 +18,7 @@ from weaviate.proto.v1.v5261.v1 import properties_pb2 as v1_dot_properties__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 
\x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 
\x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c 
\x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xf4\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\x12\x15\n\rquery_profile\x18\x0b 
\x01(\x08\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xf7\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x12\x35\n\rquery_profile\x18\x06 \x01(\x0b\x32\x19.weaviate.v1.QueryProfileH\x02\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_resultsB\x10\n\x0e_query_profile\"\x9e\x03\n\x0cQueryProfile\x12\x36\n\x06shards\x18\x01 \x03(\x0b\x32&.weaviate.v1.QueryProfile.ShardProfile\x1a\x86\x01\n\rSearchProfile\x12\x45\n\x07\x64\x65tails\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.SearchProfile.DetailsEntry\x1a.\n\x0c\x44\x65tailsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xcc\x01\n\x0cShardProfile\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04node\x18\x02 
\x01(\t\x12\x46\n\x08searches\x18\x03 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.ShardProfile.SearchesEntry\x1aX\n\rSearchesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x36\n\x05value\x18\x02 \x01(\x0b\x32\'.weaviate.v1.QueryProfile.SearchProfile:\x02\x38\x01\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f 
\x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -32,6 +32,10 @@ _globals['_SEARCHREQUEST'].fields_by_name['uses_125_api']._serialized_options = b'\030\001' _globals['_SEARCHREPLY'].fields_by_name['generative_grouped_result']._loaded_options = None _globals['_SEARCHREPLY'].fields_by_name['generative_grouped_result']._serialized_options = b'\030\001' + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._loaded_options = None + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_options = b'8\001' + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._loaded_options = None + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_options = b'8\001' 
_globals['_GROUPBYRESULT'].fields_by_name['generative']._loaded_options = None _globals['_GROUPBYRESULT'].fields_by_name['generative']._serialized_options = b'\030\001' _globals['_METADATARESULT'].fields_by_name['vector']._loaded_options = None @@ -47,27 +51,37 @@ _globals['_SORTBY']._serialized_start=1632 _globals['_SORTBY']._serialized_end=1673 _globals['_METADATAREQUEST']._serialized_start=1676 - _globals['_METADATAREQUEST']._serialized_end=1897 - _globals['_PROPERTIESREQUEST']._serialized_start=1900 - _globals['_PROPERTIESREQUEST']._serialized_end=2109 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2112 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2251 - _globals['_REFPROPERTIESREQUEST']._serialized_start=2254 - _globals['_REFPROPERTIESREQUEST']._serialized_end=2431 - _globals['_RERANK']._serialized_start=2433 - _globals['_RERANK']._serialized_end=2489 - _globals['_SEARCHREPLY']._serialized_start=2492 - _globals['_SEARCHREPLY']._serialized_end=2794 - _globals['_RERANKREPLY']._serialized_start=2796 - _globals['_RERANKREPLY']._serialized_end=2824 - _globals['_GROUPBYRESULT']._serialized_start=2827 - _globals['_GROUPBYRESULT']._serialized_end=3188 - _globals['_SEARCHRESULT']._serialized_start=3191 - _globals['_SEARCHRESULT']._serialized_end=3374 - _globals['_METADATARESULT']._serialized_start=3377 - _globals['_METADATARESULT']._serialized_end=4008 - _globals['_PROPERTIESRESULT']._serialized_start=4011 - _globals['_PROPERTIESRESULT']._serialized_end=4275 - _globals['_REFPROPERTIESRESULT']._serialized_start=4277 - _globals['_REFPROPERTIESRESULT']._serialized_end=4368 + _globals['_METADATAREQUEST']._serialized_end=1920 + _globals['_PROPERTIESREQUEST']._serialized_start=1923 + _globals['_PROPERTIESREQUEST']._serialized_end=2132 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2135 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2274 + _globals['_REFPROPERTIESREQUEST']._serialized_start=2277 + 
_globals['_REFPROPERTIESREQUEST']._serialized_end=2454 + _globals['_RERANK']._serialized_start=2456 + _globals['_RERANK']._serialized_end=2512 + _globals['_SEARCHREPLY']._serialized_start=2515 + _globals['_SEARCHREPLY']._serialized_end=2890 + _globals['_QUERYPROFILE']._serialized_start=2893 + _globals['_QUERYPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_start=2966 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_end=3100 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_start=3054 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_end=3100 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_start=3103 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_start=3219 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_end=3307 + _globals['_RERANKREPLY']._serialized_start=3309 + _globals['_RERANKREPLY']._serialized_end=3337 + _globals['_GROUPBYRESULT']._serialized_start=3340 + _globals['_GROUPBYRESULT']._serialized_end=3701 + _globals['_SEARCHRESULT']._serialized_start=3704 + _globals['_SEARCHRESULT']._serialized_end=3887 + _globals['_METADATARESULT']._serialized_start=3890 + _globals['_METADATARESULT']._serialized_end=4521 + _globals['_PROPERTIESRESULT']._serialized_start=4524 + _globals['_PROPERTIESRESULT']._serialized_end=4788 + _globals['_REFPROPERTIESRESULT']._serialized_start=4790 + _globals['_REFPROPERTIESRESULT']._serialized_end=4881 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v5261/v1/search_get_pb2.pyi b/weaviate/proto/v1/v5261/v1/search_get_pb2.pyi index 4a28237d9..783ca3564 100644 --- a/weaviate/proto/v1/v5261/v1/search_get_pb2.pyi +++ b/weaviate/proto/v1/v5261/v1/search_get_pb2.pyi @@ -88,7 +88,7 @@ class SortBy(_message.Message): def __init__(self, ascending: bool = ..., path: _Optional[_Iterable[str]] = ...) -> None: ... 
class MetadataRequest(_message.Message): - __slots__ = ("uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors") + __slots__ = ("uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors", "query_profile") UUID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] CREATION_TIME_UNIX_FIELD_NUMBER: _ClassVar[int] @@ -99,6 +99,7 @@ class MetadataRequest(_message.Message): EXPLAIN_SCORE_FIELD_NUMBER: _ClassVar[int] IS_CONSISTENT_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] uuid: bool vector: bool creation_time_unix: bool @@ -109,7 +110,8 @@ class MetadataRequest(_message.Message): explain_score: bool is_consistent: bool vectors: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ...) -> None: ... + query_profile: bool + def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ..., query_profile: bool = ...) -> None: ... class PropertiesRequest(_message.Message): __slots__ = ("non_ref_properties", "ref_properties", "object_properties", "return_all_nonref_properties") @@ -154,18 +156,54 @@ class Rerank(_message.Message): def __init__(self, property: _Optional[str] = ..., query: _Optional[str] = ...) -> None: ... 
class SearchReply(_message.Message): - __slots__ = ("took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results") + __slots__ = ("took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results", "query_profile") TOOK_FIELD_NUMBER: _ClassVar[int] RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULT_FIELD_NUMBER: _ClassVar[int] GROUP_BY_RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULTS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] took: float results: _containers.RepeatedCompositeFieldContainer[SearchResult] generative_grouped_result: str group_by_results: _containers.RepeatedCompositeFieldContainer[GroupByResult] generative_grouped_results: _generative_pb2.GenerativeResult - def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ...) -> None: ... + query_profile: QueryProfile + def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ..., query_profile: _Optional[_Union[QueryProfile, _Mapping]] = ...) -> None: ... + +class QueryProfile(_message.Message): + __slots__ = ("shards",) + class SearchProfile(_message.Message): + __slots__ = ("details",) + class DetailsEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) 
-> None: ... + DETAILS_FIELD_NUMBER: _ClassVar[int] + details: _containers.ScalarMap[str, str] + def __init__(self, details: _Optional[_Mapping[str, str]] = ...) -> None: ... + class ShardProfile(_message.Message): + __slots__ = ("name", "node", "searches") + class SearchesEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: QueryProfile.SearchProfile + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[QueryProfile.SearchProfile, _Mapping]] = ...) -> None: ... + NAME_FIELD_NUMBER: _ClassVar[int] + NODE_FIELD_NUMBER: _ClassVar[int] + SEARCHES_FIELD_NUMBER: _ClassVar[int] + name: str + node: str + searches: _containers.MessageMap[str, QueryProfile.SearchProfile] + def __init__(self, name: _Optional[str] = ..., node: _Optional[str] = ..., searches: _Optional[_Mapping[str, QueryProfile.SearchProfile]] = ...) -> None: ... + SHARDS_FIELD_NUMBER: _ClassVar[int] + shards: _containers.RepeatedCompositeFieldContainer[QueryProfile.ShardProfile] + def __init__(self, shards: _Optional[_Iterable[_Union[QueryProfile.ShardProfile, _Mapping]]] = ...) -> None: ... 
class RerankReply(_message.Message): __slots__ = ("score",) diff --git a/weaviate/proto/v1/v6300/v1/search_get_pb2.py b/weaviate/proto/v1/v6300/v1/search_get_pb2.py index 88dfb5992..1b38e4fb6 100644 --- a/weaviate/proto/v1/v6300/v1/search_get_pb2.py +++ b/weaviate/proto/v1/v6300/v1/search_get_pb2.py @@ -28,7 +28,7 @@ from weaviate.proto.v1.v6300.v1 import properties_pb2 as v1_dot_properties__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 
\x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 
\x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c 
\x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xf4\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\x12\x15\n\rquery_profile\x18\x0b 
\x01(\x08\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xf7\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x12\x35\n\rquery_profile\x18\x06 \x01(\x0b\x32\x19.weaviate.v1.QueryProfileH\x02\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_resultsB\x10\n\x0e_query_profile\"\x9e\x03\n\x0cQueryProfile\x12\x36\n\x06shards\x18\x01 \x03(\x0b\x32&.weaviate.v1.QueryProfile.ShardProfile\x1a\x86\x01\n\rSearchProfile\x12\x45\n\x07\x64\x65tails\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.SearchProfile.DetailsEntry\x1a.\n\x0c\x44\x65tailsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xcc\x01\n\x0cShardProfile\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04node\x18\x02 
\x01(\t\x12\x46\n\x08searches\x18\x03 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.ShardProfile.SearchesEntry\x1aX\n\rSearchesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x36\n\x05value\x18\x02 \x01(\x0b\x32\'.weaviate.v1.QueryProfile.SearchProfile:\x02\x38\x01\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f 
\x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -42,6 +42,10 @@ _globals['_SEARCHREQUEST'].fields_by_name['uses_125_api']._serialized_options = b'\030\001' _globals['_SEARCHREPLY'].fields_by_name['generative_grouped_result']._loaded_options = None _globals['_SEARCHREPLY'].fields_by_name['generative_grouped_result']._serialized_options = b'\030\001' + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._loaded_options = None + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_options = b'8\001' + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._loaded_options = None + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_options = b'8\001' 
_globals['_GROUPBYRESULT'].fields_by_name['generative']._loaded_options = None _globals['_GROUPBYRESULT'].fields_by_name['generative']._serialized_options = b'\030\001' _globals['_METADATARESULT'].fields_by_name['vector']._loaded_options = None @@ -57,27 +61,37 @@ _globals['_SORTBY']._serialized_start=1632 _globals['_SORTBY']._serialized_end=1673 _globals['_METADATAREQUEST']._serialized_start=1676 - _globals['_METADATAREQUEST']._serialized_end=1897 - _globals['_PROPERTIESREQUEST']._serialized_start=1900 - _globals['_PROPERTIESREQUEST']._serialized_end=2109 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2112 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2251 - _globals['_REFPROPERTIESREQUEST']._serialized_start=2254 - _globals['_REFPROPERTIESREQUEST']._serialized_end=2431 - _globals['_RERANK']._serialized_start=2433 - _globals['_RERANK']._serialized_end=2489 - _globals['_SEARCHREPLY']._serialized_start=2492 - _globals['_SEARCHREPLY']._serialized_end=2794 - _globals['_RERANKREPLY']._serialized_start=2796 - _globals['_RERANKREPLY']._serialized_end=2824 - _globals['_GROUPBYRESULT']._serialized_start=2827 - _globals['_GROUPBYRESULT']._serialized_end=3188 - _globals['_SEARCHRESULT']._serialized_start=3191 - _globals['_SEARCHRESULT']._serialized_end=3374 - _globals['_METADATARESULT']._serialized_start=3377 - _globals['_METADATARESULT']._serialized_end=4008 - _globals['_PROPERTIESRESULT']._serialized_start=4011 - _globals['_PROPERTIESRESULT']._serialized_end=4275 - _globals['_REFPROPERTIESRESULT']._serialized_start=4277 - _globals['_REFPROPERTIESRESULT']._serialized_end=4368 + _globals['_METADATAREQUEST']._serialized_end=1920 + _globals['_PROPERTIESREQUEST']._serialized_start=1923 + _globals['_PROPERTIESREQUEST']._serialized_end=2132 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2135 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2274 + _globals['_REFPROPERTIESREQUEST']._serialized_start=2277 + 
_globals['_REFPROPERTIESREQUEST']._serialized_end=2454 + _globals['_RERANK']._serialized_start=2456 + _globals['_RERANK']._serialized_end=2512 + _globals['_SEARCHREPLY']._serialized_start=2515 + _globals['_SEARCHREPLY']._serialized_end=2890 + _globals['_QUERYPROFILE']._serialized_start=2893 + _globals['_QUERYPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_start=2966 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_end=3100 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_start=3054 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_end=3100 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_start=3103 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_start=3219 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_end=3307 + _globals['_RERANKREPLY']._serialized_start=3309 + _globals['_RERANKREPLY']._serialized_end=3337 + _globals['_GROUPBYRESULT']._serialized_start=3340 + _globals['_GROUPBYRESULT']._serialized_end=3701 + _globals['_SEARCHRESULT']._serialized_start=3704 + _globals['_SEARCHRESULT']._serialized_end=3887 + _globals['_METADATARESULT']._serialized_start=3890 + _globals['_METADATARESULT']._serialized_end=4521 + _globals['_PROPERTIESRESULT']._serialized_start=4524 + _globals['_PROPERTIESRESULT']._serialized_end=4788 + _globals['_REFPROPERTIESRESULT']._serialized_start=4790 + _globals['_REFPROPERTIESRESULT']._serialized_end=4881 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v6300/v1/search_get_pb2.pyi b/weaviate/proto/v1/v6300/v1/search_get_pb2.pyi index 8dd3cb881..f631fb396 100644 --- a/weaviate/proto/v1/v6300/v1/search_get_pb2.pyi +++ b/weaviate/proto/v1/v6300/v1/search_get_pb2.pyi @@ -89,7 +89,7 @@ class SortBy(_message.Message): def __init__(self, ascending: bool = ..., path: _Optional[_Iterable[str]] = ...) -> None: ... 
class MetadataRequest(_message.Message): - __slots__ = ("uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors") + __slots__ = ("uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors", "query_profile") UUID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] CREATION_TIME_UNIX_FIELD_NUMBER: _ClassVar[int] @@ -100,6 +100,7 @@ class MetadataRequest(_message.Message): EXPLAIN_SCORE_FIELD_NUMBER: _ClassVar[int] IS_CONSISTENT_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] uuid: bool vector: bool creation_time_unix: bool @@ -110,7 +111,8 @@ class MetadataRequest(_message.Message): explain_score: bool is_consistent: bool vectors: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ...) -> None: ... + query_profile: bool + def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ..., query_profile: bool = ...) -> None: ... class PropertiesRequest(_message.Message): __slots__ = ("non_ref_properties", "ref_properties", "object_properties", "return_all_nonref_properties") @@ -155,18 +157,54 @@ class Rerank(_message.Message): def __init__(self, property: _Optional[str] = ..., query: _Optional[str] = ...) -> None: ... 
class SearchReply(_message.Message): - __slots__ = ("took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results") + __slots__ = ("took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results", "query_profile") TOOK_FIELD_NUMBER: _ClassVar[int] RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULT_FIELD_NUMBER: _ClassVar[int] GROUP_BY_RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULTS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] took: float results: _containers.RepeatedCompositeFieldContainer[SearchResult] generative_grouped_result: str group_by_results: _containers.RepeatedCompositeFieldContainer[GroupByResult] generative_grouped_results: _generative_pb2.GenerativeResult - def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ...) -> None: ... + query_profile: QueryProfile + def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ..., query_profile: _Optional[_Union[QueryProfile, _Mapping]] = ...) -> None: ... + +class QueryProfile(_message.Message): + __slots__ = ("shards",) + class SearchProfile(_message.Message): + __slots__ = ("details",) + class DetailsEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) 
-> None: ... + DETAILS_FIELD_NUMBER: _ClassVar[int] + details: _containers.ScalarMap[str, str] + def __init__(self, details: _Optional[_Mapping[str, str]] = ...) -> None: ... + class ShardProfile(_message.Message): + __slots__ = ("name", "node", "searches") + class SearchesEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: QueryProfile.SearchProfile + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[QueryProfile.SearchProfile, _Mapping]] = ...) -> None: ... + NAME_FIELD_NUMBER: _ClassVar[int] + NODE_FIELD_NUMBER: _ClassVar[int] + SEARCHES_FIELD_NUMBER: _ClassVar[int] + name: str + node: str + searches: _containers.MessageMap[str, QueryProfile.SearchProfile] + def __init__(self, name: _Optional[str] = ..., node: _Optional[str] = ..., searches: _Optional[_Mapping[str, QueryProfile.SearchProfile]] = ...) -> None: ... + SHARDS_FIELD_NUMBER: _ClassVar[int] + shards: _containers.RepeatedCompositeFieldContainer[QueryProfile.ShardProfile] + def __init__(self, shards: _Optional[_Iterable[_Union[QueryProfile.ShardProfile, _Mapping]]] = ...) -> None: ... class RerankReply(_message.Message): __slots__ = ("score",) From 6fd60b567010b19399e4e1accfe8261103f6c989 Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Tue, 14 Apr 2026 16:05:53 +0200 Subject: [PATCH 55/99] Reformatted --- integration/test_collection_query_profile.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/integration/test_collection_query_profile.py b/integration/test_collection_query_profile.py index 021c83802..2061eea7a 100644 --- a/integration/test_collection_query_profile.py +++ b/integration/test_collection_query_profile.py @@ -13,9 +13,9 @@ def assert_go_duration(value: str, label: str = "") -> None: """Assert that a string looks like a Go duration (e.g. 
'1.234ms', '5.458µs').""" - assert GO_DURATION_RE.fullmatch( - value - ), f"Expected Go duration format for {label!r}, got {value!r}" + assert GO_DURATION_RE.fullmatch(value), ( + f"Expected Go duration format for {label!r}, got {value!r}" + ) def assert_common_profile(profile: SearchProfileReturn) -> None: @@ -91,16 +91,12 @@ def test_near_vector_with_query_profile(collection_with_data): assert_common_profile(vector_profile) assert "vector_search_took" in vector_profile.details - assert_go_duration( - vector_profile.details["vector_search_took"], "vector_search_took" - ) + assert_go_duration(vector_profile.details["vector_search_took"], "vector_search_took") assert "hnsw_flat_search" in vector_profile.details assert vector_profile.details["hnsw_flat_search"] in ("true", "false") - layer_keys = [ - k for k in vector_profile.details if k.startswith("knn_search_layer_") - ] + layer_keys = [k for k in vector_profile.details if k.startswith("knn_search_layer_")] assert len(layer_keys) > 0, "Expected at least one knn_search_layer_*_took key" for k in layer_keys: assert_go_duration(vector_profile.details[k], k) From a1df0988a5094b1f97deaae8a0a59dbd1b139760 Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Tue, 14 Apr 2026 16:23:13 +0200 Subject: [PATCH 56/99] Skip test for lower versions --- integration/test_collection_query_profile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integration/test_collection_query_profile.py b/integration/test_collection_query_profile.py index 2061eea7a..92687129d 100644 --- a/integration/test_collection_query_profile.py +++ b/integration/test_collection_query_profile.py @@ -37,6 +37,8 @@ def client(): @pytest.fixture(scope="module") def collection_with_data(client: weaviate.WeaviateClient): + if client._connection._weaviate_version.is_lower_than(1, 36, 9): + pytest.skip("Query profiling requires Weaviate >= 1.36.9") name = "TestQueryProfile" client.collections.delete(name) collection = 
client.collections.create( From 239ed323ccc92dcd1713a5c0b372b7036dd70a79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 14 Apr 2026 21:03:13 +0100 Subject: [PATCH 57/99] feat: add tokenizer module with sync and async support, including integration tests --- integration/test_tokenize.py | 355 ++++++++++++++++++++++++++++++++++ weaviate/__init__.py | 2 + weaviate/client.py | 3 + weaviate/client.pyi | 3 + weaviate/tokenize/__init__.py | 7 + weaviate/tokenize/async_.py | 8 + weaviate/tokenize/executor.py | 166 ++++++++++++++++ weaviate/tokenize/sync.py | 8 + weaviate/tokenize/types.py | 25 +++ 9 files changed, 577 insertions(+) create mode 100644 integration/test_tokenize.py create mode 100644 weaviate/tokenize/__init__.py create mode 100644 weaviate/tokenize/async_.py create mode 100644 weaviate/tokenize/executor.py create mode 100644 weaviate/tokenize/sync.py create mode 100644 weaviate/tokenize/types.py diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py new file mode 100644 index 000000000..e54f9d49d --- /dev/null +++ b/integration/test_tokenize.py @@ -0,0 +1,355 @@ +"""Integration tests for the tokenize module. 
+ +These tests cover the client's responsibilities: +- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, kwargs) +- Correct deserialization of responses into typed objects +- Client-side validation (_TextAnalyzerConfigCreate rejects invalid input) +- Both sync and async client paths +""" + +from typing import AsyncGenerator, Generator + +import pytest +import pytest_asyncio + +import weaviate +from weaviate.collections.classes.config import ( + StopwordsConfig, + StopwordsPreset, + TextAnalyzerConfig, + Tokenization, + _StopwordsCreate, + _TextAnalyzerConfigCreate, +) +from weaviate.config import AdditionalConfig +from weaviate.tokenize.types import TokenizeResult + + +@pytest.fixture(scope="module") +def client() -> Generator[weaviate.WeaviateClient, None, None]: + c = weaviate.connect_to_local( + additional_config=AdditionalConfig(timeout=(60, 120)), + ) + yield c + c.close() + + +@pytest_asyncio.fixture +async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]: + c = weaviate.use_async_with_local( + additional_config=AdditionalConfig(timeout=(60, 120)), + ) + await c.connect() + yield c + await c.close() + + +# --------------------------------------------------------------------------- +# Serialization: enums, strings, kwargs, _TextAnalyzerConfigCreate +# --------------------------------------------------------------------------- + + +class TestSerialization: + """Verify the client correctly serializes different input forms.""" + + @pytest.mark.parametrize( + "tokenization,text,expected_tokens", + [ + (Tokenization.WORD, "The quick brown fox", ["the", "quick", "brown", "fox"]), + (Tokenization.LOWERCASE, "Hello World Test", ["hello", "world", "test"]), + (Tokenization.WHITESPACE, "Hello World Test", ["Hello", "World", "Test"]), + (Tokenization.FIELD, " Hello World ", ["Hello World"]), + (Tokenization.TRIGRAM, "Hello", ["hel", "ell", "llo"]), + ], + ) + def test_tokenization_enum( + self, + client: weaviate.WeaviateClient, + 
tokenization: Tokenization, + text: str, + expected_tokens: list, + ) -> None: + result = client.tokenize.text(text=text, tokenization=tokenization) + assert isinstance(result, TokenizeResult) + assert result.tokenization == tokenization.value + assert result.indexed == expected_tokens + assert result.query == expected_tokens + + def test_tokenization_string(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text(text="hello world", tokenization="word") + assert result.tokenization == "word" + assert result.indexed == ["hello", "world"] + + def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text( + text="The quick brown fox", + tokenization=Tokenization.WORD, + stopword_preset=StopwordsPreset.EN, + ) + assert "the" not in result.query + assert "quick" in result.query + + def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text( + text="The quick brown fox", + tokenization=Tokenization.WORD, + stopword_preset="en", + ) + assert "the" not in result.query + + def test_ascii_fold_via_kwargs(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text( + text="L'école est fermée", + tokenization=Tokenization.WORD, + ascii_fold=True, + ) + assert result.indexed == ["l", "ecole", "est", "fermee"] + + def test_ascii_fold_via_analyzer_config(self, client: weaviate.WeaviateClient) -> None: + cfg = _TextAnalyzerConfigCreate(ascii_fold=True) + result = client.tokenize.text( + text="L'école est fermée", + tokenization=Tokenization.WORD, + analyzer_config=cfg, + ) + assert result.indexed == ["l", "ecole", "est", "fermee"] + + def test_analyzer_config_and_kwargs_produce_same_result( + self, client: weaviate.WeaviateClient + ) -> None: + """analyzer_config object and equivalent kwargs must produce identical output.""" + cfg = _TextAnalyzerConfigCreate( + ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN + 
) + via_config = client.tokenize.text( + text="L'école est fermée", + tokenization=Tokenization.WORD, + analyzer_config=cfg, + ) + via_kwargs = client.tokenize.text( + text="L'école est fermée", + tokenization=Tokenization.WORD, + ascii_fold=True, + ascii_fold_ignore=["é"], + stopword_preset=StopwordsPreset.EN, + ) + assert via_config.indexed == via_kwargs.indexed + assert via_config.query == via_kwargs.query + + def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text( + text="hello world test", + tokenization=Tokenization.WORD, + stopword_preset="custom", + stopword_presets={ + "custom": _StopwordsCreate(preset=None, additions=["test"], removals=None), + }, + ) + assert result.indexed == ["hello", "world", "test"] + assert result.query == ["hello", "world"] + + def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text( + text="the quick", + tokenization=Tokenization.WORD, + stopword_preset="en-no-the", + stopword_presets={ + "en-no-the": _StopwordsCreate( + preset=StopwordsPreset.EN, additions=None, removals=["the"] + ), + }, + ) + assert result.indexed == ["the", "quick"] + assert result.query == ["the", "quick"] + + +# --------------------------------------------------------------------------- +# Deserialization: typed response fields +# --------------------------------------------------------------------------- + + +class TestDeserialization: + """Verify the client correctly deserializes response fields into typed objects.""" + + def test_result_type(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text(text="hello", tokenization=Tokenization.WORD) + assert isinstance(result, TokenizeResult) + assert isinstance(result.indexed, list) + assert isinstance(result.query, list) + + def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text( + 
text="L'école", + tokenization=Tokenization.WORD, + ascii_fold=True, + ascii_fold_ignore=["é"], + stopword_preset=StopwordsPreset.EN, + ) + assert isinstance(result.analyzer_config, TextAnalyzerConfig) + assert result.analyzer_config.ascii_fold is True + assert result.analyzer_config.ascii_fold_ignore == ["é"] + assert result.analyzer_config.stopword_preset == "en" + + def test_no_analyzer_config_returns_none(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenize.text(text="hello", tokenization=Tokenization.WORD) + assert result.analyzer_config is None + + def test_stopword_config_deserialized_on_property( + self, client: weaviate.WeaviateClient + ) -> None: + """Property endpoint returns stopwordConfig; verify it deserializes to StopwordsConfig.""" + client.collections.delete("TestDeserStopword") + try: + client.collections.create_from_dict( + { + "class": "TestDeserStopword", + "vectorizer": "none", + "properties": [ + { + "name": "title", + "dataType": ["text"], + "tokenization": "word", + "textAnalyzer": {"stopwordPreset": "en"}, + }, + ], + } + ) + result = client.tokenize.property( + collection_name="TestDeserStopword", + property_name="title", + text="the quick", + ) + assert isinstance(result, TokenizeResult) + assert result.tokenization == "word" + # Stopword config should be deserialized when present + if result.stopword_config is not None: + assert isinstance(result.stopword_config, StopwordsConfig) + finally: + client.collections.delete("TestDeserStopword") + + def test_property_result_types(self, client: weaviate.WeaviateClient) -> None: + client.collections.delete("TestDeserPropTypes") + try: + client.collections.create_from_dict( + { + "class": "TestDeserPropTypes", + "vectorizer": "none", + "properties": [ + { + "name": "tag", + "dataType": ["text"], + "tokenization": "field", + }, + ], + } + ) + result = client.tokenize.property( + collection_name="TestDeserPropTypes", + property_name="tag", + text=" Hello World ", + ) + assert 
isinstance(result, TokenizeResult) + assert result.tokenization == "field" + assert result.indexed == ["Hello World"] + finally: + client.collections.delete("TestDeserPropTypes") + + +# --------------------------------------------------------------------------- +# Client-side validation (_TextAnalyzerConfigCreate) +# --------------------------------------------------------------------------- + + +class TestClientSideValidation: + """Verify that _TextAnalyzerConfigCreate rejects invalid input before hitting the server.""" + + def test_ascii_fold_ignore_without_fold_raises(self) -> None: + with pytest.raises(ValueError, match="asciiFoldIgnore"): + _TextAnalyzerConfigCreate(ascii_fold=False, ascii_fold_ignore=["é"]) + + def test_ascii_fold_ignore_without_fold_default_raises(self) -> None: + with pytest.raises(ValueError, match="asciiFoldIgnore"): + _TextAnalyzerConfigCreate(ascii_fold_ignore=["é"]) + + def test_valid_config_does_not_raise(self) -> None: + cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]) + assert cfg.asciiFold is True + assert cfg.asciiFoldIgnore == ["é", "ñ"] + + def test_fold_without_ignore_is_valid(self) -> None: + cfg = _TextAnalyzerConfigCreate(ascii_fold=True) + assert cfg.asciiFold is True + assert cfg.asciiFoldIgnore is None + + def test_stopword_preset_only_is_valid(self) -> None: + cfg = _TextAnalyzerConfigCreate(stopword_preset="en") + assert cfg.stopwordPreset == "en" + + def test_empty_config_is_valid(self) -> None: + cfg = _TextAnalyzerConfigCreate() + assert cfg.asciiFold is None + assert cfg.asciiFoldIgnore is None + assert cfg.stopwordPreset is None + + +# --------------------------------------------------------------------------- +# Async client +# --------------------------------------------------------------------------- + + +class TestAsyncClient: + """Verify both text() and property() work through the async client.""" + + @pytest.mark.asyncio + async def test_text_tokenize(self, async_client: 
weaviate.WeaviateAsyncClient) -> None: + result = await async_client.tokenize.text( + text="The quick brown fox", + tokenization=Tokenization.WORD, + ) + assert isinstance(result, TokenizeResult) + assert result.indexed == ["the", "quick", "brown", "fox"] + + @pytest.mark.asyncio + async def test_text_with_analyzer_config( + self, async_client: weaviate.WeaviateAsyncClient + ) -> None: + cfg = _TextAnalyzerConfigCreate(ascii_fold=True, stopword_preset=StopwordsPreset.EN) + result = await async_client.tokenize.text( + text="L'école est fermée", + tokenization=Tokenization.WORD, + analyzer_config=cfg, + ) + assert result.indexed == ["l", "ecole", "est", "fermee"] + assert isinstance(result.analyzer_config, TextAnalyzerConfig) + assert result.analyzer_config.ascii_fold is True + + @pytest.mark.asyncio + async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None: + await async_client.collections.delete("TestAsyncPropTokenize") + try: + await async_client.collections.create_from_dict( + { + "class": "TestAsyncPropTokenize", + "vectorizer": "none", + "properties": [ + { + "name": "title", + "dataType": ["text"], + "tokenization": "word", + "textAnalyzer": {"stopwordPreset": "en"}, + }, + ], + } + ) + result = await async_client.tokenize.property( + collection_name="TestAsyncPropTokenize", + property_name="title", + text="The quick brown fox", + ) + assert isinstance(result, TokenizeResult) + assert result.tokenization == "word" + assert result.indexed == ["the", "quick", "brown", "fox"] + assert "the" not in result.query + assert "quick" in result.query + finally: + await async_client.collections.delete("TestAsyncPropTokenize") diff --git a/weaviate/__init__.py b/weaviate/__init__.py index 562b142bc..6fd9368ea 100644 --- a/weaviate/__init__.py +++ b/weaviate/__init__.py @@ -21,6 +21,7 @@ embedded, exceptions, outputs, + tokenize, types, ) from .client import Client, WeaviateAsyncClient, WeaviateClient @@ -67,6 +68,7 @@ "embedded", 
"exceptions", "outputs", + "tokenize", "types", "use_async_with_custom", "use_async_with_embedded", diff --git a/weaviate/client.py b/weaviate/client.py index d7f9080f4..cbd12be9a 100644 --- a/weaviate/client.py +++ b/weaviate/client.py @@ -22,6 +22,7 @@ from .embedded import EmbeddedOptions from .groups import _Groups, _GroupsAsync from .rbac import _Roles, _RolesAsync +from .tokenize import _Tokenize, _TokenizeAsync from .types import NUMBER from .users import _Users, _UsersAsync @@ -82,6 +83,7 @@ def __init__( self.debug = _DebugAsync(self._connection) self.groups = _GroupsAsync(self._connection) self.roles = _RolesAsync(self._connection) + self.tokenize = _TokenizeAsync(self._connection) self.users = _UsersAsync(self._connection) async def __aenter__(self) -> "WeaviateAsyncClient": @@ -157,6 +159,7 @@ def __init__( self.debug = _Debug(self._connection) self.groups = _Groups(self._connection) self.roles = _Roles(self._connection) + self.tokenize = _Tokenize(self._connection) self.users = _Users(self._connection) def __enter__(self) -> "WeaviateClient": diff --git a/weaviate/client.pyi b/weaviate/client.pyi index 9b32af15f..a6a44f8f7 100644 --- a/weaviate/client.pyi +++ b/weaviate/client.pyi @@ -21,6 +21,7 @@ from .cluster import _Cluster, _ClusterAsync from .collections.batch.client import _BatchClientWrapper, _BatchClientWrapperAsync from .debug import _Debug, _DebugAsync from .rbac import _Roles, _RolesAsync +from .tokenize import _Tokenize, _TokenizeAsync from .types import NUMBER TIMEOUT_TYPE = Union[Tuple[NUMBER, NUMBER], NUMBER] @@ -35,6 +36,7 @@ class WeaviateAsyncClient(_WeaviateClientExecutor[ConnectionAsync]): debug: _DebugAsync groups: _GroupsAsync roles: _RolesAsync + tokenize: _TokenizeAsync users: _UsersAsync async def close(self) -> None: ... @@ -58,6 +60,7 @@ class WeaviateClient(_WeaviateClientExecutor[ConnectionSync]): debug: _Debug groups: _Groups roles: _Roles + tokenize: _Tokenize users: _Users def close(self) -> None: ... 
diff --git a/weaviate/tokenize/__init__.py b/weaviate/tokenize/__init__.py new file mode 100644 index 000000000..d0c2883c5 --- /dev/null +++ b/weaviate/tokenize/__init__.py @@ -0,0 +1,7 @@ +"""Module for tokenize operations.""" + +from .async_ import _TokenizeAsync +from .sync import _Tokenize +from .types import TokenizeResult + +__all__ = ["_Tokenize", "_TokenizeAsync", "TokenizeResult"] diff --git a/weaviate/tokenize/async_.py b/weaviate/tokenize/async_.py new file mode 100644 index 000000000..a59c392ea --- /dev/null +++ b/weaviate/tokenize/async_.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionAsync +from weaviate.tokenize.executor import _TokenizeExecutor + + +@executor.wrap("async") +class _TokenizeAsync(_TokenizeExecutor[ConnectionAsync]): + pass diff --git a/weaviate/tokenize/executor.py b/weaviate/tokenize/executor.py new file mode 100644 index 000000000..bd2c24dc1 --- /dev/null +++ b/weaviate/tokenize/executor.py @@ -0,0 +1,166 @@ +"""Tokenize executor.""" + +from typing import Any, Dict, Generic, List, Optional, Union + +from httpx import Response + +from weaviate.collections.classes.config import ( + StopwordsConfig, + StopwordsPreset, + TextAnalyzerConfig, + Tokenization, + _StopwordsCreate, + _TextAnalyzerConfigCreate, +) +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes +from weaviate.tokenize.types import TokenizeResult + + +def _parse_analyzer_config(body: Dict[str, Any]) -> Optional[TextAnalyzerConfig]: + ac = body.get("analyzerConfig") + if ac is None: + return None + if "asciiFold" not in ac and "stopwordPreset" not in ac: + return None + return TextAnalyzerConfig( + ascii_fold=ac.get("asciiFold", False), + ascii_fold_ignore=ac.get("asciiFoldIgnore"), + stopword_preset=ac.get("stopwordPreset"), + ) + + +def _parse_stopword_config(body: Dict[str, Any]) -> Optional[StopwordsConfig]: + sc = body.get("stopwordConfig") + if sc is None: + 
return None + return StopwordsConfig( + preset=StopwordsPreset(sc["preset"]) if sc.get("preset") else StopwordsPreset.NONE, + additions=sc.get("additions"), + removals=sc.get("removals"), + ) + + +def _parse_tokenize_result(body: Dict[str, Any]) -> TokenizeResult: + return TokenizeResult( + tokenization=body["tokenization"], + indexed=body["indexed"], + query=body["query"], + analyzer_config=_parse_analyzer_config(body), + stopword_config=_parse_stopword_config(body), + ) + + +class _TokenizeExecutor(Generic[ConnectionType]): + def __init__(self, connection: ConnectionType): + self._connection = connection + + def text( + self, + text: str, + tokenization: Union[Tokenization, str], + *, + analyzer_config: Optional[_TextAnalyzerConfigCreate] = None, + ascii_fold: Optional[bool] = None, + ascii_fold_ignore: Optional[List[str]] = None, + stopword_preset: Optional[Union[StopwordsPreset, str]] = None, + stopword_presets: Optional[Dict[str, _StopwordsCreate]] = None, + ) -> executor.Result[TokenizeResult]: + """Tokenize text using the generic /v1/tokenize endpoint. + + Analyzer settings can be provided either via a ``_TextAnalyzerConfigCreate`` + object **or** via the individual keyword arguments (``ascii_fold``, + ``ascii_fold_ignore``, ``stopword_preset``). If ``analyzer_config`` is + given the individual keyword arguments are ignored. + + Args: + text: The text to tokenize. + tokenization: The tokenization method to use (e.g. Tokenization.WORD). + analyzer_config: A ``_TextAnalyzerConfigCreate`` instance that bundles + ascii_fold, ascii_fold_ignore, and stopword_preset settings. + ascii_fold: Whether to fold accented characters to ASCII equivalents. + ascii_fold_ignore: Characters to exclude from ASCII folding. + stopword_preset: Stopword preset name to apply for query-time filtering. + stopword_presets: Custom stopword preset definitions, keyed by name. + Each value is a ``_StopwordsCreate`` with optional preset, additions, + and removals fields. 
+ + Returns: + A TokenizeResult with indexed and query token lists. + """ + tokenization_str = ( + tokenization.value if isinstance(tokenization, Tokenization) else tokenization + ) + + payload: Dict[str, Any] = { + "text": text, + "tokenization": tokenization_str, + } + + if analyzer_config is not None: + ac_dict = analyzer_config._to_dict() + if ac_dict: + payload["analyzerConfig"] = ac_dict + else: + ac: Dict[str, Any] = {} + if ascii_fold is not None: + ac["asciiFold"] = ascii_fold + if ascii_fold_ignore is not None: + ac["asciiFoldIgnore"] = ascii_fold_ignore + if stopword_preset is not None: + ac["stopwordPreset"] = ( + stopword_preset.value + if isinstance(stopword_preset, StopwordsPreset) + else stopword_preset + ) + if ac: + payload["analyzerConfig"] = ac + + if stopword_presets is not None: + payload["stopwordPresets"] = { + name: cfg._to_dict() for name, cfg in stopword_presets.items() + } + + def resp(response: Response) -> TokenizeResult: + return _parse_tokenize_result(response.json()) + + return executor.execute( + response_callback=resp, + method=self._connection.post, + path="/tokenize", + weaviate_object=payload, + error_msg="Tokenization failed", + status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"), + ) + + def property( + self, + collection_name: str, + property_name: str, + text: str, + ) -> executor.Result[TokenizeResult]: + """Tokenize text using a property's configured tokenization settings. + + Args: + collection_name: The collection (class) name. + property_name: The property name whose tokenization config to use. + text: The text to tokenize. + + Returns: + A TokenizeResult with indexed and query token lists. 
+ """ + path = f"/schema/{collection_name}/properties/{property_name}/tokenize" + + payload: Dict[str, Any] = {"text": text} + + def resp(response: Response) -> TokenizeResult: + return _parse_tokenize_result(response.json()) + + return executor.execute( + response_callback=resp, + method=self._connection.post, + path=path, + weaviate_object=payload, + error_msg="Property tokenization failed", + status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"), + ) diff --git a/weaviate/tokenize/sync.py b/weaviate/tokenize/sync.py new file mode 100644 index 000000000..755c42559 --- /dev/null +++ b/weaviate/tokenize/sync.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionSync +from weaviate.tokenize.executor import _TokenizeExecutor + + +@executor.wrap("sync") +class _Tokenize(_TokenizeExecutor[ConnectionSync]): + pass diff --git a/weaviate/tokenize/types.py b/weaviate/tokenize/types.py new file mode 100644 index 000000000..ba4009b2d --- /dev/null +++ b/weaviate/tokenize/types.py @@ -0,0 +1,25 @@ +"""Return types for tokenize operations.""" + +from dataclasses import dataclass, field +from typing import List, Optional + +from weaviate.collections.classes.config import StopwordsConfig, TextAnalyzerConfig + + +@dataclass +class TokenizeResult: + """Result of a tokenization operation. + + Attributes: + tokenization: The tokenization method that was applied. + indexed: Tokens as they would be stored in the inverted index. + query: Tokens as they would be used for querying (after stopword removal). + analyzer_config: The text analyzer configuration that was used, if any. + stopword_config: The stopword configuration that was used, if any. 
+ """ + + tokenization: str + indexed: List[str] + query: List[str] + analyzer_config: Optional[TextAnalyzerConfig] = field(default=None) + stopword_config: Optional[StopwordsConfig] = field(default=None) From 480dbe0d4e614606dba54bbe2064af9401fc0f71 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Mon, 9 Mar 2026 16:49:40 +0100 Subject: [PATCH 58/99] Add support for collection export endpoint --- integration/test_export.py | 219 +++++++++++++++++++++++ weaviate/classes/__init__.py | 2 + weaviate/classes/export.py | 11 ++ weaviate/client.py | 3 + weaviate/exceptions.py | 8 + weaviate/export/__init__.py | 7 + weaviate/export/async_.py | 8 + weaviate/export/async_.pyi | 38 ++++ weaviate/export/executor.py | 331 +++++++++++++++++++++++++++++++++++ weaviate/export/export.py | 92 ++++++++++ weaviate/export/sync.py | 8 + weaviate/export/sync.pyi | 38 ++++ weaviate/outputs/__init__.py | 15 +- weaviate/outputs/export.py | 15 ++ 14 files changed, 794 insertions(+), 1 deletion(-) create mode 100644 integration/test_export.py create mode 100644 weaviate/classes/export.py create mode 100644 weaviate/export/__init__.py create mode 100644 weaviate/export/async_.py create mode 100644 weaviate/export/async_.pyi create mode 100644 weaviate/export/executor.py create mode 100644 weaviate/export/export.py create mode 100644 weaviate/export/sync.py create mode 100644 weaviate/export/sync.pyi create mode 100644 weaviate/outputs/export.py diff --git a/integration/test_export.py b/integration/test_export.py new file mode 100644 index 000000000..791360e3a --- /dev/null +++ b/integration/test_export.py @@ -0,0 +1,219 @@ +import time +import uuid +from typing import Generator, List, Union + +import pytest +from _pytest.fixtures import SubRequest + +import weaviate +from weaviate.auth import Auth +from weaviate.collections.classes.config import DataType, Property +from weaviate.exceptions import UnexpectedStatusCodeException +from weaviate.export.export import ( + ExportFileFormat, + 
ExportStatus, + ExportStorage, +) + +from .conftest import _sanitize_collection_name + +RBAC_PORTS = (8093, 50065) +RBAC_AUTH_CREDS = Auth.api_key("admin-key") + +pytestmark = pytest.mark.xdist_group(name="export") + +BACKEND = ExportStorage.FILESYSTEM + +COLLECTION_NAME = "ExportTestCollection" + +OBJECT_PROPS = [{"title": f"object {i}", "count": i} for i in range(5)] + +OBJECT_IDS = [ + "fd34ccf4-1a2a-47ad-8446-231839366c3f", + "2653442b-05d8-4fa3-b46a-d4a152eb63bc", + "55374edb-17de-487f-86cb-9a9fbc30823f", + "124ff6aa-597f-44d0-8c13-62fbb1e66888", + "f787386e-7d1c-481f-b8c3-3dbfd8bbad85", +] + + +@pytest.fixture(scope="module") +def client() -> Generator[weaviate.WeaviateClient, None, None]: + client = weaviate.connect_to_local( + port=RBAC_PORTS[0], grpc_port=RBAC_PORTS[1], auth_credentials=RBAC_AUTH_CREDS + ) + client.collections.delete(COLLECTION_NAME) + + col = client.collections.create( + name=COLLECTION_NAME, + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="count", data_type=DataType.INT), + ], + ) + for i, props in enumerate(OBJECT_PROPS): + col.data.insert(properties=props, uuid=OBJECT_IDS[i]) + + yield client + client.collections.delete(COLLECTION_NAME) + client.close() + + +def unique_export_id(name: str) -> str: + """Generate a unique export ID based on the test name.""" + name = _sanitize_collection_name(name) + random_part = str(uuid.uuid4()).replace("-", "")[:12] + return name + random_part + + +def test_create_export_with_waiting(client: weaviate.WeaviateClient, request: SubRequest) -> None: + """Create an export with wait_for_completion=True.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + include_collections=[COLLECTION_NAME], + wait_for_completion=True, + ) + assert resp.status == ExportStatus.SUCCESS + assert COLLECTION_NAME in resp.collections + + +def test_create_export_without_waiting( + client: weaviate.WeaviateClient, 
request: SubRequest +) -> None: + """Create an export without waiting, then poll status.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + include_collections=[COLLECTION_NAME], + ) + assert resp.status in [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS] + + # poll until done + while True: + status = client.export.get_status(export_id=export_id, backend=BACKEND) + assert status.status in [ + ExportStatus.STARTED, + ExportStatus.TRANSFERRING, + ExportStatus.SUCCESS, + ] + if status.status == ExportStatus.SUCCESS: + break + time.sleep(0.1) + + assert status.export_id == export_id + + +def test_get_export_status(client: weaviate.WeaviateClient, request: SubRequest) -> None: + """Check status of a completed export.""" + export_id = unique_export_id(request.node.name) + + client.export.create( + export_id=export_id, + backend=BACKEND, + include_collections=[COLLECTION_NAME], + wait_for_completion=True, + ) + + status = client.export.get_status(export_id=export_id, backend=BACKEND) + assert status.status == ExportStatus.SUCCESS + assert status.export_id == export_id + assert status.backend == BACKEND.value + + +def test_create_export_with_parquet_format( + client: weaviate.WeaviateClient, request: SubRequest +) -> None: + """Create an export explicitly specifying parquet format.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=[COLLECTION_NAME], + wait_for_completion=True, + ) + assert resp.status == ExportStatus.SUCCESS + + +@pytest.mark.parametrize("include", [[COLLECTION_NAME], COLLECTION_NAME]) +def test_create_export_include_as_str_and_list( + client: weaviate.WeaviateClient, include: Union[str, List[str]], request: SubRequest +) -> None: + """Verify include_collections accepts both str and list.""" + export_id = 
unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + include_collections=include, + wait_for_completion=True, + ) + assert resp.status == ExportStatus.SUCCESS + assert COLLECTION_NAME in resp.collections + + +def test_cancel_export(client: weaviate.WeaviateClient, request: SubRequest) -> None: + """Cancel a running export.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + include_collections=[COLLECTION_NAME], + ) + assert resp.status in [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS] + + result = client.export.cancel(export_id=export_id, backend=BACKEND) + assert result is True + + # verify it's cancelled or already completed (race condition) + start = time.time() + while time.time() - start < 5: + status = client.export.get_status(export_id=export_id, backend=BACKEND) + if status.status in [ExportStatus.CANCELLED, ExportStatus.SUCCESS]: + break + time.sleep(0.1) + assert status.status in [ExportStatus.CANCELLED, ExportStatus.SUCCESS] + + +def test_fail_on_non_existing_collection( + client: weaviate.WeaviateClient, request: SubRequest +) -> None: + """Fail export on non-existing collection.""" + export_id = unique_export_id(request.node.name) + with pytest.raises(UnexpectedStatusCodeException): + client.export.create( + export_id=export_id, + backend=BACKEND, + include_collections=["NonExistingCollection"], + wait_for_completion=True, + ) + + +def test_fail_on_both_include_and_exclude( + client: weaviate.WeaviateClient, request: SubRequest +) -> None: + """Fail when both include and exclude collections are set.""" + export_id = unique_export_id(request.node.name) + with pytest.raises(TypeError): + client.export.create( + export_id=export_id, + backend=BACKEND, + include_collections=COLLECTION_NAME, + exclude_collections="SomeOther", + ) + + +def test_fail_status_for_non_existing_export( + client: 
weaviate.WeaviateClient, request: SubRequest +) -> None: + """Fail checking status for non-existing export.""" + export_id = unique_export_id(request.node.name) + with pytest.raises(UnexpectedStatusCodeException): + client.export.get_status(export_id=export_id, backend=BACKEND) diff --git a/weaviate/classes/__init__.py b/weaviate/classes/__init__.py index 467a17f37..d495744ac 100644 --- a/weaviate/classes/__init__.py +++ b/weaviate/classes/__init__.py @@ -5,6 +5,7 @@ batch, config, data, + export, generate, generics, init, @@ -22,6 +23,7 @@ "config", "ConsistencyLevel", "data", + "export", "generate", "generics", "init", diff --git a/weaviate/classes/export.py b/weaviate/classes/export.py new file mode 100644 index 000000000..07e87a813 --- /dev/null +++ b/weaviate/classes/export.py @@ -0,0 +1,11 @@ +from weaviate.export.export import ( + ExportConfig, + ExportFileFormat, + ExportStorage, +) + +__all__ = [ + "ExportConfig", + "ExportFileFormat", + "ExportStorage", +] diff --git a/weaviate/client.py b/weaviate/client.py index d7f9080f4..f22389403 100644 --- a/weaviate/client.py +++ b/weaviate/client.py @@ -20,6 +20,7 @@ from .connect.v4 import ConnectionAsync, ConnectionSync from .debug import _Debug, _DebugAsync from .embedded import EmbeddedOptions +from .export import _Export, _ExportAsync from .groups import _Groups, _GroupsAsync from .rbac import _Roles, _RolesAsync from .types import NUMBER @@ -76,6 +77,7 @@ def __init__( ) self.alias = _AliasAsync(self._connection) self.backup = _BackupAsync(self._connection) + self.export = _ExportAsync(self._connection) self.batch = _BatchClientWrapperAsync(self._connection) self.cluster = _ClusterAsync(self._connection) self.collections = _CollectionsAsync(self._connection) @@ -152,6 +154,7 @@ def __init__( consistency_level=None, ) self.backup = _Backup(self._connection) + self.export = _Export(self._connection) self.cluster = _Cluster(self._connection) self.collections = collections self.debug = _Debug(self._connection) 
diff --git a/weaviate/exceptions.py b/weaviate/exceptions.py index 2a5b429d5..2b3d9ed09 100644 --- a/weaviate/exceptions.py +++ b/weaviate/exceptions.py @@ -141,6 +141,14 @@ class BackupCanceledError(WeaviateBaseError): """Backup canceled Exception.""" +class ExportFailedError(WeaviateBaseError): + """Export Failed Exception.""" + + +class ExportCancelledError(WeaviateBaseError): + """Export Cancelled Exception.""" + + class EmptyResponseError(WeaviateBaseError): """Occurs when an HTTP request unexpectedly returns an empty response.""" diff --git a/weaviate/export/__init__.py b/weaviate/export/__init__.py new file mode 100644 index 000000000..91de2d448 --- /dev/null +++ b/weaviate/export/__init__.py @@ -0,0 +1,7 @@ +"""Module for collection export operations.""" + +from .async_ import _ExportAsync +from .executor import ExportStorage +from .sync import _Export + +__all__ = ["ExportStorage", "_ExportAsync", "_Export"] diff --git a/weaviate/export/async_.py b/weaviate/export/async_.py new file mode 100644 index 000000000..8bd1e3c44 --- /dev/null +++ b/weaviate/export/async_.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionAsync +from weaviate.export.executor import _ExportExecutor + + +@executor.wrap("async") +class _ExportAsync(_ExportExecutor[ConnectionAsync]): + pass diff --git a/weaviate/export/async_.pyi b/weaviate/export/async_.pyi new file mode 100644 index 000000000..3872a7cb9 --- /dev/null +++ b/weaviate/export/async_.pyi @@ -0,0 +1,38 @@ +from typing import List, Optional, Union + +from weaviate.connect.v4 import ConnectionAsync +from weaviate.export.export import ( + ExportConfig, + ExportCreateReturn, + ExportFileFormat, + ExportStatusReturn, + ExportStorage, +) + +from .executor import _ExportExecutor + +class _ExportAsync(_ExportExecutor[ConnectionAsync]): + async def create( + self, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat = ExportFileFormat.PARQUET, + 
include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: bool = False, + config: Optional[ExportConfig] = None, + ) -> ExportCreateReturn: ... + async def get_status( + self, + export_id: str, + backend: ExportStorage, + bucket: Optional[str] = None, + path: Optional[str] = None, + ) -> ExportStatusReturn: ... + async def cancel( + self, + export_id: str, + backend: ExportStorage, + bucket: Optional[str] = None, + path: Optional[str] = None, + ) -> bool: ... diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py new file mode 100644 index 000000000..25ecf643f --- /dev/null +++ b/weaviate/export/executor.py @@ -0,0 +1,331 @@ +"""Export class definition.""" + +import asyncio +import time +from typing import Dict, Generic, List, Optional, Tuple, Union + +from httpx import Response + +from weaviate.connect import executor +from weaviate.connect.v4 import ( + Connection, + ConnectionAsync, + ConnectionType, + _ExpectedStatusCodes, +) +from weaviate.exceptions import ( + EmptyResponseException, + ExportCancelledError, + ExportFailedError, +) +from weaviate.export.export import ( + STORAGE_NAMES, + ExportConfig, + ExportCreateReturn, + ExportFileFormat, + ExportStatus, + ExportStatusReturn, + ExportStorage, +) +from weaviate.util import ( + _capitalize_first_letter, + _decode_json_response_dict, +) + + +class _ExportExecutor(Generic[ConnectionType]): + def __init__(self, connection: Connection): + self._connection = connection + + def create( + self, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat = ExportFileFormat.PARQUET, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: bool = False, + config: Optional[ExportConfig] = None, + ) -> executor.Result[ExportCreateReturn]: + """Create an export of all/per collection Weaviate objects. 
+ + Args: + export_id: The identifier name of the export. + backend: The backend storage where to create the export. + file_format: The file format of the export. By default PARQUET. + include_collections: The collection/list of collections to be included in the export. If not specified all + collections will be included. Either `include_collections` or `exclude_collections` can be set. + exclude_collections: The collection/list of collections to be excluded in the export. + Either `include_collections` or `exclude_collections` can be set. + wait_for_completion: Whether to wait until the export is done. By default False. + config: The configuration of the export (bucket, path). By default None. + + Returns: + An `ExportCreateReturn` object that contains the export creation response. + + Raises: + weaviate.exceptions.UnexpectedStatusCodeError: If weaviate reports a non-OK status. + TypeError: One of the arguments have a wrong type. + """ + ( + export_id, + backend, + include_collections, + exclude_collections, + ) = _get_and_validate_create_arguments( + export_id=export_id, + backend=backend, + include_classes=include_collections, + exclude_classes=exclude_collections, + wait_for_completion=wait_for_completion, + ) + + payload: dict = { + "id": export_id, + "file_format": file_format.value, + "include": include_collections, + "exclude": exclude_collections, + } + + if config is not None: + config_dict: Dict[str, str] = {} + if config.bucket is not None: + config_dict["bucket"] = config.bucket + if config.path is not None: + config_dict["path"] = config.path + if config_dict: + payload["config"] = config_dict + + path = f"/export/{backend.value}" + + if isinstance(self._connection, ConnectionAsync): + + async def _execute() -> ExportCreateReturn: + res = await executor.aresult( + self._connection.post( + path=path, + weaviate_object=payload, + error_msg="Export creation failed due to connection error.", + ) + ) + create_status = _decode_json_response_dict(res, 
"Export creation") + assert create_status is not None + if wait_for_completion: + while True: + status = await executor.aresult( + self.get_status( + export_id=export_id, + backend=backend, + bucket=config.bucket if config else None, + path=config.path if config else None, + ) + ) + create_status["status"] = status.status + if status.status == ExportStatus.SUCCESS: + break + if status.status == ExportStatus.FAILED: + raise ExportFailedError( + f"Export failed: {create_status} with error: {status.error}" + ) + if status.status == ExportStatus.CANCELLED: + raise ExportCancelledError( + f"Export was cancelled: {create_status} with error: {status.error}" + ) + await asyncio.sleep(1) + return ExportCreateReturn(**create_status) + + return _execute() + + res = executor.result( + self._connection.post( + path=path, + weaviate_object=payload, + error_msg="Export creation failed due to connection error.", + ) + ) + create_status = _decode_json_response_dict(res, "Export creation") + assert create_status is not None + if wait_for_completion: + while True: + status = executor.result( + self.get_status( + export_id=export_id, + backend=backend, + bucket=config.bucket if config else None, + path=config.path if config else None, + ) + ) + create_status["status"] = status.status + if status.status == ExportStatus.SUCCESS: + break + if status.status == ExportStatus.FAILED: + raise ExportFailedError( + f"Export failed: {create_status} with error: {status.error}" + ) + if status.status == ExportStatus.CANCELLED: + raise ExportCancelledError( + f"Export was cancelled: {create_status} with error: {status.error}" + ) + time.sleep(1) + return ExportCreateReturn(**create_status) + + def get_status( + self, + export_id: str, + backend: ExportStorage, + bucket: Optional[str] = None, + path: Optional[str] = None, + ) -> executor.Result[ExportStatusReturn]: + """Check the status of an export. + + Args: + export_id: The identifier name of the export. 
+ backend: The backend storage where the export was created. + bucket: The bucket of the export location. By default None. + path: The path of the export location. By default None. + + Returns: + An `ExportStatusReturn` object that contains the export status response. + """ + export_id, backend = _get_and_validate_get_status( + export_id=export_id, + backend=backend, + ) + + url_path = f"/export/{backend.value}/{export_id}" + params: Dict[str, str] = {} + if bucket is not None: + params["bucket"] = bucket + if path is not None: + params["path"] = path + + def resp(res: Response) -> ExportStatusReturn: + typed_response = _decode_json_response_dict(res, "Export status check") + if typed_response is None: + raise EmptyResponseException() + return ExportStatusReturn(**typed_response) + + return executor.execute( + response_callback=resp, + method=self._connection.get, + path=url_path, + params=params, + error_msg="Export status check failed due to connection error.", + ) + + def cancel( + self, + export_id: str, + backend: ExportStorage, + bucket: Optional[str] = None, + path: Optional[str] = None, + ) -> executor.Result[bool]: + """Cancel a running export. + + Args: + export_id: The identifier name of the export. + backend: The backend storage where the export was created. + bucket: The bucket of the export location. By default None. + path: The path of the export location. By default None. + + Returns: + A bool indicating if the cancellation was successful. 
+ """ + export_id, backend = _get_and_validate_get_status( + export_id=export_id, + backend=backend, + ) + url_path = f"/export/{backend.value}/{export_id}" + params: Dict[str, str] = {} + if bucket is not None: + params["bucket"] = bucket + if path is not None: + params["path"] = path + + def resp(res: Response) -> bool: + if res.status_code == 204: + return True + typed_response = _decode_json_response_dict(res, "Export cancel") + if typed_response is None: + raise EmptyResponseException() + return False + + return executor.execute( + response_callback=resp, + method=self._connection.delete, + path=url_path, + params=params, + error_msg="Export cancel failed due to connection error.", + status_codes=_ExpectedStatusCodes(ok_in=[204, 404], error="cancel export"), + ) + + +def _get_and_validate_create_arguments( + export_id: str, + backend: Union[str, ExportStorage], + include_classes: Union[List[str], str, None], + exclude_classes: Union[List[str], str, None], + wait_for_completion: bool, +) -> Tuple[str, ExportStorage, List[str], List[str]]: + if not isinstance(export_id, str): + raise TypeError(f"'export_id' must be of type str. Given type: {type(export_id)}.") + if isinstance(backend, str): + try: + backend = ExportStorage(backend.lower()) + except KeyError: + raise ValueError( + f"'backend' must have one of these values: {STORAGE_NAMES}. Given value: {backend}." + ) + + if not isinstance(wait_for_completion, bool): + raise TypeError( + f"'wait_for_completion' must be of type bool. Given type: {type(wait_for_completion)}." + ) + + if include_classes is not None: + if isinstance(include_classes, str): + include_classes = [include_classes] + elif not isinstance(include_classes, list): + raise TypeError( + "'include_collections' must be of type str, list of str or None. " + f"Given type: {type(include_classes)}." 
+ ) + else: + include_classes = [] + + if exclude_classes is not None: + if isinstance(exclude_classes, str): + exclude_classes = [exclude_classes] + elif not isinstance(exclude_classes, list): + raise TypeError( + "'exclude_collections' must be of type str, list of str or None. " + f"Given type: {type(exclude_classes)}." + ) + else: + exclude_classes = [] + + if include_classes and exclude_classes: + raise TypeError( + "Either 'include_collections' OR 'exclude_collections' can be set, not both." + ) + + include_classes = [_capitalize_first_letter(cls) for cls in include_classes] + exclude_classes = [_capitalize_first_letter(cls) for cls in exclude_classes] + + return (export_id, backend, include_classes, exclude_classes) + + +def _get_and_validate_get_status( + export_id: str, backend: Union[str, ExportStorage] +) -> Tuple[str, ExportStorage]: + if not isinstance(export_id, str): + raise TypeError(f"'export_id' must be of type str. Given type: {type(export_id)}.") + if isinstance(backend, str): + try: + backend = ExportStorage(backend.lower()) + except KeyError: + raise ValueError( + f"'backend' must have one of these values: {STORAGE_NAMES}. Given value: {backend}." 
+ ) + + return (export_id, backend) diff --git a/weaviate/export/export.py b/weaviate/export/export.py new file mode 100644 index 000000000..913f6198c --- /dev/null +++ b/weaviate/export/export.py @@ -0,0 +1,92 @@ +"""Export models and enums.""" + +from datetime import datetime +from enum import Enum +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + +STORAGE_NAMES = { + "filesystem", + "s3", + "gcs", + "azure", +} + + +class ExportStorage(str, Enum): + """Which backend should be used to write the export to.""" + + FILESYSTEM = "filesystem" + S3 = "s3" + GCS = "gcs" + AZURE = "azure" + + +class ExportFileFormat(str, Enum): + """Which file format should be used for the export.""" + + PARQUET = "parquet" + + +class ExportStatus(str, Enum): + """The status of an export.""" + + STARTED = "STARTED" + TRANSFERRING = "TRANSFERRING" + SUCCESS = "SUCCESS" + FAILED = "FAILED" + CANCELLED = "CANCELLED" + SKIPPED = "SKIPPED" + + +class ShardExportStatus(str, Enum): + """The status of an individual shard export.""" + + STARTED = "STARTED" + TRANSFERRING = "TRANSFERRING" + SUCCESS = "SUCCESS" + FAILED = "FAILED" + CANCELLED = "CANCELLED" + SKIPPED = "SKIPPED" + + +class ExportConfig(BaseModel): + """Configuration for where to write the export.""" + + bucket: Optional[str] = None + path: Optional[str] = None + + +class ShardProgress(BaseModel): + """Progress of a single shard export.""" + + status: ShardExportStatus + objects_exported: int = Field(alias="objectsExported", default=0) + error: Optional[str] = None + skip_reason: Optional[str] = Field(alias="skipReason", default=None) + + model_config = {"populate_by_name": True} + + +class ExportCreateReturn(BaseModel): + """Return type of the export creation method.""" + + export_id: str = Field(alias="id") + backend: str + path: str + status: ExportStatus + started_at: Optional[datetime] = Field(alias="startedAt", default=None) + collections: List[str] = Field(default_factory=list, 
alias="classes") + + model_config = {"populate_by_name": True} + + +class ExportStatusReturn(ExportCreateReturn): + """Return type of the export status method.""" + + shard_status: Optional[Dict[str, Dict[str, ShardProgress]]] = Field( + alias="shardStatus", default=None + ) + error: Optional[str] = None + took_in_ms: Optional[int] = Field(alias="tookInMs", default=None) diff --git a/weaviate/export/sync.py b/weaviate/export/sync.py new file mode 100644 index 000000000..0510601f8 --- /dev/null +++ b/weaviate/export/sync.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionSync +from weaviate.export.executor import _ExportExecutor + + +@executor.wrap("sync") +class _Export(_ExportExecutor[ConnectionSync]): + pass diff --git a/weaviate/export/sync.pyi b/weaviate/export/sync.pyi new file mode 100644 index 000000000..615c79cfc --- /dev/null +++ b/weaviate/export/sync.pyi @@ -0,0 +1,38 @@ +from typing import List, Optional, Union + +from weaviate.connect.v4 import ConnectionSync +from weaviate.export.export import ( + ExportConfig, + ExportCreateReturn, + ExportFileFormat, + ExportStatusReturn, + ExportStorage, +) + +from .executor import _ExportExecutor + +class _Export(_ExportExecutor[ConnectionSync]): + def create( + self, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat = ExportFileFormat.PARQUET, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: bool = False, + config: Optional[ExportConfig] = None, + ) -> ExportCreateReturn: ... + def get_status( + self, + export_id: str, + backend: ExportStorage, + bucket: Optional[str] = None, + path: Optional[str] = None, + ) -> ExportStatusReturn: ... + def cancel( + self, + export_id: str, + backend: ExportStorage, + bucket: Optional[str] = None, + path: Optional[str] = None, + ) -> bool: ... 
diff --git a/weaviate/outputs/__init__.py b/weaviate/outputs/__init__.py index 62193fc35..cd6176d93 100644 --- a/weaviate/outputs/__init__.py +++ b/weaviate/outputs/__init__.py @@ -1,4 +1,16 @@ -from . import aggregate, backup, batch, cluster, config, data, query, replication, tenants, users +from . import ( + aggregate, + backup, + batch, + cluster, + config, + data, + export, + query, + replication, + tenants, + users, +) __all__ = [ "aggregate", @@ -7,6 +19,7 @@ "cluster", "config", "data", + "export", "query", "replication", "tenants", diff --git a/weaviate/outputs/export.py b/weaviate/outputs/export.py new file mode 100644 index 000000000..531b715c4 --- /dev/null +++ b/weaviate/outputs/export.py @@ -0,0 +1,15 @@ +from weaviate.export.export import ( + ExportCreateReturn, + ExportStatus, + ExportStatusReturn, + ShardExportStatus, + ShardProgress, +) + +__all__ = [ + "ExportCreateReturn", + "ExportStatus", + "ExportStatusReturn", + "ShardExportStatus", + "ShardProgress", +] From 92c3d1f3090f12de6737fb6462d88103392148f2 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Mon, 9 Mar 2026 16:59:55 +0100 Subject: [PATCH 59/99] Small cleanup after review --- integration/test_export.py | 15 ++++++++------- weaviate/export/async_.pyi | 2 +- weaviate/export/executor.py | 4 ++-- weaviate/export/export.py | 3 --- weaviate/export/sync.pyi | 2 +- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/integration/test_export.py b/integration/test_export.py index 791360e3a..314c6b8fc 100644 --- a/integration/test_export.py +++ b/integration/test_export.py @@ -6,7 +6,6 @@ from _pytest.fixtures import SubRequest import weaviate -from weaviate.auth import Auth from weaviate.collections.classes.config import DataType, Property from weaviate.exceptions import UnexpectedStatusCodeException from weaviate.export.export import ( @@ -17,9 +16,6 @@ from .conftest import _sanitize_collection_name -RBAC_PORTS = (8093, 50065) -RBAC_AUTH_CREDS = Auth.api_key("admin-key") - 
pytestmark = pytest.mark.xdist_group(name="export") BACKEND = ExportStorage.FILESYSTEM @@ -39,9 +35,7 @@ @pytest.fixture(scope="module") def client() -> Generator[weaviate.WeaviateClient, None, None]: - client = weaviate.connect_to_local( - port=RBAC_PORTS[0], grpc_port=RBAC_PORTS[1], auth_credentials=RBAC_AUTH_CREDS - ) + client = weaviate.connect_to_local() client.collections.delete(COLLECTION_NAME) col = client.collections.create( @@ -73,6 +67,7 @@ def test_create_export_with_waiting(client: weaviate.WeaviateClient, request: Su resp = client.export.create( export_id=export_id, backend=BACKEND, + file_format=ExportFileFormat.PARQUET, include_collections=[COLLECTION_NAME], wait_for_completion=True, ) @@ -89,6 +84,7 @@ def test_create_export_without_waiting( resp = client.export.create( export_id=export_id, backend=BACKEND, + file_format=ExportFileFormat.PARQUET, include_collections=[COLLECTION_NAME], ) assert resp.status in [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS] @@ -115,6 +111,7 @@ def test_get_export_status(client: weaviate.WeaviateClient, request: SubRequest) client.export.create( export_id=export_id, backend=BACKEND, + file_format=ExportFileFormat.PARQUET, include_collections=[COLLECTION_NAME], wait_for_completion=True, ) @@ -151,6 +148,7 @@ def test_create_export_include_as_str_and_list( resp = client.export.create( export_id=export_id, backend=BACKEND, + file_format=ExportFileFormat.PARQUET, include_collections=include, wait_for_completion=True, ) @@ -165,6 +163,7 @@ def test_cancel_export(client: weaviate.WeaviateClient, request: SubRequest) -> resp = client.export.create( export_id=export_id, backend=BACKEND, + file_format=ExportFileFormat.PARQUET, include_collections=[COLLECTION_NAME], ) assert resp.status in [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS] @@ -191,6 +190,7 @@ def test_fail_on_non_existing_collection( client.export.create( export_id=export_id, backend=BACKEND, + 
file_format=ExportFileFormat.PARQUET, include_collections=["NonExistingCollection"], wait_for_completion=True, ) @@ -205,6 +205,7 @@ def test_fail_on_both_include_and_exclude( client.export.create( export_id=export_id, backend=BACKEND, + file_format=ExportFileFormat.PARQUET, include_collections=COLLECTION_NAME, exclude_collections="SomeOther", ) diff --git a/weaviate/export/async_.pyi b/weaviate/export/async_.pyi index 3872a7cb9..652c26bee 100644 --- a/weaviate/export/async_.pyi +++ b/weaviate/export/async_.pyi @@ -16,7 +16,7 @@ class _ExportAsync(_ExportExecutor[ConnectionAsync]): self, export_id: str, backend: ExportStorage, - file_format: ExportFileFormat = ExportFileFormat.PARQUET, + file_format: ExportFileFormat, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, wait_for_completion: bool = False, diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index 25ecf643f..39d586f2f 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -41,7 +41,7 @@ def create( self, export_id: str, backend: ExportStorage, - file_format: ExportFileFormat = ExportFileFormat.PARQUET, + file_format: ExportFileFormat, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, wait_for_completion: bool = False, @@ -52,7 +52,7 @@ def create( Args: export_id: The identifier name of the export. backend: The backend storage where to create the export. - file_format: The file format of the export. By default PARQUET. + file_format: The file format of the export (e.g. ExportFileFormat.PARQUET). include_collections: The collection/list of collections to be included in the export. If not specified all collections will be included. Either `include_collections` or `exclude_collections` can be set. exclude_collections: The collection/list of collections to be excluded in the export. 
diff --git a/weaviate/export/export.py b/weaviate/export/export.py index 913f6198c..30f603bb5 100644 --- a/weaviate/export/export.py +++ b/weaviate/export/export.py @@ -37,17 +37,14 @@ class ExportStatus(str, Enum): SUCCESS = "SUCCESS" FAILED = "FAILED" CANCELLED = "CANCELLED" - SKIPPED = "SKIPPED" class ShardExportStatus(str, Enum): """The status of an individual shard export.""" - STARTED = "STARTED" TRANSFERRING = "TRANSFERRING" SUCCESS = "SUCCESS" FAILED = "FAILED" - CANCELLED = "CANCELLED" SKIPPED = "SKIPPED" diff --git a/weaviate/export/sync.pyi b/weaviate/export/sync.pyi index 615c79cfc..93ed5f4fa 100644 --- a/weaviate/export/sync.pyi +++ b/weaviate/export/sync.pyi @@ -16,7 +16,7 @@ class _Export(_ExportExecutor[ConnectionSync]): self, export_id: str, backend: ExportStorage, - file_format: ExportFileFormat = ExportFileFormat.PARQUET, + file_format: ExportFileFormat, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, wait_for_completion: bool = False, From 3dc9259d395b55dea431bd37e3342f07d7836665 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 10 Mar 2026 07:31:13 +0100 Subject: [PATCH 60/99] Rename ENum --- integration/test_export.py | 4 ++-- weaviate/exceptions.py | 4 ++-- weaviate/export/executor.py | 14 +++++++------- weaviate/export/export.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/integration/test_export.py b/integration/test_export.py index 314c6b8fc..7748740c4 100644 --- a/integration/test_export.py +++ b/integration/test_export.py @@ -175,10 +175,10 @@ def test_cancel_export(client: weaviate.WeaviateClient, request: SubRequest) -> start = time.time() while time.time() - start < 5: status = client.export.get_status(export_id=export_id, backend=BACKEND) - if status.status in [ExportStatus.CANCELLED, ExportStatus.SUCCESS]: + if status.status in [ExportStatus.CANCELED, ExportStatus.SUCCESS]: break time.sleep(0.1) - assert status.status in 
[ExportStatus.CANCELLED, ExportStatus.SUCCESS] + assert status.status in [ExportStatus.CANCELED, ExportStatus.SUCCESS] def test_fail_on_non_existing_collection( diff --git a/weaviate/exceptions.py b/weaviate/exceptions.py index 2b3d9ed09..ce0fe6f7e 100644 --- a/weaviate/exceptions.py +++ b/weaviate/exceptions.py @@ -145,8 +145,8 @@ class ExportFailedError(WeaviateBaseError): """Export Failed Exception.""" -class ExportCancelledError(WeaviateBaseError): - """Export Cancelled Exception.""" +class ExportCanceledError(WeaviateBaseError): + """Export Canceled Exception.""" class EmptyResponseError(WeaviateBaseError): diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index 39d586f2f..0a54c741a 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -15,7 +15,7 @@ ) from weaviate.exceptions import ( EmptyResponseException, - ExportCancelledError, + ExportCanceledError, ExportFailedError, ) from weaviate.export.export import ( @@ -127,9 +127,9 @@ async def _execute() -> ExportCreateReturn: raise ExportFailedError( f"Export failed: {create_status} with error: {status.error}" ) - if status.status == ExportStatus.CANCELLED: - raise ExportCancelledError( - f"Export was cancelled: {create_status} with error: {status.error}" + if status.status == ExportStatus.CANCELED: + raise ExportCanceledError( + f"Export was canceled: {create_status} with error: {status.error}" ) await asyncio.sleep(1) return ExportCreateReturn(**create_status) @@ -162,9 +162,9 @@ async def _execute() -> ExportCreateReturn: raise ExportFailedError( f"Export failed: {create_status} with error: {status.error}" ) - if status.status == ExportStatus.CANCELLED: - raise ExportCancelledError( - f"Export was cancelled: {create_status} with error: {status.error}" + if status.status == ExportStatus.CANCELED: + raise ExportCanceledError( + f"Export was canceled: {create_status} with error: {status.error}" ) time.sleep(1) return ExportCreateReturn(**create_status) diff --git 
a/weaviate/export/export.py b/weaviate/export/export.py index 30f603bb5..43ee08d99 100644 --- a/weaviate/export/export.py +++ b/weaviate/export/export.py @@ -36,7 +36,7 @@ class ExportStatus(str, Enum): TRANSFERRING = "TRANSFERRING" SUCCESS = "SUCCESS" FAILED = "FAILED" - CANCELLED = "CANCELLED" + CANCELED = "CANCELED" class ShardExportStatus(str, Enum): From c36e540a0c3c20c372f3a12a570de6a73fcf856c Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 31 Mar 2026 14:28:12 -0700 Subject: [PATCH 61/99] adapt to latest version --- weaviate/export/async_.pyi | 12 ++---------- weaviate/export/executor.py | 14 +------------- weaviate/export/export.py | 2 +- weaviate/export/sync.pyi | 12 ++---------- 4 files changed, 6 insertions(+), 34 deletions(-) diff --git a/weaviate/export/async_.pyi b/weaviate/export/async_.pyi index 652c26bee..df4b7d7ab 100644 --- a/weaviate/export/async_.pyi +++ b/weaviate/export/async_.pyi @@ -23,16 +23,8 @@ class _ExportAsync(_ExportExecutor[ConnectionAsync]): config: Optional[ExportConfig] = None, ) -> ExportCreateReturn: ... async def get_status( - self, - export_id: str, - backend: ExportStorage, - bucket: Optional[str] = None, - path: Optional[str] = None, + self, export_id: str, backend: ExportStorage, path: Optional[str] = None ) -> ExportStatusReturn: ... async def cancel( - self, - export_id: str, - backend: ExportStorage, - bucket: Optional[str] = None, - path: Optional[str] = None, + self, export_id: str, backend: ExportStorage, path: Optional[str] = None ) -> bool: ... diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index 0a54c741a..db013da2c 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -58,7 +58,7 @@ def create( exclude_collections: The collection/list of collections to be excluded in the export. Either `include_collections` or `exclude_collections` can be set. wait_for_completion: Whether to wait until the export is done. By default False. 
- config: The configuration of the export (bucket, path). By default None. + config: The configuration of the export (path). By default None. Returns: An `ExportCreateReturn` object that contains the export creation response. @@ -89,8 +89,6 @@ def create( if config is not None: config_dict: Dict[str, str] = {} - if config.bucket is not None: - config_dict["bucket"] = config.bucket if config.path is not None: config_dict["path"] = config.path if config_dict: @@ -116,7 +114,6 @@ async def _execute() -> ExportCreateReturn: self.get_status( export_id=export_id, backend=backend, - bucket=config.bucket if config else None, path=config.path if config else None, ) ) @@ -151,7 +148,6 @@ async def _execute() -> ExportCreateReturn: self.get_status( export_id=export_id, backend=backend, - bucket=config.bucket if config else None, path=config.path if config else None, ) ) @@ -173,7 +169,6 @@ def get_status( self, export_id: str, backend: ExportStorage, - bucket: Optional[str] = None, path: Optional[str] = None, ) -> executor.Result[ExportStatusReturn]: """Check the status of an export. @@ -181,7 +176,6 @@ def get_status( Args: export_id: The identifier name of the export. backend: The backend storage where the export was created. - bucket: The bucket of the export location. By default None. path: The path of the export location. By default None. Returns: @@ -194,8 +188,6 @@ def get_status( url_path = f"/export/{backend.value}/{export_id}" params: Dict[str, str] = {} - if bucket is not None: - params["bucket"] = bucket if path is not None: params["path"] = path @@ -217,7 +209,6 @@ def cancel( self, export_id: str, backend: ExportStorage, - bucket: Optional[str] = None, path: Optional[str] = None, ) -> executor.Result[bool]: """Cancel a running export. @@ -225,7 +216,6 @@ def cancel( Args: export_id: The identifier name of the export. backend: The backend storage where the export was created. - bucket: The bucket of the export location. By default None. 
path: The path of the export location. By default None. Returns: @@ -237,8 +227,6 @@ def cancel( ) url_path = f"/export/{backend.value}/{export_id}" params: Dict[str, str] = {} - if bucket is not None: - params["bucket"] = bucket if path is not None: params["path"] = path diff --git a/weaviate/export/export.py b/weaviate/export/export.py index 43ee08d99..e41704a0f 100644 --- a/weaviate/export/export.py +++ b/weaviate/export/export.py @@ -51,7 +51,6 @@ class ShardExportStatus(str, Enum): class ExportConfig(BaseModel): """Configuration for where to write the export.""" - bucket: Optional[str] = None path: Optional[str] = None @@ -82,6 +81,7 @@ class ExportCreateReturn(BaseModel): class ExportStatusReturn(ExportCreateReturn): """Return type of the export status method.""" + completed_at: Optional[datetime] = Field(alias="completedAt", default=None) shard_status: Optional[Dict[str, Dict[str, ShardProgress]]] = Field( alias="shardStatus", default=None ) diff --git a/weaviate/export/sync.pyi b/weaviate/export/sync.pyi index 93ed5f4fa..fa00adb2c 100644 --- a/weaviate/export/sync.pyi +++ b/weaviate/export/sync.pyi @@ -23,16 +23,8 @@ class _Export(_ExportExecutor[ConnectionSync]): config: Optional[ExportConfig] = None, ) -> ExportCreateReturn: ... def get_status( - self, - export_id: str, - backend: ExportStorage, - bucket: Optional[str] = None, - path: Optional[str] = None, + self, export_id: str, backend: ExportStorage, path: Optional[str] = None ) -> ExportStatusReturn: ... def cancel( - self, - export_id: str, - backend: ExportStorage, - bucket: Optional[str] = None, - path: Optional[str] = None, + self, export_id: str, backend: ExportStorage, path: Optional[str] = None ) -> bool: ... 
From 54eea32d0571e3be41aa1551d22fdcf7751224a6 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 31 Mar 2026 15:46:21 -0700 Subject: [PATCH 62/99] Update UX --- weaviate/export/async_.pyi | 21 +++++++-- weaviate/export/executor.py | 87 ++++++++++++++++++++++--------------- weaviate/export/sync.pyi | 21 +++++++-- 3 files changed, 87 insertions(+), 42 deletions(-) diff --git a/weaviate/export/async_.pyi b/weaviate/export/async_.pyi index df4b7d7ab..8450b1c1d 100644 --- a/weaviate/export/async_.pyi +++ b/weaviate/export/async_.pyi @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import List, Literal, Optional, Union, overload from weaviate.connect.v4 import ConnectionAsync from weaviate.export.export import ( @@ -12,6 +12,7 @@ from weaviate.export.export import ( from .executor import _ExportExecutor class _ExportAsync(_ExportExecutor[ConnectionAsync]): + @overload async def create( self, export_id: str, @@ -19,12 +20,24 @@ class _ExportAsync(_ExportExecutor[ConnectionAsync]): file_format: ExportFileFormat, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, - wait_for_completion: bool = False, + *, + wait_for_completion: Literal[True], + config: Optional[ExportConfig] = None, + ) -> ExportStatusReturn: ... + @overload + async def create( + self, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[False] = False, config: Optional[ExportConfig] = None, ) -> ExportCreateReturn: ... async def get_status( - self, export_id: str, backend: ExportStorage, path: Optional[str] = None + self, export_id: str, backend: ExportStorage, config: Optional[ExportConfig] = None ) -> ExportStatusReturn: ... 
async def cancel( - self, export_id: str, backend: ExportStorage, path: Optional[str] = None + self, export_id: str, backend: ExportStorage, config: Optional[ExportConfig] = None ) -> bool: ... diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index db013da2c..587400a1c 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -2,7 +2,7 @@ import asyncio import time -from typing import Dict, Generic, List, Optional, Tuple, Union +from typing import Dict, Generic, List, Literal, Optional, Tuple, Union, overload from httpx import Response @@ -37,6 +37,31 @@ class _ExportExecutor(Generic[ConnectionType]): def __init__(self, connection: Connection): self._connection = connection + @overload + def create( + self, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + *, + wait_for_completion: Literal[True], + config: Optional[ExportConfig] = None, + ) -> executor.Result[ExportStatusReturn]: ... + + @overload + def create( + self, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[False] = False, + config: Optional[ExportConfig] = None, + ) -> executor.Result[ExportCreateReturn]: ... + def create( self, export_id: str, @@ -46,7 +71,7 @@ def create( exclude_collections: Union[List[str], str, None] = None, wait_for_completion: bool = False, config: Optional[ExportConfig] = None, - ) -> executor.Result[ExportCreateReturn]: + ) -> executor.Result[Union[ExportCreateReturn, ExportStatusReturn]]: """Create an export of all/per collection Weaviate objects. 
Args: @@ -83,9 +108,11 @@ def create( payload: dict = { "id": export_id, "file_format": file_format.value, - "include": include_collections, - "exclude": exclude_collections, } + if include_collections: + payload["include"] = include_collections + if exclude_collections: + payload["exclude"] = exclude_collections if config is not None: config_dict: Dict[str, str] = {} @@ -98,7 +125,7 @@ def create( if isinstance(self._connection, ConnectionAsync): - async def _execute() -> ExportCreateReturn: + async def _execute() -> Union[ExportCreateReturn, ExportStatusReturn]: res = await executor.aresult( self._connection.post( path=path, @@ -114,19 +141,16 @@ async def _execute() -> ExportCreateReturn: self.get_status( export_id=export_id, backend=backend, - path=config.path if config else None, + config=config, ) ) - create_status["status"] = status.status if status.status == ExportStatus.SUCCESS: - break + return status if status.status == ExportStatus.FAILED: - raise ExportFailedError( - f"Export failed: {create_status} with error: {status.error}" - ) + raise ExportFailedError(f"Export failed with error: {status.error}") if status.status == ExportStatus.CANCELED: raise ExportCanceledError( - f"Export was canceled: {create_status} with error: {status.error}" + f"Export was canceled with error: {status.error}" ) await asyncio.sleep(1) return ExportCreateReturn(**create_status) @@ -148,20 +172,15 @@ async def _execute() -> ExportCreateReturn: self.get_status( export_id=export_id, backend=backend, - path=config.path if config else None, + config=config, ) ) - create_status["status"] = status.status if status.status == ExportStatus.SUCCESS: - break + return status if status.status == ExportStatus.FAILED: - raise ExportFailedError( - f"Export failed: {create_status} with error: {status.error}" - ) + raise ExportFailedError(f"Export failed with error: {status.error}") if status.status == ExportStatus.CANCELED: - raise ExportCanceledError( - f"Export was canceled: {create_status} 
with error: {status.error}" - ) + raise ExportCanceledError(f"Export was canceled with error: {status.error}") time.sleep(1) return ExportCreateReturn(**create_status) @@ -169,14 +188,14 @@ def get_status( self, export_id: str, backend: ExportStorage, - path: Optional[str] = None, + config: Optional[ExportConfig] = None, ) -> executor.Result[ExportStatusReturn]: """Check the status of an export. Args: export_id: The identifier name of the export. backend: The backend storage where the export was created. - path: The path of the export location. By default None. + config: The configuration of the export (path). By default None. Returns: An `ExportStatusReturn` object that contains the export status response. @@ -188,8 +207,8 @@ def get_status( url_path = f"/export/{backend.value}/{export_id}" params: Dict[str, str] = {} - if path is not None: - params["path"] = path + if config is not None and config.path is not None: + params["path"] = config.path def resp(res: Response) -> ExportStatusReturn: typed_response = _decode_json_response_dict(res, "Export status check") @@ -209,17 +228,17 @@ def cancel( self, export_id: str, backend: ExportStorage, - path: Optional[str] = None, + config: Optional[ExportConfig] = None, ) -> executor.Result[bool]: """Cancel a running export. Args: export_id: The identifier name of the export. backend: The backend storage where the export was created. - path: The path of the export location. By default None. + config: The configuration of the export (path). By default None. Returns: - A bool indicating if the cancellation was successful. + True if the export was cancelled, False if the export had already finished. 
""" export_id, backend = _get_and_validate_get_status( export_id=export_id, @@ -227,15 +246,15 @@ def cancel( ) url_path = f"/export/{backend.value}/{export_id}" params: Dict[str, str] = {} - if path is not None: - params["path"] = path + if config is not None and config.path is not None: + params["path"] = config.path def resp(res: Response) -> bool: if res.status_code == 204: return True - typed_response = _decode_json_response_dict(res, "Export cancel") - if typed_response is None: - raise EmptyResponseException() + # 409 means export already finished — not an error, just already done + if res.status_code == 409: + return False return False return executor.execute( @@ -244,7 +263,7 @@ def resp(res: Response) -> bool: path=url_path, params=params, error_msg="Export cancel failed due to connection error.", - status_codes=_ExpectedStatusCodes(ok_in=[204, 404], error="cancel export"), + status_codes=_ExpectedStatusCodes(ok_in=[204, 409], error="cancel export"), ) diff --git a/weaviate/export/sync.pyi b/weaviate/export/sync.pyi index fa00adb2c..cd8325b39 100644 --- a/weaviate/export/sync.pyi +++ b/weaviate/export/sync.pyi @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import List, Literal, Optional, Union, overload from weaviate.connect.v4 import ConnectionSync from weaviate.export.export import ( @@ -12,6 +12,7 @@ from weaviate.export.export import ( from .executor import _ExportExecutor class _Export(_ExportExecutor[ConnectionSync]): + @overload def create( self, export_id: str, @@ -19,12 +20,24 @@ class _Export(_ExportExecutor[ConnectionSync]): file_format: ExportFileFormat, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, - wait_for_completion: bool = False, + *, + wait_for_completion: Literal[True], + config: Optional[ExportConfig] = None, + ) -> ExportStatusReturn: ... 
+ @overload + def create( + self, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[False] = False, config: Optional[ExportConfig] = None, ) -> ExportCreateReturn: ... def get_status( - self, export_id: str, backend: ExportStorage, path: Optional[str] = None + self, export_id: str, backend: ExportStorage, config: Optional[ExportConfig] = None ) -> ExportStatusReturn: ... def cancel( - self, export_id: str, backend: ExportStorage, path: Optional[str] = None + self, export_id: str, backend: ExportStorage, config: Optional[ExportConfig] = None ) -> bool: ... From 42bfc5c6550c5b4959a1aec39ed64cc89431ba69 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 7 Apr 2026 15:21:27 -0700 Subject: [PATCH 63/99] Remove export path parameter --- weaviate/classes/export.py | 2 -- weaviate/export/async_.pyi | 13 +++---------- weaviate/export/executor.py | 28 +--------------------------- weaviate/export/export.py | 6 ------ weaviate/export/sync.pyi | 13 +++---------- 5 files changed, 7 insertions(+), 55 deletions(-) diff --git a/weaviate/classes/export.py b/weaviate/classes/export.py index 07e87a813..d14fb3f07 100644 --- a/weaviate/classes/export.py +++ b/weaviate/classes/export.py @@ -1,11 +1,9 @@ from weaviate.export.export import ( - ExportConfig, ExportFileFormat, ExportStorage, ) __all__ = [ - "ExportConfig", "ExportFileFormat", "ExportStorage", ] diff --git a/weaviate/export/async_.pyi b/weaviate/export/async_.pyi index 8450b1c1d..4987a0cc6 100644 --- a/weaviate/export/async_.pyi +++ b/weaviate/export/async_.pyi @@ -1,8 +1,7 @@ -from typing import List, Literal, Optional, Union, overload +from typing import List, Literal, Union, overload from weaviate.connect.v4 import ConnectionAsync from weaviate.export.export import ( - ExportConfig, ExportCreateReturn, ExportFileFormat, ExportStatusReturn, @@ 
-22,7 +21,6 @@ class _ExportAsync(_ExportExecutor[ConnectionAsync]): exclude_collections: Union[List[str], str, None] = None, *, wait_for_completion: Literal[True], - config: Optional[ExportConfig] = None, ) -> ExportStatusReturn: ... @overload async def create( @@ -33,11 +31,6 @@ class _ExportAsync(_ExportExecutor[ConnectionAsync]): include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, wait_for_completion: Literal[False] = False, - config: Optional[ExportConfig] = None, ) -> ExportCreateReturn: ... - async def get_status( - self, export_id: str, backend: ExportStorage, config: Optional[ExportConfig] = None - ) -> ExportStatusReturn: ... - async def cancel( - self, export_id: str, backend: ExportStorage, config: Optional[ExportConfig] = None - ) -> bool: ... + async def get_status(self, export_id: str, backend: ExportStorage) -> ExportStatusReturn: ... + async def cancel(self, export_id: str, backend: ExportStorage) -> bool: ... diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index 587400a1c..18f2f9f72 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -2,7 +2,7 @@ import asyncio import time -from typing import Dict, Generic, List, Literal, Optional, Tuple, Union, overload +from typing import Generic, List, Literal, Tuple, Union, overload from httpx import Response @@ -20,7 +20,6 @@ ) from weaviate.export.export import ( STORAGE_NAMES, - ExportConfig, ExportCreateReturn, ExportFileFormat, ExportStatus, @@ -47,7 +46,6 @@ def create( exclude_collections: Union[List[str], str, None] = None, *, wait_for_completion: Literal[True], - config: Optional[ExportConfig] = None, ) -> executor.Result[ExportStatusReturn]: ... 
@overload @@ -59,7 +57,6 @@ def create( include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, wait_for_completion: Literal[False] = False, - config: Optional[ExportConfig] = None, ) -> executor.Result[ExportCreateReturn]: ... def create( @@ -70,7 +67,6 @@ def create( include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, wait_for_completion: bool = False, - config: Optional[ExportConfig] = None, ) -> executor.Result[Union[ExportCreateReturn, ExportStatusReturn]]: """Create an export of all/per collection Weaviate objects. @@ -83,7 +79,6 @@ def create( exclude_collections: The collection/list of collections to be excluded in the export. Either `include_collections` or `exclude_collections` can be set. wait_for_completion: Whether to wait until the export is done. By default False. - config: The configuration of the export (path). By default None. Returns: An `ExportCreateReturn` object that contains the export creation response. 
@@ -114,13 +109,6 @@ def create( if exclude_collections: payload["exclude"] = exclude_collections - if config is not None: - config_dict: Dict[str, str] = {} - if config.path is not None: - config_dict["path"] = config.path - if config_dict: - payload["config"] = config_dict - path = f"/export/{backend.value}" if isinstance(self._connection, ConnectionAsync): @@ -141,7 +129,6 @@ async def _execute() -> Union[ExportCreateReturn, ExportStatusReturn]: self.get_status( export_id=export_id, backend=backend, - config=config, ) ) if status.status == ExportStatus.SUCCESS: @@ -172,7 +159,6 @@ async def _execute() -> Union[ExportCreateReturn, ExportStatusReturn]: self.get_status( export_id=export_id, backend=backend, - config=config, ) ) if status.status == ExportStatus.SUCCESS: @@ -188,14 +174,12 @@ def get_status( self, export_id: str, backend: ExportStorage, - config: Optional[ExportConfig] = None, ) -> executor.Result[ExportStatusReturn]: """Check the status of an export. Args: export_id: The identifier name of the export. backend: The backend storage where the export was created. - config: The configuration of the export (path). By default None. Returns: An `ExportStatusReturn` object that contains the export status response. @@ -206,9 +190,6 @@ def get_status( ) url_path = f"/export/{backend.value}/{export_id}" - params: Dict[str, str] = {} - if config is not None and config.path is not None: - params["path"] = config.path def resp(res: Response) -> ExportStatusReturn: typed_response = _decode_json_response_dict(res, "Export status check") @@ -220,7 +201,6 @@ def resp(res: Response) -> ExportStatusReturn: response_callback=resp, method=self._connection.get, path=url_path, - params=params, error_msg="Export status check failed due to connection error.", ) @@ -228,14 +208,12 @@ def cancel( self, export_id: str, backend: ExportStorage, - config: Optional[ExportConfig] = None, ) -> executor.Result[bool]: """Cancel a running export. 
Args: export_id: The identifier name of the export. backend: The backend storage where the export was created. - config: The configuration of the export (path). By default None. Returns: True if the export was cancelled, False if the export had already finished. @@ -245,9 +223,6 @@ def cancel( backend=backend, ) url_path = f"/export/{backend.value}/{export_id}" - params: Dict[str, str] = {} - if config is not None and config.path is not None: - params["path"] = config.path def resp(res: Response) -> bool: if res.status_code == 204: @@ -261,7 +236,6 @@ def resp(res: Response) -> bool: response_callback=resp, method=self._connection.delete, path=url_path, - params=params, error_msg="Export cancel failed due to connection error.", status_codes=_ExpectedStatusCodes(ok_in=[204, 409], error="cancel export"), ) diff --git a/weaviate/export/export.py b/weaviate/export/export.py index e41704a0f..3cdc7e945 100644 --- a/weaviate/export/export.py +++ b/weaviate/export/export.py @@ -48,12 +48,6 @@ class ShardExportStatus(str, Enum): SKIPPED = "SKIPPED" -class ExportConfig(BaseModel): - """Configuration for where to write the export.""" - - path: Optional[str] = None - - class ShardProgress(BaseModel): """Progress of a single shard export.""" diff --git a/weaviate/export/sync.pyi b/weaviate/export/sync.pyi index cd8325b39..8b1820581 100644 --- a/weaviate/export/sync.pyi +++ b/weaviate/export/sync.pyi @@ -1,8 +1,7 @@ -from typing import List, Literal, Optional, Union, overload +from typing import List, Literal, Union, overload from weaviate.connect.v4 import ConnectionSync from weaviate.export.export import ( - ExportConfig, ExportCreateReturn, ExportFileFormat, ExportStatusReturn, @@ -22,7 +21,6 @@ class _Export(_ExportExecutor[ConnectionSync]): exclude_collections: Union[List[str], str, None] = None, *, wait_for_completion: Literal[True], - config: Optional[ExportConfig] = None, ) -> ExportStatusReturn: ... 
@overload def create( @@ -33,11 +31,6 @@ class _Export(_ExportExecutor[ConnectionSync]): include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, wait_for_completion: Literal[False] = False, - config: Optional[ExportConfig] = None, ) -> ExportCreateReturn: ... - def get_status( - self, export_id: str, backend: ExportStorage, config: Optional[ExportConfig] = None - ) -> ExportStatusReturn: ... - def cancel( - self, export_id: str, backend: ExportStorage, config: Optional[ExportConfig] = None - ) -> bool: ... + def get_status(self, export_id: str, backend: ExportStorage) -> ExportStatusReturn: ... + def cancel(self, export_id: str, backend: ExportStorage) -> bool: ... From 2c749671e989079eb2bf2b626bf007925393fb9c Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Mon, 13 Apr 2026 06:38:39 +0200 Subject: [PATCH 64/99] Self-review of changes --- integration/test_export.py | 2 +- weaviate/export/executor.py | 23 +++++++++++++++++++++-- weaviate/export/export.py | 16 ++-------------- weaviate/export/sync.pyi | 4 +++- weaviate/outputs/export.py | 2 ++ 5 files changed, 29 insertions(+), 18 deletions(-) diff --git a/integration/test_export.py b/integration/test_export.py index 7748740c4..b393d153e 100644 --- a/integration/test_export.py +++ b/integration/test_export.py @@ -201,7 +201,7 @@ def test_fail_on_both_include_and_exclude( ) -> None: """Fail when both include and exclude collections are set.""" export_id = unique_export_id(request.node.name) - with pytest.raises(TypeError): + with pytest.raises(ValueError): client.export.create( export_id=export_id, backend=BACKEND, diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index 18f2f9f72..84d2651f6 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -6,6 +6,7 @@ from httpx import Response +from weaviate.backup.backup import STORAGE_NAMES from weaviate.connect import executor from weaviate.connect.v4 import ( Connection, 
@@ -17,9 +18,9 @@ EmptyResponseException, ExportCanceledError, ExportFailedError, + WeaviateUnsupportedFeatureError, ) from weaviate.export.export import ( - STORAGE_NAMES, ExportCreateReturn, ExportFileFormat, ExportStatus, @@ -87,6 +88,12 @@ def create( weaviate.exceptions.UnexpectedStatusCodeError: If weaviate reports a non-OK status. TypeError: One of the arguments have a wrong type. """ + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Collection export", + str(self._connection._weaviate_version), + "1.37.0", + ) ( export_id, backend, @@ -184,6 +191,12 @@ def get_status( Returns: An `ExportStatusReturn` object that contains the export status response. """ + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Collection export", + str(self._connection._weaviate_version), + "1.37.0", + ) export_id, backend = _get_and_validate_get_status( export_id=export_id, backend=backend, @@ -218,6 +231,12 @@ def cancel( Returns: True if the export was cancelled, False if the export had already finished. """ + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Collection export", + str(self._connection._weaviate_version), + "1.37.0", + ) export_id, backend = _get_and_validate_get_status( export_id=export_id, backend=backend, @@ -286,7 +305,7 @@ def _get_and_validate_create_arguments( exclude_classes = [] if include_classes and exclude_classes: - raise TypeError( + raise ValueError( "Either 'include_collections' OR 'exclude_collections' can be set, not both." 
) diff --git a/weaviate/export/export.py b/weaviate/export/export.py index 3cdc7e945..fe94afa5c 100644 --- a/weaviate/export/export.py +++ b/weaviate/export/export.py @@ -6,21 +6,9 @@ from pydantic import BaseModel, Field -STORAGE_NAMES = { - "filesystem", - "s3", - "gcs", - "azure", -} +from weaviate.backup.backup import BackupStorage - -class ExportStorage(str, Enum): - """Which backend should be used to write the export to.""" - - FILESYSTEM = "filesystem" - S3 = "s3" - GCS = "gcs" - AZURE = "azure" +ExportStorage = BackupStorage class ExportFileFormat(str, Enum): diff --git a/weaviate/export/sync.pyi b/weaviate/export/sync.pyi index 8b1820581..c9caee127 100644 --- a/weaviate/export/sync.pyi +++ b/weaviate/export/sync.pyi @@ -1,6 +1,8 @@ from typing import List, Literal, Union, overload -from weaviate.connect.v4 import ConnectionSync +from weaviate.connect.v4 import ( + ConnectionSync, +) from weaviate.export.export import ( ExportCreateReturn, ExportFileFormat, diff --git a/weaviate/outputs/export.py b/weaviate/outputs/export.py index 531b715c4..de0554de4 100644 --- a/weaviate/outputs/export.py +++ b/weaviate/outputs/export.py @@ -2,6 +2,7 @@ ExportCreateReturn, ExportStatus, ExportStatusReturn, + ExportStorage, ShardExportStatus, ShardProgress, ) @@ -10,6 +11,7 @@ "ExportCreateReturn", "ExportStatus", "ExportStatusReturn", + "ExportStorage", "ShardExportStatus", "ShardProgress", ] From ed9f28879a7a1454ca02122d73a96677749fd70f Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Mon, 13 Apr 2026 06:59:34 +0200 Subject: [PATCH 65/99] Review fixes --- integration/test_export.py | 3 +-- weaviate/client.pyi | 3 +++ weaviate/export/executor.py | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/integration/test_export.py b/integration/test_export.py index b393d153e..b885e9588 100644 --- a/integration/test_export.py +++ b/integration/test_export.py @@ -168,8 +168,7 @@ def test_cancel_export(client: weaviate.WeaviateClient, request: SubRequest) -> 
) assert resp.status in [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS] - result = client.export.cancel(export_id=export_id, backend=BACKEND) - assert result is True + client.export.cancel(export_id=export_id, backend=BACKEND) # verify it's cancelled or already completed (race condition) start = time.time() diff --git a/weaviate/client.pyi b/weaviate/client.pyi index 9b32af15f..0ac79415c 100644 --- a/weaviate/client.pyi +++ b/weaviate/client.pyi @@ -20,6 +20,7 @@ from .backup import _Backup, _BackupAsync from .cluster import _Cluster, _ClusterAsync from .collections.batch.client import _BatchClientWrapper, _BatchClientWrapperAsync from .debug import _Debug, _DebugAsync +from .export import _Export, _ExportAsync from .rbac import _Roles, _RolesAsync from .types import NUMBER @@ -29,6 +30,7 @@ class WeaviateAsyncClient(_WeaviateClientExecutor[ConnectionAsync]): _connection: ConnectionAsync alias: _AliasAsync backup: _BackupAsync + export: _ExportAsync batch: _BatchClientWrapperAsync collections: _CollectionsAsync cluster: _ClusterAsync @@ -52,6 +54,7 @@ class WeaviateClient(_WeaviateClientExecutor[ConnectionSync]): _connection: ConnectionSync alias: _Alias backup: _Backup + export: _Export batch: _BatchClientWrapper collections: _Collections cluster: _Cluster diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index 84d2651f6..6d772f969 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -82,7 +82,8 @@ def create( wait_for_completion: Whether to wait until the export is done. By default False. Returns: - An `ExportCreateReturn` object that contains the export creation response. + An `ExportCreateReturn` when `wait_for_completion=False`, or an `ExportStatusReturn` + when `wait_for_completion=True` and the export completes successfully. Raises: weaviate.exceptions.UnexpectedStatusCodeError: If weaviate reports a non-OK status. 
@@ -272,7 +273,7 @@ def _get_and_validate_create_arguments( if isinstance(backend, str): try: backend = ExportStorage(backend.lower()) - except KeyError: + except ValueError: raise ValueError( f"'backend' must have one of these values: {STORAGE_NAMES}. Given value: {backend}." ) @@ -323,7 +324,7 @@ def _get_and_validate_get_status( if isinstance(backend, str): try: backend = ExportStorage(backend.lower()) - except KeyError: + except ValueError: raise ValueError( f"'backend' must have one of these values: {STORAGE_NAMES}. Given value: {backend}." ) From e9192f85f742011fb1dab7019ec15e71c730da61 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Mon, 13 Apr 2026 07:43:59 +0200 Subject: [PATCH 66/99] Add version guard for export integration tests --- integration/test_export.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration/test_export.py b/integration/test_export.py index b885e9588..b72166998 100644 --- a/integration/test_export.py +++ b/integration/test_export.py @@ -36,6 +36,9 @@ @pytest.fixture(scope="module") def client() -> Generator[weaviate.WeaviateClient, None, None]: client = weaviate.connect_to_local() + if client._connection._weaviate_version.is_lower_than(1, 37, 0): + client.close() + pytest.skip("Collection export is not supported in versions lower than 1.37.0") client.collections.delete(COLLECTION_NAME) col = client.collections.create( From 338195add4a518d02137c9f43ec4edd47f36f96d Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 14 Apr 2026 07:50:33 +0200 Subject: [PATCH 67/99] Update to latest image --- .github/workflows/main.yaml | 2 +- ci/docker-compose.yml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index d3dfd1fa4..a7d6cca4d 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,7 +28,7 @@ env: WEAVIATE_134: 1.34.19 WEAVIATE_135: 1.35.16-efdedfa WEAVIATE_136: 1.36.9-d905e6c - WEAVIATE_137: 1.37.0-rc.1-578c4eb + 
WEAVIATE_137: 1.37.0-rc.1-bc3891e jobs: lint-and-format: diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 20840fafa..ddc92d0ed 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -31,6 +31,8 @@ services: DISABLE_LAZY_LOAD_SHARDS: 'true' GRPC_MAX_MESSAGE_SIZE: 100000000 # 100mb OBJECTS_TTL_DELETE_SCHEDULE: "@every 12h" # for objectTTL tests to work + EXPORT_ENABLED: 'true' + EXPORT_DEFAULT_PATH: "/var/lib/weaviate/exports" contextionary: environment: From 90840ce1c8c5e0b6b7a46129f72efd58c17d69a2 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 14 Apr 2026 08:04:40 +0200 Subject: [PATCH 68/99] Lowercase export ID --- weaviate/export/executor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index 6d772f969..52d073e0a 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -270,6 +270,7 @@ def _get_and_validate_create_arguments( ) -> Tuple[str, ExportStorage, List[str], List[str]]: if not isinstance(export_id, str): raise TypeError(f"'export_id' must be of type str. Given type: {type(export_id)}.") + export_id = export_id.lower() if isinstance(backend, str): try: backend = ExportStorage(backend.lower()) @@ -321,6 +322,7 @@ def _get_and_validate_get_status( ) -> Tuple[str, ExportStorage]: if not isinstance(export_id, str): raise TypeError(f"'export_id' must be of type str. 
Given type: {type(export_id)}.") + export_id = export_id.lower() if isinstance(backend, str): try: backend = ExportStorage(backend.lower()) From 584f8a68bb2e495e950334fff805b6da112173ce Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Wed, 15 Apr 2026 07:47:55 +0200 Subject: [PATCH 69/99] Enforce kwargs for export --- weaviate/collections/collections/executor.py | 2 +- weaviate/collections/config/executor.py | 2 +- weaviate/export/async_.pyi | 7 ++++--- weaviate/export/executor.py | 6 +++++- weaviate/export/sync.pyi | 7 ++++--- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/weaviate/collections/collections/executor.py b/weaviate/collections/collections/executor.py index 644a0c844..2a733356c 100644 --- a/weaviate/collections/collections/executor.py +++ b/weaviate/collections/collections/executor.py @@ -44,13 +44,13 @@ _check_references_generic, ) from weaviate.collections.collection import Collection, CollectionAsync +from weaviate.collections.config.executor import _any_property_has_text_analyzer from weaviate.connect import executor from weaviate.connect.v4 import ( ConnectionAsync, ConnectionType, _ExpectedStatusCodes, ) -from weaviate.collections.config.executor import _any_property_has_text_analyzer from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError from weaviate.util import _capitalize_first_letter, _decode_json_response_dict from weaviate.validator import _validate_input, _ValidateArgument diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index bd302c3c4..103ab70ac 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -5,8 +5,8 @@ Generic, List, Literal, - Sequence, Optional, + Sequence, Tuple, Union, cast, diff --git a/weaviate/export/async_.pyi b/weaviate/export/async_.pyi index 4987a0cc6..4f1b82ced 100644 --- a/weaviate/export/async_.pyi +++ b/weaviate/export/async_.pyi @@ -14,17 +14,18 @@ class 
_ExportAsync(_ExportExecutor[ConnectionAsync]): @overload async def create( self, + *, export_id: str, backend: ExportStorage, file_format: ExportFileFormat, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, - *, wait_for_completion: Literal[True], ) -> ExportStatusReturn: ... @overload async def create( self, + *, export_id: str, backend: ExportStorage, file_format: ExportFileFormat, @@ -32,5 +33,5 @@ class _ExportAsync(_ExportExecutor[ConnectionAsync]): exclude_collections: Union[List[str], str, None] = None, wait_for_completion: Literal[False] = False, ) -> ExportCreateReturn: ... - async def get_status(self, export_id: str, backend: ExportStorage) -> ExportStatusReturn: ... - async def cancel(self, export_id: str, backend: ExportStorage) -> bool: ... + async def get_status(self, *, export_id: str, backend: ExportStorage) -> ExportStatusReturn: ... + async def cancel(self, *, export_id: str, backend: ExportStorage) -> bool: ... diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py index 52d073e0a..516273c73 100644 --- a/weaviate/export/executor.py +++ b/weaviate/export/executor.py @@ -40,18 +40,19 @@ def __init__(self, connection: Connection): @overload def create( self, + *, export_id: str, backend: ExportStorage, file_format: ExportFileFormat, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, - *, wait_for_completion: Literal[True], ) -> executor.Result[ExportStatusReturn]: ... 
@overload def create( self, + *, export_id: str, backend: ExportStorage, file_format: ExportFileFormat, @@ -62,6 +63,7 @@ def create( def create( self, + *, export_id: str, backend: ExportStorage, file_format: ExportFileFormat, @@ -180,6 +182,7 @@ async def _execute() -> Union[ExportCreateReturn, ExportStatusReturn]: def get_status( self, + *, export_id: str, backend: ExportStorage, ) -> executor.Result[ExportStatusReturn]: @@ -220,6 +223,7 @@ def resp(res: Response) -> ExportStatusReturn: def cancel( self, + *, export_id: str, backend: ExportStorage, ) -> executor.Result[bool]: diff --git a/weaviate/export/sync.pyi b/weaviate/export/sync.pyi index c9caee127..afe7284e2 100644 --- a/weaviate/export/sync.pyi +++ b/weaviate/export/sync.pyi @@ -16,17 +16,18 @@ class _Export(_ExportExecutor[ConnectionSync]): @overload def create( self, + *, export_id: str, backend: ExportStorage, file_format: ExportFileFormat, include_collections: Union[List[str], str, None] = None, exclude_collections: Union[List[str], str, None] = None, - *, wait_for_completion: Literal[True], ) -> ExportStatusReturn: ... @overload def create( self, + *, export_id: str, backend: ExportStorage, file_format: ExportFileFormat, @@ -34,5 +35,5 @@ class _Export(_ExportExecutor[ConnectionSync]): exclude_collections: Union[List[str], str, None] = None, wait_for_completion: Literal[False] = False, ) -> ExportCreateReturn: ... - def get_status(self, export_id: str, backend: ExportStorage) -> ExportStatusReturn: ... - def cancel(self, export_id: str, backend: ExportStorage) -> bool: ... + def get_status(self, *, export_id: str, backend: ExportStorage) -> ExportStatusReturn: ... + def cancel(self, *, export_id: str, backend: ExportStorage) -> bool: ... 
From 96ca193e7507b0e40f6b3865e4afbdbb7af2d7b0 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Wed, 15 Apr 2026 08:13:28 +0200 Subject: [PATCH 70/99] Fix tests --- integration/test_export.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/test_export.py b/integration/test_export.py index b72166998..71e3a146f 100644 --- a/integration/test_export.py +++ b/integration/test_export.py @@ -104,7 +104,7 @@ def test_create_export_without_waiting( break time.sleep(0.1) - assert status.export_id == export_id + assert status.export_id.lower() == export_id.lower() def test_get_export_status(client: weaviate.WeaviateClient, request: SubRequest) -> None: @@ -121,7 +121,7 @@ def test_get_export_status(client: weaviate.WeaviateClient, request: SubRequest) status = client.export.get_status(export_id=export_id, backend=BACKEND) assert status.status == ExportStatus.SUCCESS - assert status.export_id == export_id + assert status.export_id.lower() == export_id.lower() assert status.backend == BACKEND.value From 594e8ee7c8d38f995ed4ce24e942be07a98be51d Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Wed, 15 Apr 2026 08:38:09 +0200 Subject: [PATCH 71/99] Fix tests --- integration/test_collection_query_profile.py | 87 ++++++++++---------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/integration/test_collection_query_profile.py b/integration/test_collection_query_profile.py index 92687129d..3b57e27b9 100644 --- a/integration/test_collection_query_profile.py +++ b/integration/test_collection_query_profile.py @@ -1,12 +1,14 @@ import re +from typing import Any import pytest -import weaviate -from weaviate.collections.classes.config import Configure, DataType, Property +from weaviate.collections import Collection +from weaviate.collections.classes.config import DataType, Property from weaviate.collections.classes.data import DataObject -from weaviate.collections.classes.grpc import MetadataQuery 
+from weaviate.collections.classes.grpc import GroupBy, MetadataQuery from weaviate.collections.classes.internal import SearchProfileReturn +from integration.conftest import CollectionFactory GO_DURATION_RE = re.compile(r"[\d.]+(ns|µs|ms|s|m|h)") @@ -28,26 +30,12 @@ def assert_common_profile(profile: SearchProfileReturn) -> None: assert isinstance(value, str) and value != "" -@pytest.fixture(scope="module") -def client(): - client = weaviate.connect_to_local() - yield client - client.close() - - -@pytest.fixture(scope="module") -def collection_with_data(client: weaviate.WeaviateClient): - if client._connection._weaviate_version.is_lower_than(1, 36, 9): - pytest.skip("Query profiling requires Weaviate >= 1.36.9") - name = "TestQueryProfile" - client.collections.delete(name) - collection = client.collections.create( - name=name, - vectorizer_config=Configure.Vectorizer.none(), - properties=[ - Property(name="text", data_type=DataType.TEXT), - ], +def _create_and_populate(collection_factory: CollectionFactory) -> Collection[Any, Any]: + collection = collection_factory( + properties=[Property(name="text", data_type=DataType.TEXT)], ) + if collection._connection._weaviate_version.is_lower_than(1, 36, 9): + pytest.skip("Query profiling requires Weaviate >= 1.36.9") collection.data.insert_many( [ DataObject(properties={"text": "hello world"}, vector=[1.0, 0.0, 0.0]), @@ -55,13 +43,13 @@ def collection_with_data(client: weaviate.WeaviateClient): DataObject(properties={"text": "foo bar baz"}, vector=[0.0, 0.0, 1.0]), ] ) - yield collection - client.collections.delete(name) + return collection -def test_fetch_objects_with_query_profile(collection_with_data): +def test_fetch_objects_with_query_profile(collection_factory: CollectionFactory) -> None: """Test that query profiling works with fetch_objects (object lookup).""" - result = collection_with_data.query.fetch_objects( + collection = _create_and_populate(collection_factory) + result = collection.query.fetch_objects( 
return_metadata=MetadataQuery(query_profile=True), ) assert len(result.objects) == 3 @@ -76,9 +64,10 @@ def test_fetch_objects_with_query_profile(collection_with_data): assert_common_profile(shard.searches["object"]) -def test_near_vector_with_query_profile(collection_with_data): +def test_near_vector_with_query_profile(collection_factory: CollectionFactory) -> None: """Test that query profiling works with near_vector search.""" - result = collection_with_data.query.near_vector( + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( near_vector=[1.0, 0.0, 0.0], return_metadata=MetadataQuery(query_profile=True, distance=True), limit=2, @@ -107,9 +96,10 @@ def test_near_vector_with_query_profile(collection_with_data): assert_go_duration(vector_profile.details["objects_took"], "objects_took") -def test_bm25_with_query_profile(collection_with_data): +def test_bm25_with_query_profile(collection_factory: CollectionFactory) -> None: """Test that query profiling works with BM25 keyword search.""" - result = collection_with_data.query.bm25( + collection = _create_and_populate(collection_factory) + result = collection.query.bm25( query="hello", return_metadata=MetadataQuery(query_profile=True, score=True), ) @@ -135,9 +125,10 @@ def test_bm25_with_query_profile(collection_with_data): assert int(keyword_profile.details["kwd_6_res_count"]) >= 0 -def test_hybrid_with_query_profile(collection_with_data): +def test_hybrid_with_query_profile(collection_factory: CollectionFactory) -> None: """Test that query profiling works with hybrid search (both vector and keyword).""" - result = collection_with_data.query.hybrid( + collection = _create_and_populate(collection_factory) + result = collection.query.hybrid( query="hello", vector=[1.0, 0.0, 0.0], return_metadata=MetadataQuery(query_profile=True), @@ -157,11 +148,12 @@ def test_hybrid_with_query_profile(collection_with_data): assert "kwd_method" in shard.searches["keyword"].details -def 
test_near_vector_group_by_with_query_profile(collection_with_data): - """Test that query profiling works with group_by (mirrors C# QueryProfiling_NearText_GroupBy_Returns_Profile).""" - from weaviate.collections.classes.grpc import GroupBy - - result = collection_with_data.query.near_vector( +def test_near_vector_group_by_with_query_profile( + collection_factory: CollectionFactory, +) -> None: + """Test that query profiling works with group_by.""" + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( near_vector=[1.0, 0.0, 0.0], return_metadata=MetadataQuery(query_profile=True), group_by=GroupBy(prop="text", objects_per_group=1, number_of_groups=3), @@ -174,17 +166,23 @@ def test_near_vector_group_by_with_query_profile(collection_with_data): assert_common_profile(shard.searches["vector"]) -def test_no_query_profile_when_not_requested(collection_with_data): +def test_no_query_profile_when_not_requested( + collection_factory: CollectionFactory, +) -> None: """Test that query_profile is None when not requested.""" - result = collection_with_data.query.fetch_objects( + collection = _create_and_populate(collection_factory) + result = collection.query.fetch_objects( return_metadata=MetadataQuery(distance=True), ) assert result.query_profile is None -def test_query_profile_with_metadata_list(collection_with_data): +def test_query_profile_with_metadata_list( + collection_factory: CollectionFactory, +) -> None: """Test that query profiling works when using list-style metadata.""" - result = collection_with_data.query.near_vector( + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( near_vector=[1.0, 0.0, 0.0], return_metadata=["query_profile", "distance"], limit=2, @@ -197,9 +195,12 @@ def test_query_profile_with_metadata_list(collection_with_data): assert_common_profile(shard.searches["vector"]) -def test_query_profile_details_are_strings(collection_with_data): +def 
test_query_profile_details_are_strings( + collection_factory: CollectionFactory, +) -> None: """Test that all detail keys and values are non-empty strings.""" - result = collection_with_data.query.near_vector( + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( near_vector=[1.0, 0.0, 0.0], return_metadata=MetadataQuery(query_profile=True), limit=1, From b83a9488ca83e93846ee1f0714e1a3b37d95ee4a Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Wed, 15 Apr 2026 09:17:27 +0200 Subject: [PATCH 72/99] Add negative assertions --- integration/test_collection_query_profile.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/integration/test_collection_query_profile.py b/integration/test_collection_query_profile.py index 3b57e27b9..0c68c9e69 100644 --- a/integration/test_collection_query_profile.py +++ b/integration/test_collection_query_profile.py @@ -61,6 +61,8 @@ def test_fetch_objects_with_query_profile(collection_factory: CollectionFactory) assert shard.node != "" assert "object" in shard.searches + assert "vector" not in shard.searches + assert "keyword" not in shard.searches assert_common_profile(shard.searches["object"]) @@ -78,6 +80,8 @@ def test_near_vector_with_query_profile(collection_factory: CollectionFactory) - shard = result.query_profile.shards[0] assert "vector" in shard.searches + assert "keyword" not in shard.searches + assert "object" not in shard.searches vector_profile = shard.searches["vector"] assert_common_profile(vector_profile) @@ -108,6 +112,8 @@ def test_bm25_with_query_profile(collection_factory: CollectionFactory) -> None: shard = result.query_profile.shards[0] assert "keyword" in shard.searches + assert "vector" not in shard.searches + assert "object" not in shard.searches keyword_profile = shard.searches["keyword"] assert_common_profile(keyword_profile) @@ -140,6 +146,7 @@ def test_hybrid_with_query_profile(collection_factory: CollectionFactory) 
-> Non shard = result.query_profile.shards[0] assert "vector" in shard.searches, "Hybrid should produce a 'vector' profile" assert "keyword" in shard.searches, "Hybrid should produce a 'keyword' profile" + assert "object" not in shard.searches assert_common_profile(shard.searches["vector"]) assert "vector_search_took" in shard.searches["vector"].details From 8b2caaf7356223370b3eba7665d7c5e59c685be1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Wed, 15 Apr 2026 10:39:17 +0100 Subject: [PATCH 73/99] refactor: names don't shadow existing --- integration/test_tokenize.py | 38 +++++++++---------- weaviate/__init__.py | 4 +- weaviate/client.py | 6 +-- weaviate/client.pyi | 6 +-- weaviate/tokenization/__init__.py | 7 ++++ weaviate/tokenization/async_.py | 8 ++++ .../{tokenize => tokenization}/executor.py | 6 +-- .../types.py => tokenization/models.py} | 0 weaviate/tokenization/sync.py | 8 ++++ weaviate/tokenize/__init__.py | 7 ---- weaviate/tokenize/async_.py | 8 ---- weaviate/tokenize/sync.py | 8 ---- 12 files changed, 53 insertions(+), 53 deletions(-) create mode 100644 weaviate/tokenization/__init__.py create mode 100644 weaviate/tokenization/async_.py rename weaviate/{tokenize => tokenization}/executor.py (97%) rename weaviate/{tokenize/types.py => tokenization/models.py} (100%) create mode 100644 weaviate/tokenization/sync.py delete mode 100644 weaviate/tokenize/__init__.py delete mode 100644 weaviate/tokenize/async_.py delete mode 100644 weaviate/tokenize/sync.py diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index e54f9d49d..b3ecff875 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -22,7 +22,7 @@ _TextAnalyzerConfigCreate, ) from weaviate.config import AdditionalConfig -from weaviate.tokenize.types import TokenizeResult +from weaviate.tokenization.models import TokenizeResult @pytest.fixture(scope="module") @@ -69,19 +69,19 @@ def test_tokenization_enum( text: str, 
expected_tokens: list, ) -> None: - result = client.tokenize.text(text=text, tokenization=tokenization) + result = client.tokenization.text(text=text, tokenization=tokenization) assert isinstance(result, TokenizeResult) assert result.tokenization == tokenization.value assert result.indexed == expected_tokens assert result.query == expected_tokens def test_tokenization_string(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text(text="hello world", tokenization="word") + result = client.tokenization.text(text="hello world", tokenization="word") assert result.tokenization == "word" assert result.indexed == ["hello", "world"] def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text( + result = client.tokenization.text( text="The quick brown fox", tokenization=Tokenization.WORD, stopword_preset=StopwordsPreset.EN, @@ -90,7 +90,7 @@ def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None: assert "quick" in result.query def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text( + result = client.tokenization.text( text="The quick brown fox", tokenization=Tokenization.WORD, stopword_preset="en", @@ -98,7 +98,7 @@ def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None: assert "the" not in result.query def test_ascii_fold_via_kwargs(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text( + result = client.tokenization.text( text="L'école est fermée", tokenization=Tokenization.WORD, ascii_fold=True, @@ -107,7 +107,7 @@ def test_ascii_fold_via_kwargs(self, client: weaviate.WeaviateClient) -> None: def test_ascii_fold_via_analyzer_config(self, client: weaviate.WeaviateClient) -> None: cfg = _TextAnalyzerConfigCreate(ascii_fold=True) - result = client.tokenize.text( + result = client.tokenization.text( text="L'école est fermée", tokenization=Tokenization.WORD, 
analyzer_config=cfg, @@ -121,12 +121,12 @@ def test_analyzer_config_and_kwargs_produce_same_result( cfg = _TextAnalyzerConfigCreate( ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN ) - via_config = client.tokenize.text( + via_config = client.tokenization.text( text="L'école est fermée", tokenization=Tokenization.WORD, analyzer_config=cfg, ) - via_kwargs = client.tokenize.text( + via_kwargs = client.tokenization.text( text="L'école est fermée", tokenization=Tokenization.WORD, ascii_fold=True, @@ -137,7 +137,7 @@ def test_analyzer_config_and_kwargs_produce_same_result( assert via_config.query == via_kwargs.query def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text( + result = client.tokenization.text( text="hello world test", tokenization=Tokenization.WORD, stopword_preset="custom", @@ -149,7 +149,7 @@ def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) - assert result.query == ["hello", "world"] def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text( + result = client.tokenization.text( text="the quick", tokenization=Tokenization.WORD, stopword_preset="en-no-the", @@ -172,13 +172,13 @@ class TestDeserialization: """Verify the client correctly deserializes response fields into typed objects.""" def test_result_type(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text(text="hello", tokenization=Tokenization.WORD) + result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD) assert isinstance(result, TokenizeResult) assert isinstance(result.indexed, list) assert isinstance(result.query, list) def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text( + result = client.tokenization.text( text="L'école", tokenization=Tokenization.WORD, ascii_fold=True, @@ -191,7 +191,7 @@ 
def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> assert result.analyzer_config.stopword_preset == "en" def test_no_analyzer_config_returns_none(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenize.text(text="hello", tokenization=Tokenization.WORD) + result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD) assert result.analyzer_config is None def test_stopword_config_deserialized_on_property( @@ -214,7 +214,7 @@ def test_stopword_config_deserialized_on_property( ], } ) - result = client.tokenize.property( + result = client.tokenization.for_property( collection_name="TestDeserStopword", property_name="title", text="the quick", @@ -243,7 +243,7 @@ def test_property_result_types(self, client: weaviate.WeaviateClient) -> None: ], } ) - result = client.tokenize.property( + result = client.tokenization.for_property( collection_name="TestDeserPropTypes", property_name="tag", text=" Hello World ", @@ -302,7 +302,7 @@ class TestAsyncClient: @pytest.mark.asyncio async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None: - result = await async_client.tokenize.text( + result = await async_client.tokenization.text( text="The quick brown fox", tokenization=Tokenization.WORD, ) @@ -314,7 +314,7 @@ async def test_text_with_analyzer_config( self, async_client: weaviate.WeaviateAsyncClient ) -> None: cfg = _TextAnalyzerConfigCreate(ascii_fold=True, stopword_preset=StopwordsPreset.EN) - result = await async_client.tokenize.text( + result = await async_client.tokenization.text( text="L'école est fermée", tokenization=Tokenization.WORD, analyzer_config=cfg, @@ -341,7 +341,7 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien ], } ) - result = await async_client.tokenize.property( + result = await async_client.tokenization.for_property( collection_name="TestAsyncPropTokenize", property_name="title", text="The quick brown fox", diff --git 
a/weaviate/__init__.py b/weaviate/__init__.py index 6fd9368ea..2e7e5e58b 100644 --- a/weaviate/__init__.py +++ b/weaviate/__init__.py @@ -21,7 +21,7 @@ embedded, exceptions, outputs, - tokenize, + tokenization, types, ) from .client import Client, WeaviateAsyncClient, WeaviateClient @@ -68,7 +68,7 @@ "embedded", "exceptions", "outputs", - "tokenize", + "tokenization", "types", "use_async_with_custom", "use_async_with_embedded", diff --git a/weaviate/client.py b/weaviate/client.py index cbd12be9a..10ce4c77b 100644 --- a/weaviate/client.py +++ b/weaviate/client.py @@ -22,7 +22,7 @@ from .embedded import EmbeddedOptions from .groups import _Groups, _GroupsAsync from .rbac import _Roles, _RolesAsync -from .tokenize import _Tokenize, _TokenizeAsync +from .tokenization import _Tokenization, _TokenizationAsync from .types import NUMBER from .users import _Users, _UsersAsync @@ -83,7 +83,7 @@ def __init__( self.debug = _DebugAsync(self._connection) self.groups = _GroupsAsync(self._connection) self.roles = _RolesAsync(self._connection) - self.tokenize = _TokenizeAsync(self._connection) + self.tokenization = _TokenizationAsync(self._connection) self.users = _UsersAsync(self._connection) async def __aenter__(self) -> "WeaviateAsyncClient": @@ -159,7 +159,7 @@ def __init__( self.debug = _Debug(self._connection) self.groups = _Groups(self._connection) self.roles = _Roles(self._connection) - self.tokenize = _Tokenize(self._connection) + self.tokenization = _Tokenization(self._connection) self.users = _Users(self._connection) def __enter__(self) -> "WeaviateClient": diff --git a/weaviate/client.pyi b/weaviate/client.pyi index a6a44f8f7..8fafdc3d1 100644 --- a/weaviate/client.pyi +++ b/weaviate/client.pyi @@ -21,7 +21,7 @@ from .cluster import _Cluster, _ClusterAsync from .collections.batch.client import _BatchClientWrapper, _BatchClientWrapperAsync from .debug import _Debug, _DebugAsync from .rbac import _Roles, _RolesAsync -from .tokenize import _Tokenize, _TokenizeAsync +from 
.tokenization import _Tokenization, _TokenizationAsync from .types import NUMBER TIMEOUT_TYPE = Union[Tuple[NUMBER, NUMBER], NUMBER] @@ -36,7 +36,7 @@ class WeaviateAsyncClient(_WeaviateClientExecutor[ConnectionAsync]): debug: _DebugAsync groups: _GroupsAsync roles: _RolesAsync - tokenize: _TokenizeAsync + tokenization: _TokenizationAsync users: _UsersAsync async def close(self) -> None: ... @@ -60,7 +60,7 @@ class WeaviateClient(_WeaviateClientExecutor[ConnectionSync]): debug: _Debug groups: _Groups roles: _Roles - tokenize: _Tokenize + tokenization: _Tokenization users: _Users def close(self) -> None: ... diff --git a/weaviate/tokenization/__init__.py b/weaviate/tokenization/__init__.py new file mode 100644 index 000000000..2437f7745 --- /dev/null +++ b/weaviate/tokenization/__init__.py @@ -0,0 +1,7 @@ +"""Module for tokenization operations.""" + +from .async_ import _TokenizationAsync +from .sync import _Tokenization +from .models import TokenizeResult + +__all__ = ["_Tokenization", "_TokenizationAsync", "TokenizeResult"] diff --git a/weaviate/tokenization/async_.py b/weaviate/tokenization/async_.py new file mode 100644 index 000000000..5406a39dd --- /dev/null +++ b/weaviate/tokenization/async_.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionAsync +from weaviate.tokenization.executor import _TokenizationExecutor + + +@executor.wrap("async") +class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]): + pass diff --git a/weaviate/tokenize/executor.py b/weaviate/tokenization/executor.py similarity index 97% rename from weaviate/tokenize/executor.py rename to weaviate/tokenization/executor.py index bd2c24dc1..9ddf5f7ed 100644 --- a/weaviate/tokenize/executor.py +++ b/weaviate/tokenization/executor.py @@ -14,7 +14,7 @@ ) from weaviate.connect import executor from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes -from weaviate.tokenize.types import TokenizeResult +from 
weaviate.tokenization.models import TokenizeResult def _parse_analyzer_config(body: Dict[str, Any]) -> Optional[TextAnalyzerConfig]: @@ -51,7 +51,7 @@ def _parse_tokenize_result(body: Dict[str, Any]) -> TokenizeResult: ) -class _TokenizeExecutor(Generic[ConnectionType]): +class _TokenizationExecutor(Generic[ConnectionType]): def __init__(self, connection: ConnectionType): self._connection = connection @@ -133,7 +133,7 @@ def resp(response: Response) -> TokenizeResult: status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"), ) - def property( + def for_property( self, collection_name: str, property_name: str, diff --git a/weaviate/tokenize/types.py b/weaviate/tokenization/models.py similarity index 100% rename from weaviate/tokenize/types.py rename to weaviate/tokenization/models.py diff --git a/weaviate/tokenization/sync.py b/weaviate/tokenization/sync.py new file mode 100644 index 000000000..ab28cc98e --- /dev/null +++ b/weaviate/tokenization/sync.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionSync +from weaviate.tokenization.executor import _TokenizationExecutor + + +@executor.wrap("sync") +class _Tokenization(_TokenizationExecutor[ConnectionSync]): + pass diff --git a/weaviate/tokenize/__init__.py b/weaviate/tokenize/__init__.py deleted file mode 100644 index d0c2883c5..000000000 --- a/weaviate/tokenize/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Module for tokenize operations.""" - -from .async_ import _TokenizeAsync -from .sync import _Tokenize -from .types import TokenizeResult - -__all__ = ["_Tokenize", "_TokenizeAsync", "TokenizeResult"] diff --git a/weaviate/tokenize/async_.py b/weaviate/tokenize/async_.py deleted file mode 100644 index a59c392ea..000000000 --- a/weaviate/tokenize/async_.py +++ /dev/null @@ -1,8 +0,0 @@ -from weaviate.connect import executor -from weaviate.connect.v4 import ConnectionAsync -from weaviate.tokenize.executor import _TokenizeExecutor - - 
-@executor.wrap("async") -class _TokenizeAsync(_TokenizeExecutor[ConnectionAsync]): - pass diff --git a/weaviate/tokenize/sync.py b/weaviate/tokenize/sync.py deleted file mode 100644 index 755c42559..000000000 --- a/weaviate/tokenize/sync.py +++ /dev/null @@ -1,8 +0,0 @@ -from weaviate.connect import executor -from weaviate.connect.v4 import ConnectionSync -from weaviate.tokenize.executor import _TokenizeExecutor - - -@executor.wrap("sync") -class _Tokenize(_TokenizeExecutor[ConnectionSync]): - pass From ede0b96477aa44f5de9fb964e8a48a84e126b408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Wed, 15 Apr 2026 10:47:01 +0100 Subject: [PATCH 74/99] fix: add version gate --- integration/test_tokenize.py | 24 ++++++++++++++++++++++++ weaviate/tokenization/executor.py | 17 +++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index b3ecff875..47321aaf5 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -22,6 +22,7 @@ _TextAnalyzerConfigCreate, ) from weaviate.config import AdditionalConfig +from weaviate.exceptions import WeaviateUnsupportedFeatureError from weaviate.tokenization.models import TokenizeResult @@ -292,6 +293,29 @@ def test_empty_config_is_valid(self) -> None: assert cfg.stopwordPreset is None +# --------------------------------------------------------------------------- +# Version gate +# --------------------------------------------------------------------------- + + +class TestVersionGate: + """On Weaviate < 1.37 the client must raise before sending the request.""" + + def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None: + if client._connection._weaviate_version.is_at_least(1, 37, 0): + pytest.skip("Version gate only applies to Weaviate < 1.37.0") + with pytest.raises(WeaviateUnsupportedFeatureError): + client.tokenization.text(text="hello", tokenization=Tokenization.WORD) + + def 
test_for_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None: + if client._connection._weaviate_version.is_at_least(1, 37, 0): + pytest.skip("Version gate only applies to Weaviate < 1.37.0") + with pytest.raises(WeaviateUnsupportedFeatureError): + client.tokenization.for_property( + collection_name="Any", property_name="title", text="hello" + ) + + # --------------------------------------------------------------------------- # Async client # --------------------------------------------------------------------------- diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 9ddf5f7ed..de3f68061 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -14,6 +14,7 @@ ) from weaviate.connect import executor from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes +from weaviate.exceptions import WeaviateUnsupportedFeatureError from weaviate.tokenization.models import TokenizeResult @@ -55,6 +56,14 @@ class _TokenizationExecutor(Generic[ConnectionType]): def __init__(self, connection: ConnectionType): self._connection = connection + def _check_version(self) -> None: + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Tokenization", + str(self._connection._weaviate_version), + "1.37.0", + ) + def text( self, text: str, @@ -87,7 +96,11 @@ def text( Returns: A TokenizeResult with indexed and query token lists. + + Raises: + WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. """ + self._check_version() tokenization_str = ( tokenization.value if isinstance(tokenization, Tokenization) else tokenization ) @@ -148,7 +161,11 @@ def for_property( Returns: A TokenizeResult with indexed and query token lists. + + Raises: + WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. 
""" + self._check_version() path = f"/schema/{collection_name}/properties/{property_name}/tokenize" payload: Dict[str, Any] = {"text": text} From 8d379f4142222f7483d29042a9ec75f92c2e4cc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Wed, 15 Apr 2026 10:50:22 +0100 Subject: [PATCH 75/99] refactor: update tokenization type to use Tokenization enum in TokenizeResult and related tests --- integration/test_tokenize.py | 10 +++++----- weaviate/tokenization/executor.py | 2 +- weaviate/tokenization/models.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index 47321aaf5..24b515f2c 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -72,13 +72,13 @@ def test_tokenization_enum( ) -> None: result = client.tokenization.text(text=text, tokenization=tokenization) assert isinstance(result, TokenizeResult) - assert result.tokenization == tokenization.value + assert result.tokenization == tokenization assert result.indexed == expected_tokens assert result.query == expected_tokens def test_tokenization_string(self, client: weaviate.WeaviateClient) -> None: result = client.tokenization.text(text="hello world", tokenization="word") - assert result.tokenization == "word" + assert result.tokenization == Tokenization.WORD assert result.indexed == ["hello", "world"] def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None: @@ -221,7 +221,7 @@ def test_stopword_config_deserialized_on_property( text="the quick", ) assert isinstance(result, TokenizeResult) - assert result.tokenization == "word" + assert result.tokenization == Tokenization.WORD # Stopword config should be deserialized when present if result.stopword_config is not None: assert isinstance(result.stopword_config, StopwordsConfig) @@ -250,7 +250,7 @@ def test_property_result_types(self, client: weaviate.WeaviateClient) -> None: text=" Hello World ", ) assert 
isinstance(result, TokenizeResult) - assert result.tokenization == "field" + assert result.tokenization == Tokenization.FIELD assert result.indexed == ["Hello World"] finally: client.collections.delete("TestDeserPropTypes") @@ -371,7 +371,7 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien text="The quick brown fox", ) assert isinstance(result, TokenizeResult) - assert result.tokenization == "word" + assert result.tokenization == Tokenization.WORD assert result.indexed == ["the", "quick", "brown", "fox"] assert "the" not in result.query assert "quick" in result.query diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index de3f68061..6228d7350 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -44,7 +44,7 @@ def _parse_stopword_config(body: Dict[str, Any]) -> Optional[StopwordsConfig]: def _parse_tokenize_result(body: Dict[str, Any]) -> TokenizeResult: return TokenizeResult( - tokenization=body["tokenization"], + tokenization=Tokenization(body["tokenization"]), indexed=body["indexed"], query=body["query"], analyzer_config=_parse_analyzer_config(body), diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py index ba4009b2d..ecb01f695 100644 --- a/weaviate/tokenization/models.py +++ b/weaviate/tokenization/models.py @@ -3,7 +3,7 @@ from dataclasses import dataclass, field from typing import List, Optional -from weaviate.collections.classes.config import StopwordsConfig, TextAnalyzerConfig +from weaviate.collections.classes.config import StopwordsConfig, TextAnalyzerConfig, Tokenization @dataclass @@ -18,7 +18,7 @@ class TokenizeResult: stopword_config: The stopword configuration that was used, if any. 
""" - tokenization: str + tokenization: Tokenization indexed: List[str] query: List[str] analyzer_config: Optional[TextAnalyzerConfig] = field(default=None) From 91a359a38a56b7b812997b3e8280be6ae1d7b71e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Wed, 15 Apr 2026 11:04:13 +0100 Subject: [PATCH 76/99] refactor: models --- integration/test_tokenize.py | 91 +++++++++++++++---------------- weaviate/outputs/__init__.py | 15 ++++- weaviate/outputs/tokenization.py | 5 ++ weaviate/tokenization/executor.py | 78 ++------------------------ weaviate/tokenization/models.py | 47 +++++++++++++--- 5 files changed, 108 insertions(+), 128 deletions(-) create mode 100644 weaviate/outputs/tokenization.py diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index 24b515f2c..ddd67b656 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -1,9 +1,10 @@ -"""Integration tests for the tokenize module. +"""Integration tests for the tokenization module. 
These tests cover the client's responsibilities: -- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, kwargs) +- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, _StopwordsCreate) - Correct deserialization of responses into typed objects - Client-side validation (_TextAnalyzerConfigCreate rejects invalid input) +- Version gate (>= 1.37.0) - Both sync and async client paths """ @@ -46,7 +47,7 @@ async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]: # --------------------------------------------------------------------------- -# Serialization: enums, strings, kwargs, _TextAnalyzerConfigCreate +# Serialization # --------------------------------------------------------------------------- @@ -76,72 +77,68 @@ def test_tokenization_enum( assert result.indexed == expected_tokens assert result.query == expected_tokens - def test_tokenization_string(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenization.text(text="hello world", tokenization="word") + def test_no_analyzer_config(self, client: weaviate.WeaviateClient) -> None: + result = client.tokenization.text(text="hello world", tokenization=Tokenization.WORD) assert result.tokenization == Tokenization.WORD assert result.indexed == ["hello", "world"] + assert result.analyzer_config is None - def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None: + def test_ascii_fold(self, client: weaviate.WeaviateClient) -> None: + cfg = _TextAnalyzerConfigCreate(ascii_fold=True) result = client.tokenization.text( - text="The quick brown fox", + text="L'école est fermée", tokenization=Tokenization.WORD, - stopword_preset=StopwordsPreset.EN, + analyzer_config=cfg, ) - assert "the" not in result.query - assert "quick" in result.query + assert result.indexed == ["l", "ecole", "est", "fermee"] - def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None: + def test_ascii_fold_with_ignore(self, client: 
weaviate.WeaviateClient) -> None: + cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é"]) result = client.tokenization.text( - text="The quick brown fox", + text="L'école est fermée", tokenization=Tokenization.WORD, - stopword_preset="en", + analyzer_config=cfg, ) - assert "the" not in result.query + assert result.indexed == ["l", "école", "est", "fermée"] - def test_ascii_fold_via_kwargs(self, client: weaviate.WeaviateClient) -> None: + def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None: + cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.EN) result = client.tokenization.text( - text="L'école est fermée", + text="The quick brown fox", tokenization=Tokenization.WORD, - ascii_fold=True, + analyzer_config=cfg, ) - assert result.indexed == ["l", "ecole", "est", "fermee"] + assert "the" not in result.query + assert "quick" in result.query - def test_ascii_fold_via_analyzer_config(self, client: weaviate.WeaviateClient) -> None: - cfg = _TextAnalyzerConfigCreate(ascii_fold=True) + def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None: + cfg = _TextAnalyzerConfigCreate(stopword_preset="en") result = client.tokenization.text( - text="L'école est fermée", + text="The quick brown fox", tokenization=Tokenization.WORD, analyzer_config=cfg, ) - assert result.indexed == ["l", "ecole", "est", "fermee"] + assert "the" not in result.query - def test_analyzer_config_and_kwargs_produce_same_result( - self, client: weaviate.WeaviateClient - ) -> None: - """analyzer_config object and equivalent kwargs must produce identical output.""" + def test_ascii_fold_combined_with_stopwords(self, client: weaviate.WeaviateClient) -> None: cfg = _TextAnalyzerConfigCreate( ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN ) - via_config = client.tokenization.text( - text="L'école est fermée", + result = client.tokenization.text( + text="The école est fermée", 
tokenization=Tokenization.WORD, analyzer_config=cfg, ) - via_kwargs = client.tokenization.text( - text="L'école est fermée", - tokenization=Tokenization.WORD, - ascii_fold=True, - ascii_fold_ignore=["é"], - stopword_preset=StopwordsPreset.EN, - ) - assert via_config.indexed == via_kwargs.indexed - assert via_config.query == via_kwargs.query + assert result.indexed == ["the", "école", "est", "fermée"] + assert "the" not in result.query + assert "école" in result.query - def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) -> None: + def test_stopword_presets_custom_additions(self, client: weaviate.WeaviateClient) -> None: + cfg = _TextAnalyzerConfigCreate(stopword_preset="custom") result = client.tokenization.text( text="hello world test", tokenization=Tokenization.WORD, - stopword_preset="custom", + analyzer_config=cfg, stopword_presets={ "custom": _StopwordsCreate(preset=None, additions=["test"], removals=None), }, @@ -150,10 +147,11 @@ def test_stopword_presets_serialization(self, client: weaviate.WeaviateClient) - assert result.query == ["hello", "world"] def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None: + cfg = _TextAnalyzerConfigCreate(stopword_preset="en-no-the") result = client.tokenization.text( text="the quick", tokenization=Tokenization.WORD, - stopword_preset="en-no-the", + analyzer_config=cfg, stopword_presets={ "en-no-the": _StopwordsCreate( preset=StopwordsPreset.EN, additions=None, removals=["the"] @@ -165,7 +163,7 @@ def test_stopword_presets_with_base_and_removals(self, client: weaviate.Weaviate # --------------------------------------------------------------------------- -# Deserialization: typed response fields +# Deserialization # --------------------------------------------------------------------------- @@ -179,12 +177,13 @@ def test_result_type(self, client: weaviate.WeaviateClient) -> None: assert isinstance(result.query, list) def 
test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None: + cfg = _TextAnalyzerConfigCreate( + ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN + ) result = client.tokenization.text( text="L'école", tokenization=Tokenization.WORD, - ascii_fold=True, - ascii_fold_ignore=["é"], - stopword_preset=StopwordsPreset.EN, + analyzer_config=cfg, ) assert isinstance(result.analyzer_config, TextAnalyzerConfig) assert result.analyzer_config.ascii_fold is True @@ -198,7 +197,6 @@ def test_no_analyzer_config_returns_none(self, client: weaviate.WeaviateClient) def test_stopword_config_deserialized_on_property( self, client: weaviate.WeaviateClient ) -> None: - """Property endpoint returns stopwordConfig; verify it deserializes to StopwordsConfig.""" client.collections.delete("TestDeserStopword") try: client.collections.create_from_dict( @@ -222,7 +220,6 @@ def test_stopword_config_deserialized_on_property( ) assert isinstance(result, TokenizeResult) assert result.tokenization == Tokenization.WORD - # Stopword config should be deserialized when present if result.stopword_config is not None: assert isinstance(result.stopword_config, StopwordsConfig) finally: @@ -322,7 +319,7 @@ def test_for_property_raises_on_old_server(self, client: weaviate.WeaviateClient class TestAsyncClient: - """Verify both text() and property() work through the async client.""" + """Verify both text() and for_property() work through the async client.""" @pytest.mark.asyncio async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None: diff --git a/weaviate/outputs/__init__.py b/weaviate/outputs/__init__.py index 62193fc35..ba3cf894f 100644 --- a/weaviate/outputs/__init__.py +++ b/weaviate/outputs/__init__.py @@ -1,4 +1,16 @@ -from . import aggregate, backup, batch, cluster, config, data, query, replication, tenants, users +from . 
import ( + aggregate, + backup, + batch, + cluster, + config, + data, + query, + replication, + tenants, + tokenization, + users, +) __all__ = [ "aggregate", @@ -10,5 +22,6 @@ "query", "replication", "tenants", + "tokenization", "users", ] diff --git a/weaviate/outputs/tokenization.py b/weaviate/outputs/tokenization.py new file mode 100644 index 000000000..0854f8b0d --- /dev/null +++ b/weaviate/outputs/tokenization.py @@ -0,0 +1,5 @@ +from weaviate.tokenization.models import TokenizeResult + +__all__ = [ + "TokenizeResult", +] diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 6228d7350..226aeb6c6 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -1,13 +1,10 @@ """Tokenize executor.""" -from typing import Any, Dict, Generic, List, Optional, Union +from typing import Any, Dict, Generic, Optional from httpx import Response from weaviate.collections.classes.config import ( - StopwordsConfig, - StopwordsPreset, - TextAnalyzerConfig, Tokenization, _StopwordsCreate, _TextAnalyzerConfigCreate, @@ -18,40 +15,6 @@ from weaviate.tokenization.models import TokenizeResult -def _parse_analyzer_config(body: Dict[str, Any]) -> Optional[TextAnalyzerConfig]: - ac = body.get("analyzerConfig") - if ac is None: - return None - if "asciiFold" not in ac and "stopwordPreset" not in ac: - return None - return TextAnalyzerConfig( - ascii_fold=ac.get("asciiFold", False), - ascii_fold_ignore=ac.get("asciiFoldIgnore"), - stopword_preset=ac.get("stopwordPreset"), - ) - - -def _parse_stopword_config(body: Dict[str, Any]) -> Optional[StopwordsConfig]: - sc = body.get("stopwordConfig") - if sc is None: - return None - return StopwordsConfig( - preset=StopwordsPreset(sc["preset"]) if sc.get("preset") else StopwordsPreset.NONE, - additions=sc.get("additions"), - removals=sc.get("removals"), - ) - - -def _parse_tokenize_result(body: Dict[str, Any]) -> TokenizeResult: - return TokenizeResult( - 
tokenization=Tokenization(body["tokenization"]), - indexed=body["indexed"], - query=body["query"], - analyzer_config=_parse_analyzer_config(body), - stopword_config=_parse_stopword_config(body), - ) - - class _TokenizationExecutor(Generic[ConnectionType]): def __init__(self, connection: ConnectionType): self._connection = connection @@ -67,29 +30,17 @@ def _check_version(self) -> None: def text( self, text: str, - tokenization: Union[Tokenization, str], + tokenization: Tokenization, *, analyzer_config: Optional[_TextAnalyzerConfigCreate] = None, - ascii_fold: Optional[bool] = None, - ascii_fold_ignore: Optional[List[str]] = None, - stopword_preset: Optional[Union[StopwordsPreset, str]] = None, stopword_presets: Optional[Dict[str, _StopwordsCreate]] = None, ) -> executor.Result[TokenizeResult]: """Tokenize text using the generic /v1/tokenize endpoint. - Analyzer settings can be provided either via a ``_TextAnalyzerConfigCreate`` - object **or** via the individual keyword arguments (``ascii_fold``, - ``ascii_fold_ignore``, ``stopword_preset``). If ``analyzer_config`` is - given the individual keyword arguments are ignored. - Args: text: The text to tokenize. tokenization: The tokenization method to use (e.g. Tokenization.WORD). - analyzer_config: A ``_TextAnalyzerConfigCreate`` instance that bundles - ascii_fold, ascii_fold_ignore, and stopword_preset settings. - ascii_fold: Whether to fold accented characters to ASCII equivalents. - ascii_fold_ignore: Characters to exclude from ASCII folding. - stopword_preset: Stopword preset name to apply for query-time filtering. + analyzer_config: Text analyzer settings (ASCII folding, stopword preset). stopword_presets: Custom stopword preset definitions, keyed by name. Each value is a ``_StopwordsCreate`` with optional preset, additions, and removals fields. @@ -101,33 +52,16 @@ def text( WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. 
""" self._check_version() - tokenization_str = ( - tokenization.value if isinstance(tokenization, Tokenization) else tokenization - ) payload: Dict[str, Any] = { "text": text, - "tokenization": tokenization_str, + "tokenization": tokenization.value, } if analyzer_config is not None: ac_dict = analyzer_config._to_dict() if ac_dict: payload["analyzerConfig"] = ac_dict - else: - ac: Dict[str, Any] = {} - if ascii_fold is not None: - ac["asciiFold"] = ascii_fold - if ascii_fold_ignore is not None: - ac["asciiFoldIgnore"] = ascii_fold_ignore - if stopword_preset is not None: - ac["stopwordPreset"] = ( - stopword_preset.value - if isinstance(stopword_preset, StopwordsPreset) - else stopword_preset - ) - if ac: - payload["analyzerConfig"] = ac if stopword_presets is not None: payload["stopwordPresets"] = { @@ -135,7 +69,7 @@ def text( } def resp(response: Response) -> TokenizeResult: - return _parse_tokenize_result(response.json()) + return TokenizeResult.model_validate(response.json()) return executor.execute( response_callback=resp, @@ -171,7 +105,7 @@ def for_property( payload: Dict[str, Any] = {"text": text} def resp(response: Response) -> TokenizeResult: - return _parse_tokenize_result(response.json()) + return TokenizeResult.model_validate(response.json()) return executor.execute( response_callback=resp, diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py index ecb01f695..8bfa508f8 100644 --- a/weaviate/tokenization/models.py +++ b/weaviate/tokenization/models.py @@ -1,13 +1,18 @@ -"""Return types for tokenize operations.""" +"""Return types for tokenization operations.""" -from dataclasses import dataclass, field -from typing import List, Optional +from typing import Any, Dict, List, Optional -from weaviate.collections.classes.config import StopwordsConfig, TextAnalyzerConfig, Tokenization +from pydantic import BaseModel, ConfigDict, Field, field_validator +from weaviate.collections.classes.config import ( + StopwordsConfig, + 
StopwordsPreset, + TextAnalyzerConfig, + Tokenization, +) -@dataclass -class TokenizeResult: + +class TokenizeResult(BaseModel): """Result of a tokenization operation. Attributes: @@ -18,8 +23,34 @@ class TokenizeResult: stopword_config: The stopword configuration that was used, if any. """ + model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) + tokenization: Tokenization indexed: List[str] query: List[str] - analyzer_config: Optional[TextAnalyzerConfig] = field(default=None) - stopword_config: Optional[StopwordsConfig] = field(default=None) + analyzer_config: Optional[TextAnalyzerConfig] = Field(default=None, alias="analyzerConfig") + stopword_config: Optional[StopwordsConfig] = Field(default=None, alias="stopwordConfig") + + @field_validator("analyzer_config", mode="before") + @classmethod + def _parse_analyzer_config(cls, v: Optional[Dict[str, Any]]) -> Optional[TextAnalyzerConfig]: + if v is None: + return None + if "asciiFold" not in v and "stopwordPreset" not in v: + return None + return TextAnalyzerConfig( + ascii_fold=v.get("asciiFold", False), + ascii_fold_ignore=v.get("asciiFoldIgnore"), + stopword_preset=v.get("stopwordPreset"), + ) + + @field_validator("stopword_config", mode="before") + @classmethod + def _parse_stopword_config(cls, v: Optional[Dict[str, Any]]) -> Optional[StopwordsConfig]: + if v is None: + return None + return StopwordsConfig( + preset=StopwordsPreset(v["preset"]), + additions=v.get("additions"), + removals=v.get("removals"), + ) From 61665e712dac3d6e0665b9e5e8e7ae85d8e47144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Wed, 15 Apr 2026 11:08:31 +0100 Subject: [PATCH 77/99] refactor: move tokenize property to class config --- integration/test_tokenize.py | 27 +++++++---------- weaviate/collections/config/executor.py | 40 +++++++++++++++++++++++++ weaviate/tokenization/executor.py | 36 ---------------------- 3 files changed, 50 insertions(+), 53 deletions(-) diff --git 
a/integration/test_tokenize.py b/integration/test_tokenize.py index ddd67b656..565cb197d 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -213,11 +213,8 @@ def test_stopword_config_deserialized_on_property( ], } ) - result = client.tokenization.for_property( - collection_name="TestDeserStopword", - property_name="title", - text="the quick", - ) + col = client.collections.get("TestDeserStopword") + result = col.config.tokenize_property(property_name="title", text="the quick") assert isinstance(result, TokenizeResult) assert result.tokenization == Tokenization.WORD if result.stopword_config is not None: @@ -241,11 +238,8 @@ def test_property_result_types(self, client: weaviate.WeaviateClient) -> None: ], } ) - result = client.tokenization.for_property( - collection_name="TestDeserPropTypes", - property_name="tag", - text=" Hello World ", - ) + col = client.collections.get("TestDeserPropTypes") + result = col.config.tokenize_property(property_name="tag", text=" Hello World ") assert isinstance(result, TokenizeResult) assert result.tokenization == Tokenization.FIELD assert result.indexed == ["Hello World"] @@ -304,13 +298,12 @@ def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> Non with pytest.raises(WeaviateUnsupportedFeatureError): client.tokenization.text(text="hello", tokenization=Tokenization.WORD) - def test_for_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None: + def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None: if client._connection._weaviate_version.is_at_least(1, 37, 0): pytest.skip("Version gate only applies to Weaviate < 1.37.0") + col = client.collections.get("Any") with pytest.raises(WeaviateUnsupportedFeatureError): - client.tokenization.for_property( - collection_name="Any", property_name="title", text="hello" - ) + col.config.tokenize_property(property_name="title", text="hello") # 
--------------------------------------------------------------------------- @@ -319,7 +312,7 @@ def test_for_property_raises_on_old_server(self, client: weaviate.WeaviateClient class TestAsyncClient: - """Verify both text() and for_property() work through the async client.""" + """Verify text() and tokenize_property() work through the async client.""" @pytest.mark.asyncio async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None: @@ -362,8 +355,8 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien ], } ) - result = await async_client.tokenization.for_property( - collection_name="TestAsyncPropTokenize", + col = async_client.collections.get("TestAsyncPropTokenize") + result = await col.config.tokenize_property( property_name="title", text="The quick brown fox", ) diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index bd302c3c4..9e9390cda 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -56,6 +56,7 @@ WeaviateInvalidInputError, WeaviateUnsupportedFeatureError, ) +from weaviate.tokenization.models import TokenizeResult from weaviate.util import ( _capitalize_first_letter, _decode_json_response_dict, @@ -666,3 +667,42 @@ def resp(res: Response) -> bool: error_msg="Property may not exist", status_codes=_ExpectedStatusCodes(ok_in=[200], error="property exists"), ) + + def tokenize_property( + self, + property_name: str, + text: str, + ) -> executor.Result[TokenizeResult]: + """Tokenize text using a property's configured tokenization settings. + + Args: + property_name: The property name whose tokenization config to use. + text: The text to tokenize. + + Returns: + A TokenizeResult with indexed and query token lists. + + Raises: + WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. 
+ """ + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Tokenization", + str(self._connection._weaviate_version), + "1.37.0", + ) + + path = f"/schema/{self._name}/properties/{property_name}/tokenize" + payload: Dict[str, Any] = {"text": text} + + def resp(response: Response) -> TokenizeResult: + return TokenizeResult.model_validate(response.json()) + + return executor.execute( + response_callback=resp, + method=self._connection.post, + path=path, + weaviate_object=payload, + error_msg="Property tokenization failed", + status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"), + ) diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 226aeb6c6..5093c14e9 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -79,39 +79,3 @@ def resp(response: Response) -> TokenizeResult: error_msg="Tokenization failed", status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"), ) - - def for_property( - self, - collection_name: str, - property_name: str, - text: str, - ) -> executor.Result[TokenizeResult]: - """Tokenize text using a property's configured tokenization settings. - - Args: - collection_name: The collection (class) name. - property_name: The property name whose tokenization config to use. - text: The text to tokenize. - - Returns: - A TokenizeResult with indexed and query token lists. - - Raises: - WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. 
- """ - self._check_version() - path = f"/schema/{collection_name}/properties/{property_name}/tokenize" - - payload: Dict[str, Any] = {"text": text} - - def resp(response: Response) -> TokenizeResult: - return TokenizeResult.model_validate(response.json()) - - return executor.execute( - response_callback=resp, - method=self._connection.post, - path=path, - weaviate_object=payload, - error_msg="Property tokenization failed", - status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"), - ) From aea03278f3ee5712608589138637d77131364955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Wed, 15 Apr 2026 11:16:24 +0100 Subject: [PATCH 78/99] fix: remove trailing whitespace in __init__.py --- weaviate/outputs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/outputs/__init__.py b/weaviate/outputs/__init__.py index 5381d6dc4..75cb031e0 100644 --- a/weaviate/outputs/__init__.py +++ b/weaviate/outputs/__init__.py @@ -9,7 +9,7 @@ query, replication, tenants, - tokenization, + tokenization, users, ) From ef55ce283b1aae518cb8eacc6dba7fdf6530b709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Wed, 15 Apr 2026 11:55:39 +0100 Subject: [PATCH 79/99] test: add version gate for Weaviate >= 1.37.0 in tokenization tests --- integration/test_tokenize.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index 565cb197d..97587235b 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -36,6 +36,12 @@ def client() -> Generator[weaviate.WeaviateClient, None, None]: c.close() +@pytest.fixture(autouse=False) +def require_1_37(client: weaviate.WeaviateClient) -> None: + if client._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("Tokenization requires Weaviate >= 1.37.0") + + @pytest_asyncio.fixture async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, 
None]: c = weaviate.use_async_with_local( @@ -51,6 +57,7 @@ async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]: # --------------------------------------------------------------------------- +@pytest.mark.usefixtures("require_1_37") class TestSerialization: """Verify the client correctly serializes different input forms.""" @@ -167,6 +174,7 @@ def test_stopword_presets_with_base_and_removals(self, client: weaviate.Weaviate # --------------------------------------------------------------------------- +@pytest.mark.usefixtures("require_1_37") class TestDeserialization: """Verify the client correctly deserializes response fields into typed objects.""" @@ -311,6 +319,7 @@ def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateC # --------------------------------------------------------------------------- +@pytest.mark.usefixtures("require_1_37") class TestAsyncClient: """Verify text() and tokenize_property() work through the async client.""" From dff05f534c200662f7bf820cecb4efb77a5957ac Mon Sep 17 00:00:00 2001 From: Marcin Antas Date: Sat, 14 Mar 2026 12:03:38 +0100 Subject: [PATCH 80/99] feat: add support for blobHash property type --- test/collection/test_config.py | 8 ++++++++ weaviate/collections/classes/config.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 5a69b21c5..84bba4a63 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -1340,6 +1340,10 @@ def test_config_create_with_properties( name="blob", data_type=DataType.BLOB, ), + Property( + name="blob_hash", + data_type=DataType.BLOB_HASH, + ), Property( name="phone_number", data_type=DataType.PHONE_NUMBER, @@ -1403,6 +1407,10 @@ def test_config_create_with_properties( "dataType": ["blob"], "name": "blob", }, + { + "dataType": ["blobHash"], + "name": "blob_hash", + }, { "dataType": ["phoneNumber"], "name": "phone_number", diff --git 
a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 7874cd8e2..6d60482a3 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -148,6 +148,7 @@ class DataType(str, BaseEnum): UUID_ARRAY: UUID array data type. GEO_COORDINATES: Geo coordinates data type. BLOB: Blob data type. + BLOB_HASH: Blob hash data type. PHONE_NUMBER: Phone number data type. OBJECT: Object data type. OBJECT_ARRAY: Object array data type. @@ -167,6 +168,7 @@ class DataType(str, BaseEnum): UUID_ARRAY = "uuid[]" GEO_COORDINATES = "geoCoordinates" BLOB = "blob" + BLOB_HASH = "blobHash" PHONE_NUMBER = "phoneNumber" OBJECT = "object" OBJECT_ARRAY = "object[]" From 906b35b62441aeca41e7628d6ddeace5dbc30bb2 Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Thu, 16 Apr 2026 16:00:25 +0200 Subject: [PATCH 81/99] Add full_with_profile --- integration/test_collection_query_profile.py | 30 ++++++++++++++++++++ weaviate/collections/classes/grpc.py | 24 +++++++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/integration/test_collection_query_profile.py b/integration/test_collection_query_profile.py index 0c68c9e69..da93b90f5 100644 --- a/integration/test_collection_query_profile.py +++ b/integration/test_collection_query_profile.py @@ -173,6 +173,36 @@ def test_near_vector_group_by_with_query_profile( assert_common_profile(shard.searches["vector"]) +def test_full_with_profile(collection_factory: CollectionFactory) -> None: + """Test that MetadataQuery.full_with_profile() returns profiling and all other metadata.""" + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery.full_with_profile(), + limit=1, + ) + assert len(result.objects) == 1 + obj = result.objects[0] + assert obj.metadata.distance is not None + assert obj.metadata.creation_time is not None + assert 
obj.metadata.last_update_time is not None + assert obj.metadata.score is not None + assert obj.metadata.explain_score is not None + + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + assert_common_profile(result.query_profile.shards[0].searches["vector"]) + + +def test_full_excludes_query_profile(collection_factory: CollectionFactory) -> None: + """Test that MetadataQuery.full() does not include query profiling.""" + collection = _create_and_populate(collection_factory) + result = collection.query.fetch_objects( + return_metadata=MetadataQuery.full(), + ) + assert result.query_profile is None + + def test_no_query_profile_when_not_requested( collection_factory: CollectionFactory, ) -> None: diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index 20ece3bc0..bdcc53dd9 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -94,7 +94,28 @@ class MetadataQuery(_WeaviateInput): @classmethod def full(cls) -> "MetadataQuery": - """Return a MetadataQuery with all fields set to True.""" + """Return a MetadataQuery with all fields set to True. + + NOTE: `query_profile` is excluded because it adds performance overhead. + Use `full_with_profile()` to include it. + """ + return cls( + creation_time=True, + last_update_time=True, + distance=True, + certainty=True, + score=True, + explain_score=True, + is_consistent=True, + ) + + @classmethod + def full_with_profile(cls) -> "MetadataQuery": + """Return a MetadataQuery with all fields set to True, including query profiling. + + Query profiling adds per-shard execution timing breakdowns to the response + but has performance overhead. Requires Weaviate >= 1.36.9. 
+ """ return cls( creation_time=True, last_update_time=True, @@ -103,6 +124,7 @@ def full(cls) -> "MetadataQuery": score=True, explain_score=True, is_consistent=True, + query_profile=True, ) From 66a2fb2e3e164c13e9251ae896e1adc431f9d971 Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Fri, 17 Apr 2026 12:51:19 +0200 Subject: [PATCH 82/99] Refactor RBAC permissions --- integration/test_rbac.py | 29 ++++++++++++++++++++++++++--- weaviate/rbac/models.py | 19 +++++++++++++++---- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/integration/test_rbac.py b/integration/test_rbac.py index 86719b6ce..0f8657a2d 100644 --- a/integration/test_rbac.py +++ b/integration/test_rbac.py @@ -427,9 +427,9 @@ 32, # Minimum version for alias permissions ), ( - Permissions.mcp(manage=True), + Permissions.mcp(create=True, read=True, update=True), Role( - name="ManageMCP", + name="MCPAll", alias_permissions=[], cluster_permissions=[], users_permissions=[], @@ -437,7 +437,30 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], - mcp_permissions=[MCPPermissionOutput(actions={Actions.MCP.MANAGE})], + mcp_permissions=[ + MCPPermissionOutput( + actions={Actions.MCP.CREATE, Actions.MCP.READ, Actions.MCP.UPDATE} + ) + ], + nodes_permissions=[], + tenants_permissions=[], + replicate_permissions=[], + groups_permissions=[], + ), + 37, # Minimum version for MCP permissions + ), + ( + Permissions.mcp(read=True), + Role( + name="MCPRead", + alias_permissions=[], + cluster_permissions=[], + users_permissions=[], + collections_permissions=[], + roles_permissions=[], + data_permissions=[], + backups_permissions=[], + mcp_permissions=[MCPPermissionOutput(actions={Actions.MCP.READ})], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], diff --git a/weaviate/rbac/models.py b/weaviate/rbac/models.py index dfdbc48f4..8e0989542 100644 --- a/weaviate/rbac/models.py +++ b/weaviate/rbac/models.py @@ -253,7 +253,9 @@ def 
values() -> List[str]: class MCPAction(str, _Action, Enum): - MANAGE = "manage_mcp" + CREATE = "create_mcp" + READ = "read_mcp" + UPDATE = "update_mcp" @staticmethod def values() -> List[str]: @@ -1053,9 +1055,18 @@ def backup( return permissions @staticmethod - def mcp(*, manage: bool = False) -> PermissionsCreateType: - if manage: - return [_MCPPermission(actions={MCPAction.MANAGE})] + def mcp( + *, create: bool = False, read: bool = False, update: bool = False + ) -> PermissionsCreateType: + actions: Set[MCPAction] = set() + if create: + actions.add(MCPAction.CREATE) + if read: + actions.add(MCPAction.READ) + if update: + actions.add(MCPAction.UPDATE) + if len(actions) > 0: + return [_MCPPermission(actions=actions)] return [] @staticmethod From 0955364bce3c6359737e857ceebfb948a162fb8e Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Mon, 20 Apr 2026 08:33:08 +0200 Subject: [PATCH 83/99] Bump Weaviate version --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index a7d6cca4d..8dd157443 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,7 +28,7 @@ env: WEAVIATE_134: 1.34.19 WEAVIATE_135: 1.35.16-efdedfa WEAVIATE_136: 1.36.9-d905e6c - WEAVIATE_137: 1.37.0-rc.1-bc3891e + WEAVIATE_137: 1.37.1 jobs: lint-and-format: From 3624e8b571d8c388befc61e232e0cecff832b0f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 20 Apr 2026 15:52:55 +0100 Subject: [PATCH 84/99] refactor: tokenization executor and models to support stopword configurations and improve response handling --- integration/test_tokenize.py | 245 +++++++++++++++++++----------- weaviate/tokenization/executor.py | 49 +++++- weaviate/tokenization/models.py | 46 +----- 3 files changed, 202 insertions(+), 138 deletions(-) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index 
97587235b..d692a4808 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -2,10 +2,17 @@ These tests cover the client's responsibilities: - Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, _StopwordsCreate) -- Correct deserialization of responses into typed objects -- Client-side validation (_TextAnalyzerConfigCreate rejects invalid input) +- Correct deserialization of responses into the TokenizeResult object +- Client-side validation (_TextAnalyzerConfigCreate, stopwords/stopword_presets mutex) - Version gate (>= 1.37.0) - Both sync and async client paths + +Server-side behavior this client relies on: +- Word tokenization defaults to preset "en" when no stopword config is sent. +- The generic /v1/tokenize response is minimal: only ``indexed`` and ``query`` + are returned. The property-level endpoint additionally returns ``tokenization``. +- ``stopwords`` and ``stopword_presets`` are mutually exclusive on the generic + endpoint — the server rejects requests that set both. """ from typing import AsyncGenerator, Generator @@ -15,9 +22,7 @@ import weaviate from weaviate.collections.classes.config import ( - StopwordsConfig, StopwordsPreset, - TextAnalyzerConfig, Tokenization, _StopwordsCreate, _TextAnalyzerConfigCreate, @@ -62,13 +67,31 @@ class TestSerialization: """Verify the client correctly serializes different input forms.""" @pytest.mark.parametrize( - "tokenization,text,expected_tokens", + "tokenization,text,expected_indexed,expected_query", [ - (Tokenization.WORD, "The quick brown fox", ["the", "quick", "brown", "fox"]), - (Tokenization.LOWERCASE, "Hello World Test", ["hello", "world", "test"]), - (Tokenization.WHITESPACE, "Hello World Test", ["Hello", "World", "Test"]), - (Tokenization.FIELD, " Hello World ", ["Hello World"]), - (Tokenization.TRIGRAM, "Hello", ["hel", "ell", "llo"]), + # "the" is an English stopword — filtered from the query output + # by the server's default "en" preset for word tokenization. 
+ ( + Tokenization.WORD, + "The quick brown fox", + ["the", "quick", "brown", "fox"], + ["quick", "brown", "fox"], + ), + # Non-word tokenizations do not apply the default "en" preset. + ( + Tokenization.LOWERCASE, + "Hello World Test", + ["hello", "world", "test"], + ["hello", "world", "test"], + ), + ( + Tokenization.WHITESPACE, + "Hello World Test", + ["Hello", "World", "Test"], + ["Hello", "World", "Test"], + ), + (Tokenization.FIELD, " Hello World ", ["Hello World"], ["Hello World"]), + (Tokenization.TRIGRAM, "Hello", ["hel", "ell", "llo"], ["hel", "ell", "llo"]), ], ) def test_tokenization_enum( @@ -76,19 +99,35 @@ def test_tokenization_enum( client: weaviate.WeaviateClient, tokenization: Tokenization, text: str, - expected_tokens: list, + expected_indexed: list, + expected_query: list, ) -> None: result = client.tokenization.text(text=text, tokenization=tokenization) assert isinstance(result, TokenizeResult) - assert result.tokenization == tokenization - assert result.indexed == expected_tokens - assert result.query == expected_tokens + assert result.indexed == expected_indexed + assert result.query == expected_query + # Generic endpoint does not echo tokenization back. + assert result.tokenization is None + + def test_default_en_applied_for_word(self, client: weaviate.WeaviateClient) -> None: + """Word tokenization defaults to the 'en' preset when no stopword + config is supplied.""" + result = client.tokenization.text( + text="The quick brown fox", tokenization=Tokenization.WORD + ) + assert result.indexed == ["the", "quick", "brown", "fox"] + # "the" removed by the server's default en preset. 
+ assert result.query == ["quick", "brown", "fox"] - def test_no_analyzer_config(self, client: weaviate.WeaviateClient) -> None: - result = client.tokenization.text(text="hello world", tokenization=Tokenization.WORD) - assert result.tokenization == Tokenization.WORD - assert result.indexed == ["hello", "world"] - assert result.analyzer_config is None + def test_opt_out_of_default_en(self, client: weaviate.WeaviateClient) -> None: + """analyzerConfig.stopwordPreset='none' disables the default en.""" + cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.NONE) + result = client.tokenization.text( + text="The quick brown fox", + tokenization=Tokenization.WORD, + analyzer_config=cfg, + ) + assert result.query == ["the", "quick", "brown", "fox"] def test_ascii_fold(self, client: weaviate.WeaviateClient) -> None: cfg = _TextAnalyzerConfigCreate(ascii_fold=True) @@ -140,33 +179,74 @@ def test_ascii_fold_combined_with_stopwords(self, client: weaviate.WeaviateClien assert "the" not in result.query assert "école" in result.query - def test_stopword_presets_custom_additions(self, client: weaviate.WeaviateClient) -> None: - cfg = _TextAnalyzerConfigCreate(stopword_preset="custom") + def test_stopwords_fallback(self, client: weaviate.WeaviateClient) -> None: + """Top-level stopwords acts as the fallback detector when no + analyzerConfig.stopwordPreset is set.""" + sw = _StopwordsCreate( + preset=StopwordsPreset.EN, additions=["quick"], removals=None + ) + result = client.tokenization.text( + text="the quick brown fox", + tokenization=Tokenization.WORD, + stopwords=sw, + ) + assert result.indexed == ["the", "quick", "brown", "fox"] + # "the" (en) and "quick" (addition) filtered. + assert result.query == ["brown", "fox"] + + def test_stopwords_additions_default_preset_to_en( + self, client: weaviate.WeaviateClient + ) -> None: + """Caller omits preset, passes only additions. 
Server defaults preset + to 'en' and builds detector from en + additions.""" + sw = _StopwordsCreate(preset=None, additions=["hello"], removals=None) + result = client.tokenization.text( + text="the quick hello world", + tokenization=Tokenization.WORD, + stopwords=sw, + ) + assert result.query == ["quick", "world"] + + def test_stopwords_removals_default_preset_to_en( + self, client: weaviate.WeaviateClient + ) -> None: + """Caller omits preset, passes only removals. 'the' is removed from + the en list so it passes through.""" + sw = _StopwordsCreate(preset=None, additions=None, removals=["the"]) + result = client.tokenization.text( + text="the quick is fast", + tokenization=Tokenization.WORD, + stopwords=sw, + ) + # "is" still in en, "the" removed. + assert result.query == ["the", "quick", "fast"] + + def test_stopword_presets_named_reference(self, client: weaviate.WeaviateClient) -> None: + """Define a named preset via stopword_presets, select it via + analyzerConfig.stopwordPreset. Word lists use the collection shape.""" result = client.tokenization.text( text="hello world test", tokenization=Tokenization.WORD, - analyzer_config=cfg, - stopword_presets={ - "custom": _StopwordsCreate(preset=None, additions=["test"], removals=None), - }, + analyzer_config=_TextAnalyzerConfigCreate(stopword_preset="custom"), + stopword_presets={"custom": ["test"]}, ) assert result.indexed == ["hello", "world", "test"] assert result.query == ["hello", "world"] - def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None: - cfg = _TextAnalyzerConfigCreate(stopword_preset="en-no-the") + def test_stopword_presets_override_builtin_en( + self, client: weaviate.WeaviateClient + ) -> None: + """A user-defined preset sharing a name with a built-in replaces the + built-in entirely, including on the default-en path for word + tokenization.""" result = client.tokenization.text( - text="the quick", + text="the quick hello world", 
tokenization=Tokenization.WORD, - analyzer_config=cfg, - stopword_presets={ - "en-no-the": _StopwordsCreate( - preset=StopwordsPreset.EN, additions=None, removals=["the"] - ), - }, + stopword_presets={"en": ["hello"]}, ) - assert result.indexed == ["the", "quick"] - assert result.query == ["the", "quick"] + assert result.indexed == ["the", "quick", "hello", "world"] + # "the" no longer filtered (built-in en replaced), "hello" is. + assert result.query == ["the", "quick", "world"] # --------------------------------------------------------------------------- @@ -176,61 +256,23 @@ def test_stopword_presets_with_base_and_removals(self, client: weaviate.Weaviate @pytest.mark.usefixtures("require_1_37") class TestDeserialization: - """Verify the client correctly deserializes response fields into typed objects.""" + """Verify the client correctly deserializes response fields into + TokenizeResult.""" - def test_result_type(self, client: weaviate.WeaviateClient) -> None: + def test_generic_result_shape(self, client: weaviate.WeaviateClient) -> None: + """Generic endpoint returns only indexed and query; tokenization is + not echoed back.""" result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD) assert isinstance(result, TokenizeResult) assert isinstance(result.indexed, list) assert isinstance(result.query, list) + assert result.tokenization is None - def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None: - cfg = _TextAnalyzerConfigCreate( - ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN - ) - result = client.tokenization.text( - text="L'école", - tokenization=Tokenization.WORD, - analyzer_config=cfg, - ) - assert isinstance(result.analyzer_config, TextAnalyzerConfig) - assert result.analyzer_config.ascii_fold is True - assert result.analyzer_config.ascii_fold_ignore == ["é"] - assert result.analyzer_config.stopword_preset == "en" - - def test_no_analyzer_config_returns_none(self, client: 
weaviate.WeaviateClient) -> None: - result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD) - assert result.analyzer_config is None - - def test_stopword_config_deserialized_on_property( + def test_property_result_populates_tokenization( self, client: weaviate.WeaviateClient ) -> None: - client.collections.delete("TestDeserStopword") - try: - client.collections.create_from_dict( - { - "class": "TestDeserStopword", - "vectorizer": "none", - "properties": [ - { - "name": "title", - "dataType": ["text"], - "tokenization": "word", - "textAnalyzer": {"stopwordPreset": "en"}, - }, - ], - } - ) - col = client.collections.get("TestDeserStopword") - result = col.config.tokenize_property(property_name="title", text="the quick") - assert isinstance(result, TokenizeResult) - assert result.tokenization == Tokenization.WORD - if result.stopword_config is not None: - assert isinstance(result.stopword_config, StopwordsConfig) - finally: - client.collections.delete("TestDeserStopword") - - def test_property_result_types(self, client: weaviate.WeaviateClient) -> None: + """Property endpoint returns tokenization — the server resolved it + from the property's schema rather than the caller sending it.""" client.collections.delete("TestDeserPropTypes") try: client.collections.create_from_dict( @@ -256,12 +298,13 @@ def test_property_result_types(self, client: weaviate.WeaviateClient) -> None: # --------------------------------------------------------------------------- -# Client-side validation (_TextAnalyzerConfigCreate) +# Client-side validation # --------------------------------------------------------------------------- class TestClientSideValidation: - """Verify that _TextAnalyzerConfigCreate rejects invalid input before hitting the server.""" + """Verify that client-side validation rejects invalid input before + hitting the server.""" def test_ascii_fold_ignore_without_fold_raises(self) -> None: with pytest.raises(ValueError, match="asciiFoldIgnore"): @@ 
-291,6 +334,23 @@ def test_empty_config_is_valid(self) -> None: assert cfg.asciiFoldIgnore is None assert cfg.stopwordPreset is None + def test_stopwords_and_stopword_presets_mutex( + self, client: weaviate.WeaviateClient + ) -> None: + """Client rejects the mutex violation locally with ValueError, before + sending the request (which the server would also reject with 422).""" + if client._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("Tokenization requires Weaviate >= 1.37.0") + with pytest.raises(ValueError, match="mutually exclusive"): + client.tokenization.text( + text="hello", + tokenization=Tokenization.WORD, + stopwords=_StopwordsCreate( + preset=StopwordsPreset.EN, additions=None, removals=None + ), + stopword_presets={"custom": ["hello"]}, + ) + # --------------------------------------------------------------------------- # Version gate @@ -331,20 +391,21 @@ async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) - ) assert isinstance(result, TokenizeResult) assert result.indexed == ["the", "quick", "brown", "fox"] + # default "en" applied server-side. 
+ assert result.query == ["quick", "brown", "fox"] @pytest.mark.asyncio - async def test_text_with_analyzer_config( + async def test_text_with_stopwords_fallback( self, async_client: weaviate.WeaviateAsyncClient ) -> None: - cfg = _TextAnalyzerConfigCreate(ascii_fold=True, stopword_preset=StopwordsPreset.EN) + sw = _StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None) result = await async_client.tokenization.text( - text="L'école est fermée", + text="the quick brown fox", tokenization=Tokenization.WORD, - analyzer_config=cfg, + stopwords=sw, ) - assert result.indexed == ["l", "ecole", "est", "fermee"] - assert isinstance(result.analyzer_config, TextAnalyzerConfig) - assert result.analyzer_config.ascii_fold is True + assert result.indexed == ["the", "quick", "brown", "fox"] + assert result.query == ["brown", "fox"] @pytest.mark.asyncio async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None: diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 5093c14e9..3a79d6ee1 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -1,6 +1,6 @@ """Tokenize executor.""" -from typing import Any, Dict, Generic, Optional +from typing import Any, Dict, Generic, List, Optional from httpx import Response @@ -33,26 +33,52 @@ def text( tokenization: Tokenization, *, analyzer_config: Optional[_TextAnalyzerConfigCreate] = None, - stopword_presets: Optional[Dict[str, _StopwordsCreate]] = None, + stopwords: Optional[_StopwordsCreate] = None, + stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> executor.Result[TokenizeResult]: """Tokenize text using the generic /v1/tokenize endpoint. + For ``word`` tokenization the server defaults to the built-in ``en`` + stopword preset when no stopword configuration is supplied. Pass + ``analyzer_config=TextAnalyzerConfig(stopword_preset="none")`` or + equivalent to opt out. + Args: text: The text to tokenize. 
tokenization: The tokenization method to use (e.g. Tokenization.WORD). - analyzer_config: Text analyzer settings (ASCII folding, stopword preset). - stopword_presets: Custom stopword preset definitions, keyed by name. - Each value is a ``_StopwordsCreate`` with optional preset, additions, - and removals fields. + analyzer_config: Text analyzer settings (ASCII folding, stopword + preset name). ``stopword_preset`` may reference a built-in preset + (``en`` / ``none``) or a name defined in ``stopword_presets``. + stopwords: Fallback stopword config applied when + ``analyzer_config.stopword_preset`` is not set. Same shape as a + collection's ``invertedIndexConfig.stopwords`` — a base preset + optionally tweaked with ``additions`` / ``removals``. An empty + ``preset`` defaults to ``en``. + stopword_presets: User-defined named stopword presets, each a plain + list of words. A name matching a built-in (``en`` / ``none``) + replaces the built-in entirely. + + Note: + ``stopwords`` and ``stopword_presets`` are mutually exclusive on the + server — pass one or the other, not both. The server returns HTTP + 422 if both are supplied. Returns: - A TokenizeResult with indexed and query token lists. + A TokenizeResult with indexed and query token lists. The generic + endpoint does not echo request fields (tokenization, analyzer_config, + stopwords, stopword_presets) back in the response. Raises: WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. + ValueError: If both ``stopwords`` and ``stopword_presets`` are passed. 
""" self._check_version() + if stopwords is not None and stopword_presets is not None: + raise ValueError( + "stopwords and stopword_presets are mutually exclusive; pass only one" + ) + payload: Dict[str, Any] = { "text": text, "tokenization": tokenization.value, @@ -63,9 +89,16 @@ def text( if ac_dict: payload["analyzerConfig"] = ac_dict + if stopwords is not None: + sw_dict = stopwords._to_dict() + if sw_dict: + payload["stopwords"] = sw_dict + if stopword_presets is not None: + # Plain word-list shape matching a collection's + # invertedIndexConfig.stopwordPresets. payload["stopwordPresets"] = { - name: cfg._to_dict() for name, cfg in stopword_presets.items() + name: list(words) for name, words in stopword_presets.items() } def resp(response: Response) -> TokenizeResult: diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py index 8bfa508f8..3bf980597 100644 --- a/weaviate/tokenization/models.py +++ b/weaviate/tokenization/models.py @@ -1,56 +1,26 @@ """Return types for tokenization operations.""" -from typing import Any, Dict, List, Optional +from typing import List, Optional -from pydantic import BaseModel, ConfigDict, Field, field_validator +from pydantic import BaseModel, ConfigDict -from weaviate.collections.classes.config import ( - StopwordsConfig, - StopwordsPreset, - TextAnalyzerConfig, - Tokenization, -) +from weaviate.collections.classes.config import Tokenization class TokenizeResult(BaseModel): """Result of a tokenization operation. Attributes: - tokenization: The tokenization method that was applied. indexed: Tokens as they would be stored in the inverted index. query: Tokens as they would be used for querying (after stopword removal). - analyzer_config: The text analyzer configuration that was used, if any. - stopword_config: The stopword configuration that was used, if any. + tokenization: The tokenization method that was applied. 
Populated only by + the property-level endpoint, where the tokenization is resolved from + the property's schema. The generic ``/v1/tokenize`` endpoint does not + echo it back (the caller passed it). """ model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) - tokenization: Tokenization indexed: List[str] query: List[str] - analyzer_config: Optional[TextAnalyzerConfig] = Field(default=None, alias="analyzerConfig") - stopword_config: Optional[StopwordsConfig] = Field(default=None, alias="stopwordConfig") - - @field_validator("analyzer_config", mode="before") - @classmethod - def _parse_analyzer_config(cls, v: Optional[Dict[str, Any]]) -> Optional[TextAnalyzerConfig]: - if v is None: - return None - if "asciiFold" not in v and "stopwordPreset" not in v: - return None - return TextAnalyzerConfig( - ascii_fold=v.get("asciiFold", False), - ascii_fold_ignore=v.get("asciiFoldIgnore"), - stopword_preset=v.get("stopwordPreset"), - ) - - @field_validator("stopword_config", mode="before") - @classmethod - def _parse_stopword_config(cls, v: Optional[Dict[str, Any]]) -> Optional[StopwordsConfig]: - if v is None: - return None - return StopwordsConfig( - preset=StopwordsPreset(v["preset"]), - additions=v.get("additions"), - removals=v.get("removals"), - ) + tokenization: Optional[Tokenization] = None From 5a12f134c36a5f6ed6329abaae101abf42efb7cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 20 Apr 2026 15:55:45 +0100 Subject: [PATCH 85/99] fix: update Weaviate 1.37.1 version to include specific build identifier --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 8dd157443..ee9b69537 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,7 +28,7 @@ env: WEAVIATE_134: 1.34.19 WEAVIATE_135: 1.35.16-efdedfa WEAVIATE_136: 1.36.9-d905e6c - WEAVIATE_137: 1.37.1 + WEAVIATE_137: 1.37.1-5f911bc 
jobs: lint-and-format: From d7605777b43a799eb31ff712e642a97ee63db855 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Mon, 20 Apr 2026 18:07:58 +0100 Subject: [PATCH 86/99] Reduce timeouts in batch tests --- integration/test_batch_v4.py | 6 +++--- integration/test_collection_batch.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/integration/test_batch_v4.py b/integration/test_batch_v4.py index f4ce7669e..3646f1b1f 100644 --- a/integration/test_batch_v4.py +++ b/integration/test_batch_v4.py @@ -718,7 +718,7 @@ def test_non_existant_collection(client_factory: ClientFactory) -> None: # not, so we do not check for errors here -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) def test_number_of_stored_results_in_batch(client_factory: ClientFactory) -> None: client, name = client_factory() with client.batch.dynamic() as batch: @@ -818,7 +818,7 @@ def test_references_with_to_uuids(client_factory: ClientFactory) -> None: @pytest.mark.asyncio -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) async def test_add_one_hundred_thousand_objects_async_client( async_client_factory: AsyncClientFactory, ) -> None: @@ -849,7 +849,7 @@ async def test_add_one_hundred_thousand_objects_async_client( await client.collections.delete(name) -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) def test_add_one_hundred_thousand_objects_sync_client( client_factory: ClientFactory, ) -> None: diff --git a/integration/test_collection_batch.py b/integration/test_collection_batch.py index e670e4883..f2bd5be61 100644 --- a/integration/test_collection_batch.py +++ b/integration/test_collection_batch.py @@ -271,7 +271,7 @@ def test_non_existant_collection(collection_factory_get: CollectionFactoryGet) - @pytest.mark.asyncio -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) async def test_batch_one_hundred_thousand_objects_async_collection( batch_collection_async: BatchCollectionAsync, ) -> None: @@ -299,7 +299,7 @@ async def 
test_batch_one_hundred_thousand_objects_async_collection( @pytest.mark.asyncio -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) async def test_ingest_one_hundred_thousand_data_objects_async( batch_collection_async: BatchCollectionAsync, ) -> None: @@ -321,7 +321,7 @@ async def test_ingest_one_hundred_thousand_data_objects_async( assert len(results.errors) == 0, [obj.message for obj in results.errors.values()] -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) def test_ingest_one_hundred_thousand_data_objects( batch_collection: BatchCollection, ) -> None: From 60887f3ab37171d2ea12aa34de57a927fcca8267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Mon, 20 Apr 2026 18:08:38 +0100 Subject: [PATCH 87/99] fix: update Weaviate 1.37.1 version to include architecture suffix --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index b2c567cee..94f75b089 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,7 +28,7 @@ env: WEAVIATE_134: 1.34.19 WEAVIATE_135: 1.35.18 WEAVIATE_136: 1.36.12 - WEAVIATE_137: 1.37.1-5f911bc + WEAVIATE_137: 1.37.1-5f911bc.amd64 jobs: lint-and-format: From 9fd83b881c953f2ea09c1fb5c89dc38a96173c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 21 Apr 2026 09:23:37 +0100 Subject: [PATCH 88/99] fix: refactor tokenization tests to use parameterized cases for improved readability and maintainability --- integration/test_tokenize.py | 327 +++++++++++++++++++---------------- 1 file changed, 176 insertions(+), 151 deletions(-) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index 788cefc31..51f154479 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -109,131 +109,146 @@ def test_tokenization_enum( # Generic endpoint does not echo tokenization back. 
assert result.tokenization is None - def test_default_en_applied_for_word(self, client: weaviate.WeaviateClient) -> None: - """Word tokenization defaults to the 'en' preset when no stopword config is supplied.""" - result = client.tokenization.text( - text="The quick brown fox", tokenization=Tokenization.WORD - ) - assert result.indexed == ["the", "quick", "brown", "fox"] - # "the" removed by the server's default en preset. - assert result.query == ["quick", "brown", "fox"] - - def test_opt_out_of_default_en(self, client: weaviate.WeaviateClient) -> None: - """analyzerConfig.stopwordPreset='none' disables the default en.""" - cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.NONE) - result = client.tokenization.text( - text="The quick brown fox", - tokenization=Tokenization.WORD, - analyzer_config=cfg, - ) - assert result.query == ["the", "quick", "brown", "fox"] - - def test_ascii_fold(self, client: weaviate.WeaviateClient) -> None: - cfg = _TextAnalyzerConfigCreate(ascii_fold=True) - result = client.tokenization.text( - text="L'école est fermée", - tokenization=Tokenization.WORD, - analyzer_config=cfg, - ) - assert result.indexed == ["l", "ecole", "est", "fermee"] - - def test_ascii_fold_with_ignore(self, client: weaviate.WeaviateClient) -> None: - cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é"]) - result = client.tokenization.text( - text="L'école est fermée", - tokenization=Tokenization.WORD, - analyzer_config=cfg, - ) - assert result.indexed == ["l", "école", "est", "fermée"] - - def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None: - cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.EN) - result = client.tokenization.text( - text="The quick brown fox", - tokenization=Tokenization.WORD, - analyzer_config=cfg, - ) - assert "the" not in result.query - assert "quick" in result.query - - def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None: - cfg = 
_TextAnalyzerConfigCreate(stopword_preset="en") - result = client.tokenization.text( - text="The quick brown fox", - tokenization=Tokenization.WORD, - analyzer_config=cfg, - ) - assert "the" not in result.query - - def test_ascii_fold_combined_with_stopwords(self, client: weaviate.WeaviateClient) -> None: - cfg = _TextAnalyzerConfigCreate( - ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN - ) - result = client.tokenization.text( - text="The école est fermée", - tokenization=Tokenization.WORD, - analyzer_config=cfg, - ) - assert result.indexed == ["the", "école", "est", "fermée"] - assert "the" not in result.query - assert "école" in result.query - - def test_stopwords_fallback(self, client: weaviate.WeaviateClient) -> None: - """Top-level stopwords acts as the fallback detector when no analyzerConfig.stopwordPreset is set.""" - sw = _StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None) - result = client.tokenization.text( - text="the quick brown fox", - tokenization=Tokenization.WORD, - stopwords=sw, - ) - assert result.indexed == ["the", "quick", "brown", "fox"] - # "the" (en) and "quick" (addition) filtered. 
- assert result.query == ["brown", "fox"] - - def test_stopwords_additions_default_preset_to_en( - self, client: weaviate.WeaviateClient + @pytest.mark.parametrize( + "call_kwargs,expected_indexed,expected_query", + [ + ( + {"text": "The quick brown fox"}, + ["the", "quick", "brown", "fox"], + ["quick", "brown", "fox"], + ), + ( + { + "text": "The quick brown fox", + "analyzer_config": _TextAnalyzerConfigCreate( + stopword_preset=StopwordsPreset.NONE + ), + }, + ["the", "quick", "brown", "fox"], + ["the", "quick", "brown", "fox"], + ), + ( + { + "text": "L'école est fermée", + "analyzer_config": _TextAnalyzerConfigCreate(ascii_fold=True), + }, + ["l", "ecole", "est", "fermee"], + ["l", "ecole", "fermee"], + ), + ( + { + "text": "L'école est fermée", + "analyzer_config": _TextAnalyzerConfigCreate( + ascii_fold=True, ascii_fold_ignore=["é"] + ), + }, + ["l", "école", "est", "fermée"], + ["l", "école", "fermée"], + ), + ( + { + "text": "The quick brown fox", + "analyzer_config": _TextAnalyzerConfigCreate( + stopword_preset=StopwordsPreset.EN + ), + }, + ["the", "quick", "brown", "fox"], + ["quick", "brown", "fox"], + ), + ( + { + "text": "The quick brown fox", + "analyzer_config": _TextAnalyzerConfigCreate(stopword_preset="en"), + }, + ["the", "quick", "brown", "fox"], + ["quick", "brown", "fox"], + ), + ( + { + "text": "The école est fermée", + "analyzer_config": _TextAnalyzerConfigCreate( + ascii_fold=True, + ascii_fold_ignore=["é"], + stopword_preset=StopwordsPreset.EN, + ), + }, + ["the", "école", "est", "fermée"], + ["école", "est", "fermée"], + ), + ( + { + "text": "the quick brown fox", + "stopwords": _StopwordsCreate( + preset=StopwordsPreset.EN, additions=["quick"], removals=None + ), + }, + ["the", "quick", "brown", "fox"], + ["brown", "fox"], + ), + ( + { + "text": "the quick hello world", + "stopwords": _StopwordsCreate( + preset=None, additions=["hello"], removals=None + ), + }, + ["the", "quick", "hello", "world"], + ["quick", "world"], + ), + ( + { + 
"text": "the quick is fast", + "stopwords": _StopwordsCreate( + preset=None, additions=None, removals=["the"] + ), + }, + ["the", "quick", "is", "fast"], + ["the", "quick", "fast"], + ), + ( + { + "text": "hello world test", + "analyzer_config": _TextAnalyzerConfigCreate(stopword_preset="custom"), + "stopword_presets": {"custom": ["test"]}, + }, + ["hello", "world", "test"], + ["hello", "world"], + ), + ( + { + "text": "the quick hello world", + "stopword_presets": {"en": ["hello"]}, + }, + ["the", "quick", "hello", "world"], + ["the", "quick", "world"], + ), + ], + ids=[ + "default_en_applied_for_word", + "opt_out_of_default_en", + "ascii_fold", + "ascii_fold_with_ignore", + "stopword_preset_enum", + "stopword_preset_string", + "ascii_fold_combined_with_stopwords", + "stopwords_fallback", + "stopwords_additions_default_preset_to_en", + "stopwords_removals_default_preset_to_en", + "stopword_presets_named_reference", + "stopword_presets_override_builtin_en", + ], + ) + def test_text_tokenize( + self, + client: weaviate.WeaviateClient, + call_kwargs: dict, + expected_indexed: list, + expected_query: list, ) -> None: - """Caller omits preset, passes only additions. Server defaults preset to 'en' and builds detector from en + additions.""" - sw = _StopwordsCreate(preset=None, additions=["hello"], removals=None) - result = client.tokenization.text( - text="the quick hello world", - tokenization=Tokenization.WORD, - stopwords=sw, - ) - assert result.query == ["quick", "world"] - - def test_stopwords_removals_default_preset_to_en(self, client: weaviate.WeaviateClient) -> None: - """Caller omits preset, passes only removals. 'the' is removed from the en list so it passes through.""" - sw = _StopwordsCreate(preset=None, additions=None, removals=["the"]) - result = client.tokenization.text( - text="the quick is fast", - tokenization=Tokenization.WORD, - stopwords=sw, - ) - # "is" still in en, "the" removed. 
- assert result.query == ["the", "quick", "fast"] - - def test_stopword_presets_named_reference(self, client: weaviate.WeaviateClient) -> None: - """Define a named preset via stopword_presets, select it via analyzerConfig.stopwordPreset. Word lists use the collection shape.""" - result = client.tokenization.text( - text="hello world test", - tokenization=Tokenization.WORD, - analyzer_config=_TextAnalyzerConfigCreate(stopword_preset="custom"), - stopword_presets={"custom": ["test"]}, - ) - assert result.indexed == ["hello", "world", "test"] - assert result.query == ["hello", "world"] - - def test_stopword_presets_override_builtin_en(self, client: weaviate.WeaviateClient) -> None: - """A user-defined preset sharing a name with a built-in replaces the built-in entirely, including on the default-en path for word tokenization.""" - result = client.tokenization.text( - text="the quick hello world", - tokenization=Tokenization.WORD, - stopword_presets={"en": ["hello"]}, - ) - assert result.indexed == ["the", "quick", "hello", "world"] - # "the" no longer filtered (built-in en replaced), "hello" is. 
- assert result.query == ["the", "quick", "world"] + result = client.tokenization.text(tokenization=Tokenization.WORD, **call_kwargs) + assert isinstance(result, TokenizeResult) + assert result.indexed == expected_indexed + assert result.query == expected_query # --------------------------------------------------------------------------- @@ -287,33 +302,44 @@ def test_property_result_populates_tokenization(self, client: weaviate.WeaviateC class TestClientSideValidation: """Verify that client-side validation rejects invalid input before hitting the server.""" - def test_ascii_fold_ignore_without_fold_raises(self) -> None: - with pytest.raises(ValueError, match="asciiFoldIgnore"): - _TextAnalyzerConfigCreate(ascii_fold=False, ascii_fold_ignore=["é"]) - - def test_ascii_fold_ignore_without_fold_default_raises(self) -> None: + @pytest.mark.parametrize( + "kwargs", + [ + {"ascii_fold": False, "ascii_fold_ignore": ["é"]}, + {"ascii_fold_ignore": ["é"]}, + ], + ids=["explicit_false", "default"], + ) + def test_ascii_fold_ignore_without_fold_raises(self, kwargs: dict) -> None: with pytest.raises(ValueError, match="asciiFoldIgnore"): - _TextAnalyzerConfigCreate(ascii_fold_ignore=["é"]) + _TextAnalyzerConfigCreate(**kwargs) - def test_valid_config_does_not_raise(self) -> None: - cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]) - assert cfg.asciiFold is True - assert cfg.asciiFoldIgnore == ["é", "ñ"] - - def test_fold_without_ignore_is_valid(self) -> None: - cfg = _TextAnalyzerConfigCreate(ascii_fold=True) - assert cfg.asciiFold is True - assert cfg.asciiFoldIgnore is None - - def test_stopword_preset_only_is_valid(self) -> None: - cfg = _TextAnalyzerConfigCreate(stopword_preset="en") - assert cfg.stopwordPreset == "en" - - def test_empty_config_is_valid(self) -> None: - cfg = _TextAnalyzerConfigCreate() - assert cfg.asciiFold is None - assert cfg.asciiFoldIgnore is None - assert cfg.stopwordPreset is None + @pytest.mark.parametrize( + 
"kwargs,expected", + [ + ( + {"ascii_fold": True, "ascii_fold_ignore": ["é", "ñ"]}, + {"asciiFold": True, "asciiFoldIgnore": ["é", "ñ"]}, + ), + ( + {"ascii_fold": True}, + {"asciiFold": True, "asciiFoldIgnore": None}, + ), + ( + {"stopword_preset": "en"}, + {"stopwordPreset": "en"}, + ), + ( + {}, + {"asciiFold": None, "asciiFoldIgnore": None, "stopwordPreset": None}, + ), + ], + ids=["fold_with_ignore", "fold_without_ignore", "stopword_preset_only", "empty"], + ) + def test_valid_config(self, kwargs: dict, expected: dict) -> None: + cfg = _TextAnalyzerConfigCreate(**kwargs) + for attr, value in expected.items(): + assert getattr(cfg, attr) == value def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateClient) -> None: """Client rejects the mutex violation locally with ValueError, before sending the request (which the server would also reject with 422).""" @@ -411,7 +437,6 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien assert isinstance(result, TokenizeResult) assert result.tokenization == Tokenization.WORD assert result.indexed == ["the", "quick", "brown", "fox"] - assert "the" not in result.query - assert "quick" in result.query + assert result.query == ["quick", "brown", "fox"] finally: await async_client.collections.delete("TestAsyncPropTokenize") From e9d681226e1917b80fed0312b086a5818cac2e9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 21 Apr 2026 09:52:24 +0100 Subject: [PATCH 89/99] fix: update Weaviate 1.37.1 version and enhance tokenization tests with new fixtures --- .github/workflows/main.yaml | 2 +- integration/test_tokenize.py | 77 ++++++++++++++++++++++++++++++------ 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 94f75b089..a1ff94f98 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -28,7 +28,7 @@ env: WEAVIATE_134: 1.34.19 WEAVIATE_135: 1.35.18 
WEAVIATE_136: 1.36.12 - WEAVIATE_137: 1.37.1-5f911bc.amd64 + WEAVIATE_137: 1.37.1-4e61e26.amd64 jobs: lint-and-format: diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index 51f154479..61d54e095 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -9,8 +9,7 @@ Server-side behavior this client relies on: - Word tokenization defaults to preset "en" when no stopword config is sent. -- The generic /v1/tokenize response is minimal: only ``indexed`` and ``query`` - are returned. The property-level endpoint additionally returns ``tokenization``. +- Both endpoints return only ``indexed`` and ``query``. - ``stopwords`` and ``stopword_presets`` are mutually exclusive on the generic endpoint — the server rejects requests that set both. """ @@ -57,6 +56,29 @@ async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]: await c.close() +@pytest.fixture +def recipe_collection(client: weaviate.WeaviateClient) -> Generator: + """Collection with a `recipe` word-tokenized property and an en + ["quick"] stopwords config.""" + name = "TestTokenizeRecipe" + client.collections.delete(name) + client.collections.create_from_dict( + { + "class": name, + "vectorizer": "none", + "invertedIndexConfig": { + "stopwords": {"preset": "en", "additions": ["quick"]}, + }, + "properties": [ + {"name": "recipe", "dataType": ["text"], "tokenization": "word"}, + ], + } + ) + try: + yield client.collections.get(name) + finally: + client.collections.delete(name) + + # --------------------------------------------------------------------------- # Serialization # --------------------------------------------------------------------------- @@ -106,8 +128,6 @@ def test_tokenization_enum( assert isinstance(result, TokenizeResult) assert result.indexed == expected_indexed assert result.query == expected_query - # Generic endpoint does not echo tokenization back. 
- assert result.tokenization is None @pytest.mark.parametrize( "call_kwargs,expected_indexed,expected_query", @@ -133,7 +153,7 @@ def test_tokenization_enum( "analyzer_config": _TextAnalyzerConfigCreate(ascii_fold=True), }, ["l", "ecole", "est", "fermee"], - ["l", "ecole", "fermee"], + ["l", "ecole", "est", "fermee"], ), ( { @@ -143,7 +163,7 @@ def test_tokenization_enum( ), }, ["l", "école", "est", "fermée"], - ["l", "école", "fermée"], + ["l", "école", "est", "fermée"], ), ( { @@ -250,6 +270,42 @@ def test_text_tokenize( assert result.indexed == expected_indexed assert result.query == expected_query + def test_text_from_collection_config( + self, client: weaviate.WeaviateClient, recipe_collection + ) -> None: + """Values round-tripped through config.get() feed back into tokenization.text().""" + config = recipe_collection.config.get() + recipe = next(p for p in config.properties if p.name == "recipe") + stopwords = config.inverted_index_config.stopwords + result = client.tokenization.text( + text="the quick brown fox", + tokenization=recipe.tokenization, + stopwords=_StopwordsCreate(**stopwords.__dict__), + ) + assert result.indexed == ["the", "quick", "brown", "fox"] + assert result.query == ["brown", "fox"] + + def test_property_and_generic_endpoints_agree( + self, client: weaviate.WeaviateClient, recipe_collection + ) -> None: + """Property endpoint (server resolves config from schema) produces the same indexed/query as the generic endpoint fed the same config.""" + config = recipe_collection.config.get() + recipe = next(p for p in config.properties if p.name == "recipe") + stopwords = config.inverted_index_config.stopwords + + text = "the quick brown fox" + via_property = recipe_collection.config.tokenize_property( + property_name="recipe", text=text + ) + via_generic = client.tokenization.text( + text=text, + tokenization=recipe.tokenization, + stopwords=_StopwordsCreate(**stopwords.__dict__), + ) + + assert via_property.indexed == via_generic.indexed + 
assert via_property.query == via_generic.query + # --------------------------------------------------------------------------- # Deserialization @@ -261,15 +317,14 @@ class TestDeserialization: """Verify the client correctly deserializes response fields into TokenizeResult.""" def test_generic_result_shape(self, client: weaviate.WeaviateClient) -> None: - """Generic endpoint returns only indexed and query; tokenization is not echoed back.""" + """Generic endpoint response deserializes into TokenizeResult with indexed and query lists.""" result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD) assert isinstance(result, TokenizeResult) assert isinstance(result.indexed, list) assert isinstance(result.query, list) - assert result.tokenization is None - def test_property_result_populates_tokenization(self, client: weaviate.WeaviateClient) -> None: - """Property endpoint returns tokenization — the server resolved it from the property's schema rather than the caller sending it.""" + def test_property_result_shape(self, client: weaviate.WeaviateClient) -> None: + """Property endpoint response deserializes into TokenizeResult — server resolves tokenization from the property's schema.""" client.collections.delete("TestDeserPropTypes") try: client.collections.create_from_dict( @@ -288,7 +343,6 @@ def test_property_result_populates_tokenization(self, client: weaviate.WeaviateC col = client.collections.get("TestDeserPropTypes") result = col.config.tokenize_property(property_name="tag", text=" Hello World ") assert isinstance(result, TokenizeResult) - assert result.tokenization == Tokenization.FIELD assert result.indexed == ["Hello World"] finally: client.collections.delete("TestDeserPropTypes") @@ -435,7 +489,6 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien text="The quick brown fox", ) assert isinstance(result, TokenizeResult) - assert result.tokenization == Tokenization.WORD assert result.indexed == ["the", "quick", 
"brown", "fox"] assert result.query == ["quick", "brown", "fox"] finally: From 959f554c7df129a226ddbc5f412e95a3879891d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 21 Apr 2026 09:55:35 +0100 Subject: [PATCH 90/99] refactor: ruff format --- integration/test_tokenize.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index 61d54e095..d2a8442d8 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -208,9 +208,7 @@ def test_tokenization_enum( ( { "text": "the quick hello world", - "stopwords": _StopwordsCreate( - preset=None, additions=["hello"], removals=None - ), + "stopwords": _StopwordsCreate(preset=None, additions=["hello"], removals=None), }, ["the", "quick", "hello", "world"], ["quick", "world"], @@ -218,9 +216,7 @@ def test_tokenization_enum( ( { "text": "the quick is fast", - "stopwords": _StopwordsCreate( - preset=None, additions=None, removals=["the"] - ), + "stopwords": _StopwordsCreate(preset=None, additions=None, removals=["the"]), }, ["the", "quick", "is", "fast"], ["the", "quick", "fast"], @@ -294,9 +290,7 @@ def test_property_and_generic_endpoints_agree( stopwords = config.inverted_index_config.stopwords text = "the quick brown fox" - via_property = recipe_collection.config.tokenize_property( - property_name="recipe", text=text - ) + via_property = recipe_collection.config.tokenize_property(property_name="recipe", text=text) via_generic = client.tokenization.text( text=text, tokenization=recipe.tokenization, From 0f7fe47cac92107f502fb5a22c925896084535ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 21 Apr 2026 14:24:33 +0100 Subject: [PATCH 91/99] test: refactor output types and tests to config --- integration/test_tokenize.py | 32 ++++++++++++++++++++++++ weaviate/tokenization/executor.py | 41 ++++++++++++++++++++++--------- weaviate/tokenization/models.py | 5 ---- 3 
files changed, 62 insertions(+), 16 deletions(-) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index d2a8442d8..c939e8c5b 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -403,6 +403,38 @@ def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateCli stopword_presets={"custom": ["hello"]}, ) + @pytest.mark.parametrize( + "stopword_presets,match", + [ + ({"custom": "hello"}, "must be a list of strings"), + ( + { + "custom": _StopwordsCreate( + preset=StopwordsPreset.EN, additions=None, removals=None + ), + }, + "must be a list of strings", + ), + ({"custom": ["hello", 123]}, "must contain only strings"), + ], + ids=["str_value", "pydantic_model_value", "non_string_element"], + ) + def test_stopword_presets_invalid_shape_raises( + self, + client: weaviate.WeaviateClient, + stopword_presets: dict, + match: str, + ) -> None: + """Client rejects malformed stopword_presets values locally before sending — str would silently split into characters; a pydantic model would serialize to field tuples.""" + if client._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("Tokenization requires Weaviate >= 1.37.0") + with pytest.raises(ValueError, match=match): + client.tokenization.text( + text="hello", + tokenization=Tokenization.WORD, + stopword_presets=stopword_presets, + ) + # --------------------------------------------------------------------------- # Version gate diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 8cedb6e51..825faee05 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -40,14 +40,15 @@ def text( For ``word`` tokenization the server defaults to the built-in ``en`` stopword preset when no stopword configuration is supplied. Pass - ``analyzer_config=TextAnalyzerConfig(stopword_preset="none")`` or - equivalent to opt out. 
+ ``analyzer_config=Configure.text_analyzer(stopword_preset=StopwordsPreset.NONE)`` + (or equivalent) to opt out. Args: text: The text to tokenize. tokenization: The tokenization method to use (e.g. Tokenization.WORD). analyzer_config: Text analyzer settings (ASCII folding, stopword - preset name). ``stopword_preset`` may reference a built-in preset + preset name), built via ``Configure.text_analyzer(...)``. + ``stopword_preset`` may reference a built-in preset (``en`` / ``none``) or a name defined in ``stopword_presets``. stopwords: Fallback stopword config applied when ``analyzer_config.stopword_preset`` is not set. Same shape as a @@ -64,13 +65,13 @@ def text( 422 if both are supplied. Returns: - A TokenizeResult with indexed and query token lists. The generic - endpoint does not echo request fields (tokenization, analyzer_config, - stopwords, stopword_presets) back in the response. + A TokenizeResult with indexed and query token lists. The response + does not echo request fields back. Raises: WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. - ValueError: If both ``stopwords`` and ``stopword_presets`` are passed. + ValueError: If both ``stopwords`` and ``stopword_presets`` are passed, + or if any ``stopword_presets`` value is not a list/tuple of strings. """ self.__check_version() @@ -94,10 +95,28 @@ def text( if stopword_presets is not None: # Plain word-list shape matching a collection's - # invertedIndexConfig.stopwordPresets. - payload["stopwordPresets"] = { - name: list(words) for name, words in stopword_presets.items() - } + # invertedIndexConfig.stopwordPresets. Reject str (would + # silently split into characters) and pydantic models / + # other non-sequence shapes up-front so callers get a clear + # error instead of a malformed payload. 
+ validated: Dict[str, List[str]] = {} + for name, words in stopword_presets.items(): + if isinstance(words, (str, bytes)): + raise ValueError( + f"stopword_presets[{name!r}] must be a list of strings, " + f"got {type(words).__name__}" + ) + if not isinstance(words, (list, tuple)): + raise ValueError( + f"stopword_presets[{name!r}] must be a list of strings, " + f"got {type(words).__name__}" + ) + if not all(isinstance(w, str) for w in words): + raise ValueError( + f"stopword_presets[{name!r}] must contain only strings" + ) + validated[name] = list(words) + payload["stopwordPresets"] = validated def resp(response: Response) -> TokenizeResult: return TokenizeResult.model_validate(response.json()) diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py index 3bf980597..017abe429 100644 --- a/weaviate/tokenization/models.py +++ b/weaviate/tokenization/models.py @@ -13,14 +13,9 @@ class TokenizeResult(BaseModel): Attributes: indexed: Tokens as they would be stored in the inverted index. query: Tokens as they would be used for querying (after stopword removal). - tokenization: The tokenization method that was applied. Populated only by - the property-level endpoint, where the tokenization is resolved from - the property's schema. The generic ``/v1/tokenize`` endpoint does not - echo it back (the caller passed it). 
""" model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) indexed: List[str] query: List[str] - tokenization: Optional[Tokenization] = None From 52c2c8c8133eb1828be86f10ea824d74b822ca8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= Date: Tue, 21 Apr 2026 14:31:17 +0100 Subject: [PATCH 92/99] refactor: remove unused imports in tokenization models and format --- weaviate/tokenization/executor.py | 4 +--- weaviate/tokenization/models.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 825faee05..25b36e1d3 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -112,9 +112,7 @@ def text( f"got {type(words).__name__}" ) if not all(isinstance(w, str) for w in words): - raise ValueError( - f"stopword_presets[{name!r}] must contain only strings" - ) + raise ValueError(f"stopword_presets[{name!r}] must contain only strings") validated[name] = list(words) payload["stopwordPresets"] = validated diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py index 017abe429..f8fe7cb67 100644 --- a/weaviate/tokenization/models.py +++ b/weaviate/tokenization/models.py @@ -1,11 +1,9 @@ """Return types for tokenization operations.""" -from typing import List, Optional +from typing import List from pydantic import BaseModel, ConfigDict -from weaviate.collections.classes.config import Tokenization - class TokenizeResult(BaseModel): """Result of a tokenization operation. 
From 3de0955c0520358b5d12f81b094b98ef3d208559 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 21 Apr 2026 15:07:26 +0200 Subject: [PATCH 93/99] Use public classes for .text endpoint --- integration/test_tokenize.py | 48 ++++++++++++-------------- weaviate/classes/config.py | 4 +++ weaviate/classes/tokenization.py | 15 ++++++++ weaviate/collections/classes/config.py | 4 +++ weaviate/tokenization/async_.pyi | 8 ++--- weaviate/tokenization/executor.py | 8 ++--- weaviate/tokenization/sync.pyi | 8 ++--- 7 files changed, 57 insertions(+), 38 deletions(-) create mode 100644 weaviate/classes/tokenization.py diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index c939e8c5b..a5b16da32 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -1,9 +1,9 @@ """Integration tests for the tokenization module. These tests cover the client's responsibilities: -- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, _StopwordsCreate) +- Correct serialization of inputs (enums, TextAnalyzerConfigCreate, StopwordsCreate) - Correct deserialization of responses into the TokenizeResult object -- Client-side validation (_TextAnalyzerConfigCreate, stopwords/stopword_presets mutex) +- Client-side validation (TextAnalyzerConfigCreate, stopwords/stopword_presets mutex) - Version gate (>= 1.37.0) - Both sync and async client paths @@ -20,15 +20,15 @@ import pytest_asyncio import weaviate -from weaviate.collections.classes.config import ( +from weaviate.classes.tokenization import ( + StopwordsCreate, StopwordsPreset, + TextAnalyzerConfigCreate, Tokenization, - _StopwordsCreate, - _TextAnalyzerConfigCreate, + TokenizeResult, ) from weaviate.config import AdditionalConfig from weaviate.exceptions import WeaviateUnsupportedFeatureError -from weaviate.tokenization.models import TokenizeResult @pytest.fixture(scope="module") @@ -140,7 +140,7 @@ def test_tokenization_enum( ( { "text": "The quick brown fox", - "analyzer_config": 
_TextAnalyzerConfigCreate( + "analyzer_config": TextAnalyzerConfigCreate( stopword_preset=StopwordsPreset.NONE ), }, @@ -150,7 +150,7 @@ def test_tokenization_enum( ( { "text": "L'école est fermée", - "analyzer_config": _TextAnalyzerConfigCreate(ascii_fold=True), + "analyzer_config": TextAnalyzerConfigCreate(ascii_fold=True), }, ["l", "ecole", "est", "fermee"], ["l", "ecole", "est", "fermee"], @@ -158,7 +158,7 @@ def test_tokenization_enum( ( { "text": "L'école est fermée", - "analyzer_config": _TextAnalyzerConfigCreate( + "analyzer_config": TextAnalyzerConfigCreate( ascii_fold=True, ascii_fold_ignore=["é"] ), }, @@ -168,9 +168,7 @@ def test_tokenization_enum( ( { "text": "The quick brown fox", - "analyzer_config": _TextAnalyzerConfigCreate( - stopword_preset=StopwordsPreset.EN - ), + "analyzer_config": TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.EN), }, ["the", "quick", "brown", "fox"], ["quick", "brown", "fox"], @@ -178,7 +176,7 @@ def test_tokenization_enum( ( { "text": "The quick brown fox", - "analyzer_config": _TextAnalyzerConfigCreate(stopword_preset="en"), + "analyzer_config": TextAnalyzerConfigCreate(stopword_preset="en"), }, ["the", "quick", "brown", "fox"], ["quick", "brown", "fox"], @@ -186,7 +184,7 @@ def test_tokenization_enum( ( { "text": "The école est fermée", - "analyzer_config": _TextAnalyzerConfigCreate( + "analyzer_config": TextAnalyzerConfigCreate( ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN, @@ -198,7 +196,7 @@ def test_tokenization_enum( ( { "text": "the quick brown fox", - "stopwords": _StopwordsCreate( + "stopwords": StopwordsCreate( preset=StopwordsPreset.EN, additions=["quick"], removals=None ), }, @@ -208,7 +206,7 @@ def test_tokenization_enum( ( { "text": "the quick hello world", - "stopwords": _StopwordsCreate(preset=None, additions=["hello"], removals=None), + "stopwords": StopwordsCreate(preset=None, additions=["hello"], removals=None), }, ["the", "quick", "hello", "world"], ["quick", 
"world"], @@ -216,7 +214,7 @@ def test_tokenization_enum( ( { "text": "the quick is fast", - "stopwords": _StopwordsCreate(preset=None, additions=None, removals=["the"]), + "stopwords": StopwordsCreate(preset=None, additions=None, removals=["the"]), }, ["the", "quick", "is", "fast"], ["the", "quick", "fast"], @@ -224,7 +222,7 @@ def test_tokenization_enum( ( { "text": "hello world test", - "analyzer_config": _TextAnalyzerConfigCreate(stopword_preset="custom"), + "analyzer_config": TextAnalyzerConfigCreate(stopword_preset="custom"), "stopword_presets": {"custom": ["test"]}, }, ["hello", "world", "test"], @@ -276,7 +274,7 @@ def test_text_from_collection_config( result = client.tokenization.text( text="the quick brown fox", tokenization=recipe.tokenization, - stopwords=_StopwordsCreate(**stopwords.__dict__), + stopwords=StopwordsCreate(**stopwords.__dict__), ) assert result.indexed == ["the", "quick", "brown", "fox"] assert result.query == ["brown", "fox"] @@ -294,7 +292,7 @@ def test_property_and_generic_endpoints_agree( via_generic = client.tokenization.text( text=text, tokenization=recipe.tokenization, - stopwords=_StopwordsCreate(**stopwords.__dict__), + stopwords=StopwordsCreate(**stopwords.__dict__), ) assert via_property.indexed == via_generic.indexed @@ -360,7 +358,7 @@ class TestClientSideValidation: ) def test_ascii_fold_ignore_without_fold_raises(self, kwargs: dict) -> None: with pytest.raises(ValueError, match="asciiFoldIgnore"): - _TextAnalyzerConfigCreate(**kwargs) + TextAnalyzerConfigCreate(**kwargs) @pytest.mark.parametrize( "kwargs,expected", @@ -385,7 +383,7 @@ def test_ascii_fold_ignore_without_fold_raises(self, kwargs: dict) -> None: ids=["fold_with_ignore", "fold_without_ignore", "stopword_preset_only", "empty"], ) def test_valid_config(self, kwargs: dict, expected: dict) -> None: - cfg = _TextAnalyzerConfigCreate(**kwargs) + cfg = TextAnalyzerConfigCreate(**kwargs) for attr, value in expected.items(): assert getattr(cfg, attr) == value @@ -397,9 
+395,7 @@ def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateCli client.tokenization.text( text="hello", tokenization=Tokenization.WORD, - stopwords=_StopwordsCreate( - preset=StopwordsPreset.EN, additions=None, removals=None - ), + stopwords=StopwordsCreate(preset=StopwordsPreset.EN, additions=None, removals=None), stopword_presets={"custom": ["hello"]}, ) @@ -482,7 +478,7 @@ async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) - async def test_text_with_stopwords_fallback( self, async_client: weaviate.WeaviateAsyncClient ) -> None: - sw = _StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None) + sw = StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None) result = await async_client.tokenization.text( text="the quick brown fox", tokenization=Tokenization.WORD, diff --git a/weaviate/classes/config.py b/weaviate/classes/config.py index 868cd1c79..c154062d3 100644 --- a/weaviate/classes/config.py +++ b/weaviate/classes/config.py @@ -11,8 +11,10 @@ ReferenceProperty, ReplicationDeletionStrategy, Rerankers, + StopwordsCreate, StopwordsPreset, TextAnalyzerConfig, + TextAnalyzerConfigCreate, Tokenization, VectorDistances, ) @@ -39,8 +41,10 @@ "PQEncoderType", "ReferenceProperty", "Rerankers", + "StopwordsCreate", "StopwordsPreset", "TextAnalyzerConfig", + "TextAnalyzerConfigCreate", "Tokenization", "Vectorizers", "VectorDistances", diff --git a/weaviate/classes/tokenization.py b/weaviate/classes/tokenization.py new file mode 100644 index 000000000..ffb050614 --- /dev/null +++ b/weaviate/classes/tokenization.py @@ -0,0 +1,15 @@ +from weaviate.collections.classes.config import ( + StopwordsCreate, + StopwordsPreset, + TextAnalyzerConfigCreate, + Tokenization, +) +from weaviate.tokenization.models import TokenizeResult + +__all__ = [ + "StopwordsCreate", + "StopwordsPreset", + "TextAnalyzerConfigCreate", + "Tokenization", + "TokenizeResult", +] diff --git 
a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 6d60482a3..068399d70 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1647,6 +1647,7 @@ class _StopwordsConfig(_ConfigBase): StopwordsConfig = _StopwordsConfig +StopwordsCreate = _StopwordsCreate @dataclass @@ -2224,6 +2225,9 @@ def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate": return self +TextAnalyzerConfigCreate = _TextAnalyzerConfigCreate + + class Property(_ConfigCreateModel): """This class defines the structure of a data property that a collection can have within Weaviate. diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi index 11f4a13fc..59e815d87 100644 --- a/weaviate/tokenization/async_.pyi +++ b/weaviate/tokenization/async_.pyi @@ -1,9 +1,9 @@ from typing import Dict, List, Optional from weaviate.collections.classes.config import ( + StopwordsCreate, + TextAnalyzerConfigCreate, Tokenization, - _StopwordsCreate, - _TextAnalyzerConfigCreate, ) from weaviate.connect.v4 import ConnectionAsync from weaviate.tokenization.models import TokenizeResult @@ -16,7 +16,7 @@ class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]): text: str, tokenization: Tokenization, *, - analyzer_config: Optional[_TextAnalyzerConfigCreate] = None, - stopwords: Optional[_StopwordsCreate] = None, + analyzer_config: Optional[TextAnalyzerConfigCreate] = None, + stopwords: Optional[StopwordsCreate] = None, stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> TokenizeResult: ... 
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 25b36e1d3..a3beffd44 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -5,9 +5,9 @@ from httpx import Response from weaviate.collections.classes.config import ( + StopwordsCreate, + TextAnalyzerConfigCreate, Tokenization, - _StopwordsCreate, - _TextAnalyzerConfigCreate, ) from weaviate.connect import executor from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes @@ -32,8 +32,8 @@ def text( text: str, tokenization: Tokenization, *, - analyzer_config: Optional[_TextAnalyzerConfigCreate] = None, - stopwords: Optional[_StopwordsCreate] = None, + analyzer_config: Optional[TextAnalyzerConfigCreate] = None, + stopwords: Optional[StopwordsCreate] = None, stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> executor.Result[TokenizeResult]: """Tokenize text using the generic /v1/tokenize endpoint. diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi index d931aae51..2c2470f85 100644 --- a/weaviate/tokenization/sync.pyi +++ b/weaviate/tokenization/sync.pyi @@ -1,9 +1,9 @@ from typing import Dict, List, Optional from weaviate.collections.classes.config import ( + StopwordsCreate, + TextAnalyzerConfigCreate, Tokenization, - _StopwordsCreate, - _TextAnalyzerConfigCreate, ) from weaviate.connect.v4 import ConnectionSync from weaviate.tokenization.models import TokenizeResult @@ -16,7 +16,7 @@ class _Tokenization(_TokenizationExecutor[ConnectionSync]): text: str, tokenization: Tokenization, *, - analyzer_config: Optional[_TextAnalyzerConfigCreate] = None, - stopwords: Optional[_StopwordsCreate] = None, + analyzer_config: Optional[TextAnalyzerConfigCreate] = None, + stopwords: Optional[StopwordsCreate] = None, stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> TokenizeResult: ... 
From 55b136adfd37f8289b1aa9ffd3816335b25fd599 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 21 Apr 2026 15:40:47 +0200 Subject: [PATCH 94/99] Add overloads for exclusivity of stopwords --- integration/test_tokenize.py | 13 ++--- weaviate/tokenization/async_.pyi | 17 +++++-- weaviate/tokenization/executor.py | 83 ++++++++++++++++++++++++------- weaviate/tokenization/sync.pyi | 17 +++++-- 4 files changed, 93 insertions(+), 37 deletions(-) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index a5b16da32..dc244d2c3 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -274,7 +274,7 @@ def test_text_from_collection_config( result = client.tokenization.text( text="the quick brown fox", tokenization=recipe.tokenization, - stopwords=StopwordsCreate(**stopwords.__dict__), + stopwords=stopwords, ) assert result.indexed == ["the", "quick", "brown", "fox"] assert result.query == ["brown", "fox"] @@ -292,7 +292,7 @@ def test_property_and_generic_endpoints_agree( via_generic = client.tokenization.text( text=text, tokenization=recipe.tokenization, - stopwords=StopwordsCreate(**stopwords.__dict__), + stopwords=stopwords, ) assert via_property.indexed == via_generic.indexed @@ -308,13 +308,6 @@ def test_property_and_generic_endpoints_agree( class TestDeserialization: """Verify the client correctly deserializes response fields into TokenizeResult.""" - def test_generic_result_shape(self, client: weaviate.WeaviateClient) -> None: """Generic endpoint response deserializes into TokenizeResult with indexed and query lists.""" - result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD) - assert isinstance(result, TokenizeResult) - assert isinstance(result.indexed, list) - assert isinstance(result.query, list) - def test_property_result_shape(self, client: weaviate.WeaviateClient) -> None: """Property endpoint response deserializes into TokenizeResult — server resolves tokenization from the property's
schema.""" client.collections.delete("TestDeserPropTypes") @@ -405,7 +398,7 @@ def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateCli ({"custom": "hello"}, "must be a list of strings"), ( { - "custom": _StopwordsCreate( + "custom": StopwordsCreate( preset=StopwordsPreset.EN, additions=None, removals=None ), }, diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi index 59e815d87..6bd2d9e8a 100644 --- a/weaviate/tokenization/async_.pyi +++ b/weaviate/tokenization/async_.pyi @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional +from typing import Dict, List, Optional, overload from weaviate.collections.classes.config import ( StopwordsCreate, @@ -11,12 +11,21 @@ from weaviate.tokenization.models import TokenizeResult from .executor import _TokenizationExecutor class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]): + @overload async def text( self, text: str, tokenization: Tokenization, *, - analyzer_config: Optional[TextAnalyzerConfigCreate] = None, - stopwords: Optional[StopwordsCreate] = None, - stopword_presets: Optional[Dict[str, List[str]]] = None, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopwords: Optional[StopwordsCreate] = ..., + ) -> TokenizeResult: ... + @overload + async def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopword_presets: Optional[Dict[str, List[str]]] = ..., ) -> TokenizeResult: ... 
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index a3beffd44..150cc6dd9 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -1,6 +1,6 @@ """Tokenize executor.""" -from typing import Any, Dict, Generic, List, Optional +from typing import Any, Dict, Generic, List, Optional, overload from httpx import Response @@ -27,6 +27,29 @@ def __check_version(self) -> None: "1.37.0", ) + # Overloads make ``stopwords`` and ``stopword_presets`` mutually exclusive + # at type-check time. Passing both is additionally rejected at runtime with + # ``ValueError`` in the implementation below. + @overload + def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopwords: Optional[StopwordsCreate] = ..., + ) -> executor.Result[TokenizeResult]: ... + + @overload + def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopword_presets: Optional[Dict[str, List[str]]] = ..., + ) -> executor.Result[TokenizeResult]: ... + def text( self, text: str, @@ -40,33 +63,55 @@ def text( For ``word`` tokenization the server defaults to the built-in ``en`` stopword preset when no stopword configuration is supplied. Pass - ``analyzer_config=Configure.text_analyzer(stopword_preset=StopwordsPreset.NONE)`` - (or equivalent) to opt out. + ``analyzer_config=TextAnalyzerConfigCreate(stopword_preset="none")`` + or equivalent to opt out. + + Call patterns for stopword handling (``stopwords`` and + ``stopword_presets`` are mutually exclusive — pass at most one): + + 1. **No stopword config** — rely on the server default (``en`` for + word tokenization, none otherwise):: + + client.tokenization.text(text=..., tokenization=Tokenization.WORD) + + 2. 
**Apply a one-off stopwords block** via ``stopwords`` — the block + filters the query tokens directly, same shape as a collection's + ``invertedIndexConfig.stopwords``:: + + client.tokenization.text( + text=..., + tokenization=Tokenization.WORD, + stopwords=StopwordsCreate(preset=StopwordsPreset.EN, additions=["foo"]), + ) + + 3. **Register a named-preset catalog** via ``stopword_presets`` and + reference one by name from ``analyzer_config.stopword_preset``. + The catalog can also override built-in presets such as ``en``:: + + client.tokenization.text( + text=..., + tokenization=Tokenization.WORD, + analyzer_config=TextAnalyzerConfigCreate(stopword_preset="custom"), + stopword_presets={"custom": ["foo", "bar"]}, + ) Args: text: The text to tokenize. - tokenization: The tokenization method to use (e.g. Tokenization.WORD). + tokenization: The tokenization method to use (e.g. ``Tokenization.WORD``). analyzer_config: Text analyzer settings (ASCII folding, stopword preset name), built via ``Configure.text_analyzer(...)``. ``stopword_preset`` may reference a built-in preset (``en`` / ``none``) or a name defined in ``stopword_presets``. - stopwords: Fallback stopword config applied when - ``analyzer_config.stopword_preset`` is not set. Same shape as a - collection's ``invertedIndexConfig.stopwords`` — a base preset - optionally tweaked with ``additions`` / ``removals``. An empty - ``preset`` defaults to ``en``. - stopword_presets: User-defined named stopword presets, each a plain - list of words. A name matching a built-in (``en`` / ``none``) - replaces the built-in entirely. - - Note: - ``stopwords`` and ``stopword_presets`` are mutually exclusive on the - server — pass one or the other, not both. The server returns HTTP - 422 if both are supplied. + stopwords: One-off stopwords block applied directly to this request. + Mutually exclusive with ``stopword_presets``. + stopword_presets: Named-preset catalog (name → word list). 
Entries + can be referenced from ``analyzer_config.stopword_preset`` or + override built-ins like ``en``. Mutually exclusive with + ``stopwords``. Returns: - A TokenizeResult with indexed and query token lists. The response - does not echo request fields back. + A ``TokenizeResult`` with indexed and query token lists. The generic + endpoint does not echo request fields back in the response. Raises: WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi index 2c2470f85..7edf8994a 100644 --- a/weaviate/tokenization/sync.pyi +++ b/weaviate/tokenization/sync.pyi @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional +from typing import Dict, List, Optional, overload from weaviate.collections.classes.config import ( StopwordsCreate, @@ -11,12 +11,21 @@ from weaviate.tokenization.models import TokenizeResult from .executor import _TokenizationExecutor class _Tokenization(_TokenizationExecutor[ConnectionSync]): + @overload def text( self, text: str, tokenization: Tokenization, *, - analyzer_config: Optional[TextAnalyzerConfigCreate] = None, - stopwords: Optional[StopwordsCreate] = None, - stopword_presets: Optional[Dict[str, List[str]]] = None, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopwords: Optional[StopwordsCreate] = ..., + ) -> TokenizeResult: ... + @overload + def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopword_presets: Optional[Dict[str, List[str]]] = ..., ) -> TokenizeResult: ... 
From 7924e457cdd3315325db9a9e7ce2cc51bf7d6d04 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 21 Apr 2026 15:58:08 +0200 Subject: [PATCH 95/99] Accept collection config classes as stopwords --- weaviate/classes/tokenization.py | 2 ++ weaviate/collections/classes/config.py | 20 ++++++++++++++++++++ weaviate/tokenization/async_.pyi | 5 +++-- weaviate/tokenization/executor.py | 21 +++++++++++++++++---- weaviate/tokenization/sync.pyi | 5 +++-- 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/weaviate/classes/tokenization.py b/weaviate/classes/tokenization.py index ffb050614..0e89fc64b 100644 --- a/weaviate/classes/tokenization.py +++ b/weaviate/classes/tokenization.py @@ -1,4 +1,5 @@ from weaviate.collections.classes.config import ( + StopwordsConfig, StopwordsCreate, StopwordsPreset, TextAnalyzerConfigCreate, @@ -7,6 +8,7 @@ from weaviate.tokenization.models import TokenizeResult __all__ = [ + "StopwordsConfig", "StopwordsCreate", "StopwordsPreset", "TextAnalyzerConfigCreate", diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 068399d70..43d86375d 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1,5 +1,6 @@ import datetime from dataclasses import dataclass +from dataclasses import fields as _dataclass_fields from typing import ( Any, ClassVar, @@ -1649,6 +1650,25 @@ class _StopwordsConfig(_ConfigBase): StopwordsConfig = _StopwordsConfig StopwordsCreate = _StopwordsCreate +# Invariant: the read-side dataclass (_StopwordsConfig) and the write-side +# pydantic model (_StopwordsCreate) must carry the same set of field names so +# that values round-tripped from ``collection.config.get()`` can flow back into +# ``tokenization.text()`` without silent data loss. 
If a field is added to one +# but not the other, importing this module fails loudly; the read→write +# conversion in ``weaviate/tokenization/executor.py::_TokenizationExecutor.text`` +# depends on this parity. +_read_fields = {f.name for f in _dataclass_fields(_StopwordsConfig)} +_write_fields = set(_StopwordsCreate.model_fields.keys()) +if _read_fields != _write_fields: + raise RuntimeError( + "_StopwordsConfig / _StopwordsCreate field drift detected — " + f"read-only={_read_fields - _write_fields}, " + f"write-only={_write_fields - _read_fields}. " + "Update both classes together, or adapt the read→write conversion in " + "weaviate/tokenization/executor.py::_TokenizationExecutor.text." + ) +del _read_fields, _write_fields + @dataclass class _InvertedIndexConfig(_ConfigBase): diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi index 6bd2d9e8a..156e25c90 100644 --- a/weaviate/tokenization/async_.pyi +++ b/weaviate/tokenization/async_.pyi @@ -1,6 +1,7 @@ -from typing import Dict, List, Optional, overload +from typing import Dict, List, Optional, Union, overload from weaviate.collections.classes.config import ( + StopwordsConfig, StopwordsCreate, TextAnalyzerConfigCreate, Tokenization, @@ -18,7 +19,7 @@ class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]): tokenization: Tokenization, *, analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., - stopwords: Optional[StopwordsCreate] = ..., + stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ..., ) -> TokenizeResult: ... 
@overload async def text( diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 150cc6dd9..ea36e1cda 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -1,10 +1,11 @@ """Tokenize executor.""" -from typing import Any, Dict, Generic, List, Optional, overload +from typing import Any, Dict, Generic, List, Optional, Union, overload from httpx import Response from weaviate.collections.classes.config import ( + StopwordsConfig, StopwordsCreate, TextAnalyzerConfigCreate, Tokenization, @@ -29,7 +30,10 @@ def __check_version(self) -> None: # Overloads make ``stopwords`` and ``stopword_presets`` mutually exclusive # at type-check time. Passing both is additionally rejected at runtime with - # ``ValueError`` in the implementation below. + # ``ValueError`` in the implementation below. ``stopwords`` accepts either a + # ``StopwordsCreate`` (the write-side shape) or a ``StopwordsConfig`` (the + # read-side shape returned by ``collection.config.get()``), so values round- + # tripped through config reads can be passed back in directly. @overload def text( self, @@ -37,7 +41,7 @@ def text( tokenization: Tokenization, *, analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., - stopwords: Optional[StopwordsCreate] = ..., + stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ..., ) -> executor.Result[TokenizeResult]: ... @overload @@ -56,7 +60,7 @@ def text( tokenization: Tokenization, *, analyzer_config: Optional[TextAnalyzerConfigCreate] = None, - stopwords: Optional[StopwordsCreate] = None, + stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = None, stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> executor.Result[TokenizeResult]: """Tokenize text using the generic /v1/tokenize endpoint. 
@@ -134,6 +138,15 @@ def text( payload["analyzerConfig"] = ac_dict if stopwords is not None: + if isinstance(stopwords, StopwordsConfig): + # Widen from the read-side shape returned by config.get() to the + # write-side shape the server expects. Field parity between the + # two classes is enforced at import time in + # ``weaviate/collections/classes/config.py``, so iterating + # ``StopwordsCreate.model_fields`` copies every field. + stopwords = StopwordsCreate( + **{name: getattr(stopwords, name) for name in StopwordsCreate.model_fields} + ) sw_dict = stopwords._to_dict() if sw_dict: payload["stopwords"] = sw_dict diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi index 7edf8994a..389edd485 100644 --- a/weaviate/tokenization/sync.pyi +++ b/weaviate/tokenization/sync.pyi @@ -1,6 +1,7 @@ -from typing import Dict, List, Optional, overload +from typing import Dict, List, Optional, Union, overload from weaviate.collections.classes.config import ( + StopwordsConfig, StopwordsCreate, TextAnalyzerConfigCreate, Tokenization, @@ -18,7 +19,7 @@ class _Tokenization(_TokenizationExecutor[ConnectionSync]): tokenization: Tokenization, *, analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., - stopwords: Optional[StopwordsCreate] = ..., + stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ..., ) -> TokenizeResult: ... 
@overload def text( From 64bed62ea2dc6f3a05984d1ae4ce0700600027c7 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 21 Apr 2026 16:24:06 +0200 Subject: [PATCH 96/99] Improve docstring --- weaviate/tokenization/executor.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index ea36e1cda..0d287ba0e 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -107,11 +107,15 @@ def text( ``stopword_preset`` may reference a built-in preset (``en`` / ``none``) or a name defined in ``stopword_presets``. stopwords: One-off stopwords block applied directly to this request. + Mirrors the collection-level ``invertedIndexConfig.stopwords`` + shape — hence the rich model with preset / additions / removals. Mutually exclusive with ``stopword_presets``. - stopword_presets: Named-preset catalog (name → word list). Entries - can be referenced from ``analyzer_config.stopword_preset`` or - override built-ins like ``en``. Mutually exclusive with - ``stopwords``. + stopword_presets: Named-preset catalog (name → word list). Mirrors + the property-level preset catalog — a plain mapping, since a + property only references a preset by name (via + ``analyzer_config.stopword_preset``) rather than carrying the + full stopwords block. Entries can override built-ins like + ``en``. Mutually exclusive with ``stopwords``. Returns: A ``TokenizeResult`` with indexed and query token lists. 
The generic From 220e839360848a8c67b7eab322a97b232d12d5c9 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 21 Apr 2026 16:26:17 +0200 Subject: [PATCH 97/99] Hook up tokenization and clean up model --- weaviate/classes/__init__.py | 2 ++ weaviate/tokenization/models.py | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/weaviate/classes/__init__.py b/weaviate/classes/__init__.py index d495744ac..69af5d920 100644 --- a/weaviate/classes/__init__.py +++ b/weaviate/classes/__init__.py @@ -13,6 +13,7 @@ rbac, replication, tenants, + tokenization, ) # noqa: F401 from .config import ConsistencyLevel @@ -29,6 +30,7 @@ "init", "query", "tenants", + "tokenization", "rbac", "replication", ] diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py index f8fe7cb67..baeac140c 100644 --- a/weaviate/tokenization/models.py +++ b/weaviate/tokenization/models.py @@ -2,7 +2,7 @@ from typing import List -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel class TokenizeResult(BaseModel): @@ -13,7 +13,5 @@ class TokenizeResult(BaseModel): query: Tokens as they would be used for querying (after stopword removal). 
""" - model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) - indexed: List[str] query: List[str] From 081aaef36f83890eeb839e44363851477c44d1ca Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Wed, 22 Apr 2026 16:54:36 +0200 Subject: [PATCH 98/99] Move property back to tokenization --- integration/test_tokenize.py | 18 ++++++----- weaviate/collections/config/async_.pyi | 2 -- weaviate/collections/config/executor.py | 40 ------------------------- weaviate/collections/config/sync.pyi | 2 -- weaviate/tokenization/async_.pyi | 3 ++ weaviate/tokenization/executor.py | 40 +++++++++++++++++++++++++ weaviate/tokenization/sync.pyi | 1 + 7 files changed, 54 insertions(+), 52 deletions(-) diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py index dc244d2c3..d2d46916d 100644 --- a/integration/test_tokenize.py +++ b/integration/test_tokenize.py @@ -288,7 +288,9 @@ def test_property_and_generic_endpoints_agree( stopwords = config.inverted_index_config.stopwords text = "the quick brown fox" - via_property = recipe_collection.config.tokenize_property(property_name="recipe", text=text) + via_property = client.tokenization.for_property( + collection=recipe_collection.name, property_name="recipe", text=text + ) via_generic = client.tokenization.text( text=text, tokenization=recipe.tokenization, @@ -325,8 +327,9 @@ def test_property_result_shape(self, client: weaviate.WeaviateClient) -> None: ], } ) - col = client.collections.get("TestDeserPropTypes") - result = col.config.tokenize_property(property_name="tag", text=" Hello World ") + result = client.tokenization.for_property( + collection="TestDeserPropTypes", property_name="tag", text=" Hello World " + ) assert isinstance(result, TokenizeResult) assert result.indexed == ["Hello World"] finally: @@ -442,9 +445,8 @@ def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> Non def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> 
None: if client._connection._weaviate_version.is_at_least(1, 37, 0): pytest.skip("Version gate only applies to Weaviate < 1.37.0") - col = client.collections.get("Any") with pytest.raises(WeaviateUnsupportedFeatureError): - col.config.tokenize_property(property_name="title", text="hello") + client.tokenization.for_property(collection="Any", property_name="title", text="hello") # --------------------------------------------------------------------------- @@ -454,7 +456,7 @@ def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateC @pytest.mark.usefixtures("require_1_37") class TestAsyncClient: - """Verify text() and tokenize_property() work through the async client.""" + """Verify tokenization.text() and tokenization.for_property() work through the async client.""" @pytest.mark.asyncio async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None: @@ -498,8 +500,8 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien ], } ) - col = async_client.collections.get("TestAsyncPropTokenize") - result = await col.config.tokenize_property( + result = await async_client.tokenization.for_property( + collection="TestAsyncPropTokenize", property_name="title", text="The quick brown fox", ) diff --git a/weaviate/collections/config/async_.pyi b/weaviate/collections/config/async_.pyi index a1f740ded..015b70dab 100644 --- a/weaviate/collections/config/async_.pyi +++ b/weaviate/collections/config/async_.pyi @@ -27,7 +27,6 @@ from weaviate.collections.classes.config import ( from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate from weaviate.connect.v4 import ConnectionAsync -from weaviate.tokenization.models import TokenizeResult from .executor import _ConfigCollectionExecutor @@ -91,4 +90,3 @@ class _ConfigCollectionAsync(_ConfigCollectionExecutor[ConnectionAsync]): self, *, 
vector_config: Union[_VectorConfigCreate, List[_VectorConfigCreate]] ) -> None: ... async def delete_property_index(self, property_name: str, index_name: IndexName) -> bool: ... - async def tokenize_property(self, property_name: str, text: str) -> TokenizeResult: ... diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index fe9f5ec0d..103ab70ac 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -56,7 +56,6 @@ WeaviateInvalidInputError, WeaviateUnsupportedFeatureError, ) -from weaviate.tokenization.models import TokenizeResult from weaviate.util import ( _capitalize_first_letter, _decode_json_response_dict, @@ -667,42 +666,3 @@ def resp(res: Response) -> bool: error_msg="Property may not exist", status_codes=_ExpectedStatusCodes(ok_in=[200], error="property exists"), ) - - def tokenize_property( - self, - property_name: str, - text: str, - ) -> executor.Result[TokenizeResult]: - """Tokenize text using a property's configured tokenization settings. - - Args: - property_name: The property name whose tokenization config to use. - text: The text to tokenize. - - Returns: - A TokenizeResult with indexed and query token lists. - - Raises: - WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. 
- """ - if self._connection._weaviate_version.is_lower_than(1, 37, 0): - raise WeaviateUnsupportedFeatureError( - "Tokenization", - str(self._connection._weaviate_version), - "1.37.0", - ) - - path = f"/schema/{self._name}/properties/{property_name}/tokenize" - payload: Dict[str, Any] = {"text": text} - - def resp(response: Response) -> TokenizeResult: - return TokenizeResult.model_validate(response.json()) - - return executor.execute( - response_callback=resp, - method=self._connection.post, - path=path, - weaviate_object=payload, - error_msg="Property tokenization failed", - status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"), - ) diff --git a/weaviate/collections/config/sync.pyi b/weaviate/collections/config/sync.pyi index 3664a0e1b..e54d8c8fc 100644 --- a/weaviate/collections/config/sync.pyi +++ b/weaviate/collections/config/sync.pyi @@ -27,7 +27,6 @@ from weaviate.collections.classes.config import ( from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate from weaviate.connect.v4 import ConnectionSync -from weaviate.tokenization.models import TokenizeResult from .executor import _ConfigCollectionExecutor @@ -89,4 +88,3 @@ class _ConfigCollection(_ConfigCollectionExecutor[ConnectionSync]): self, *, vector_config: Union[_VectorConfigCreate, List[_VectorConfigCreate]] ) -> None: ... def delete_property_index(self, property_name: str, index_name: IndexName) -> bool: ... - def tokenize_property(self, property_name: str, text: str) -> TokenizeResult: ... 
diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi index 156e25c90..ba12abc2a 100644 --- a/weaviate/tokenization/async_.pyi +++ b/weaviate/tokenization/async_.pyi @@ -30,3 +30,6 @@ class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]): analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., stopword_presets: Optional[Dict[str, List[str]]] = ..., ) -> TokenizeResult: ... + async def for_property( + self, collection: str, property_name: str, text: str + ) -> TokenizeResult: ... diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py index 0d287ba0e..33f1c05f9 100644 --- a/weaviate/tokenization/executor.py +++ b/weaviate/tokenization/executor.py @@ -14,6 +14,7 @@ from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes from weaviate.exceptions import WeaviateUnsupportedFeatureError from weaviate.tokenization.models import TokenizeResult +from weaviate.util import _capitalize_first_letter class _TokenizationExecutor(Generic[ConnectionType]): @@ -189,3 +190,42 @@ def resp(response: Response) -> TokenizeResult: error_msg="Tokenization failed", status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"), ) + + def for_property( + self, + collection: str, + property_name: str, + text: str, + ) -> executor.Result[TokenizeResult]: + """Tokenize text using a property's configured tokenization settings. + + The server resolves the tokenization and analyzer configuration from + the property's schema, so callers only supply the text. + + Args: + collection: The collection that owns the property. + property_name: The property name whose tokenization config to use. + text: The text to tokenize. + + Returns: + A TokenizeResult with indexed and query token lists. + + Raises: + WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. 
+ """ + self.__check_version() + + path = f"/schema/{_capitalize_first_letter(collection)}/properties/{property_name}/tokenize" + payload: Dict[str, Any] = {"text": text} + + def resp(response: Response) -> TokenizeResult: + return TokenizeResult.model_validate(response.json()) + + return executor.execute( + response_callback=resp, + method=self._connection.post, + path=path, + weaviate_object=payload, + error_msg="Property tokenization failed", + status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"), + ) diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi index 389edd485..71aaaea5c 100644 --- a/weaviate/tokenization/sync.pyi +++ b/weaviate/tokenization/sync.pyi @@ -30,3 +30,4 @@ class _Tokenization(_TokenizationExecutor[ConnectionSync]): analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., stopword_presets: Optional[Dict[str, List[str]]] = ..., ) -> TokenizeResult: ... + def for_property(self, collection: str, property_name: str, text: str) -> TokenizeResult: ... 
From 846344f5f2541fc18eb23f3d013a74c1cdc078d Mon Sep 17 00:00:00 2001
From: Tommy Smith
Date: Thu, 23 Apr 2026 10:10:50 +0100
Subject: [PATCH 99/99] Add changelog

---
 docs/changelog.rst | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 9ea29ab29..d84a58c83 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,21 @@
 Changelog
 =========
 
+Version 4.21.0
+--------------
+This minor version includes:
+    - Support for new 1.37 features:
+        - Add support for the new ``blobHash`` property data type
+        - Add support for returning profiling when making queries with the ``return_metadata=["query_profile"]`` parameter
+        - Add support for on-demand tokenization through the ``client.tokenization`` namespace
+        - Add support for managing permissions for accessing the native MCP server
+        - Add support for collection export
+        - Add support for incremental backups
+    - Minor bug fixes and improvements:
+        - Change hybrid queries and aggregations to use the server-side default for the ``alpha`` parameter
+        - Fix rare flaky behaviour of ``client.batch.stream`` on server hangup
+
+
 Version 4.20.5
 --------------
 This patch version includes: