diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 8535e67b4..a1ff94f98 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -26,9 +26,9 @@ env: WEAVIATE_132: 1.32.27 WEAVIATE_133: 1.33.18 WEAVIATE_134: 1.34.19 - WEAVIATE_135: 1.35.15 - WEAVIATE_136: 1.36.6-8edcf08.amd64 - WEAVIATE_137: 1.37.0-dev-29d5c87.amd64 + WEAVIATE_135: 1.35.18 + WEAVIATE_136: 1.36.12 + WEAVIATE_137: 1.37.1-4e61e26.amd64 jobs: lint-and-format: diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 20840fafa..ddc92d0ed 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -31,6 +31,8 @@ services: DISABLE_LAZY_LOAD_SHARDS: 'true' GRPC_MAX_MESSAGE_SIZE: 100000000 # 100mb OBJECTS_TTL_DELETE_SCHEDULE: "@every 12h" # for objectTTL tests to work + EXPORT_ENABLED: 'true' + EXPORT_DEFAULT_PATH: "/var/lib/weaviate/exports" contextionary: environment: diff --git a/docs/changelog.rst b/docs/changelog.rst index 9ea29ab29..d84a58c83 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,21 @@ Changelog ========= +Version 4.21.0 +-------------- +This minor version includes: + - Support for new 1.37 features: + - Add support for the new ``blobHash`` property data type + - Add support for returning profiling when making queries with the ``return_metadata=["query_profile"]`` parameter + - Add support for on-demand tokenization through the ``client.tokenize`` namespace + - Add support for managing permissions for accessing the native MCP server + - Add support for collection export + - Add support for incremental backups + - Minor bug fixes and improvements: + - Change ``alpha`` queries and aggregations to use server-side default parameter + - Fixes rare flaky behaviour of ``client.batch.stream`` on server hangup + + Version 4.20.5 -------------- This patch version includes: diff --git a/integration/conftest.py b/integration/conftest.py index 256517ea6..73a032f57 100644 --- a/integration/conftest.py +++ 
b/integration/conftest.py @@ -1,5 +1,9 @@ import os import time +import sys +import threading +import traceback +import pytest from typing import ( Any, AsyncGenerator, @@ -14,7 +18,6 @@ ) from typing import Callable, TypeVar -import pytest import pytest_asyncio from _pytest.fixtures import SubRequest @@ -500,3 +503,66 @@ def retry_on_http_error( raise # This should never be reached, but satisfies the type checker raise last_exception # type: ignore + + +TIMEOUT_SECONDS = 300 + + +def dump_all_stacks(): + frames = sys._current_frames() + lines = ["\n===== DEADLOCK DETECTED — THREAD DUMP =====\n"] + for thread in threading.enumerate(): + frame = frames.get(thread.ident) # pyright: ignore + lines.append(f"\n--- Thread: {thread.name} (id={thread.ident}) ---") + if frame: + lines.append("".join(traceback.format_stack(frame))) + else: + lines.append(" (no frame)\n") + lines.append("===========================================\n") + return "\n".join(lines) + + +class DeadlockWatchdog: + def __init__(self, timeout): + self.timeout = timeout + self._timer = None + + def start(self, label): + self._label = label + self._timer = threading.Timer(self.timeout, self._on_timeout) + self._timer.daemon = True + self._timer.start() + + def stop(self): + if self._timer: + self._timer.cancel() + self._timer = None + + def _on_timeout(self): + sys.stderr.write(f"\n[WATCHDOG] Hung at: '{self._label}' after {self.timeout}s\n") + sys.stderr.write(dump_all_stacks()) + sys.stderr.flush() + os._exit(1) # Hard kill — works reliably in xdist workers + + +_watchdog = DeadlockWatchdog(TIMEOUT_SECONDS) + + +# Covers setup + call + teardown +@pytest.hookimpl(hookwrapper=True) +def pytest_runtest_protocol(item, nextitem): + _watchdog.start(item.nodeid) + try: + yield + finally: + _watchdog.stop() + + +# Separately watch session-scoped fixture setup +@pytest.hookimpl(hookwrapper=True) +def pytest_sessionstart(session): + _watchdog.start("session startup / session-scoped fixtures") + try: + yield + 
finally: + _watchdog.stop() diff --git a/integration/test_batch_v4.py b/integration/test_batch_v4.py index f4ce7669e..3646f1b1f 100644 --- a/integration/test_batch_v4.py +++ b/integration/test_batch_v4.py @@ -718,7 +718,7 @@ def test_non_existant_collection(client_factory: ClientFactory) -> None: # not, so we do not check for errors here -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) def test_number_of_stored_results_in_batch(client_factory: ClientFactory) -> None: client, name = client_factory() with client.batch.dynamic() as batch: @@ -818,7 +818,7 @@ def test_references_with_to_uuids(client_factory: ClientFactory) -> None: @pytest.mark.asyncio -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) async def test_add_one_hundred_thousand_objects_async_client( async_client_factory: AsyncClientFactory, ) -> None: @@ -849,7 +849,7 @@ async def test_add_one_hundred_thousand_objects_async_client( await client.collections.delete(name) -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) def test_add_one_hundred_thousand_objects_sync_client( client_factory: ClientFactory, ) -> None: diff --git a/integration/test_client.py b/integration/test_client.py index b72f53655..3560cbbb7 100644 --- a/integration/test_client.py +++ b/integration/test_client.py @@ -334,7 +334,7 @@ def test_collection_name_capitalization( client.collections.delete(name_big) -def test_client_cluster_with_lazy_shard_loading( +def test_client_cluster_without_lazy_shard_loading( client: weaviate.WeaviateClient, request: SubRequest ) -> None: try: diff --git a/integration/test_collection_batch.py b/integration/test_collection_batch.py index e670e4883..f2bd5be61 100644 --- a/integration/test_collection_batch.py +++ b/integration/test_collection_batch.py @@ -271,7 +271,7 @@ def test_non_existant_collection(collection_factory_get: CollectionFactoryGet) - @pytest.mark.asyncio -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) async def test_batch_one_hundred_thousand_objects_async_collection( 
batch_collection_async: BatchCollectionAsync, ) -> None: @@ -299,7 +299,7 @@ async def test_batch_one_hundred_thousand_objects_async_collection( @pytest.mark.asyncio -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) async def test_ingest_one_hundred_thousand_data_objects_async( batch_collection_async: BatchCollectionAsync, ) -> None: @@ -321,7 +321,7 @@ async def test_ingest_one_hundred_thousand_data_objects_async( assert len(results.errors) == 0, [obj.message for obj in results.errors.values()] -@pytest.mark.timeout(600) +@pytest.mark.timeout(60) def test_ingest_one_hundred_thousand_data_objects( batch_collection: BatchCollection, ) -> None: diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index d634e66cb..3f33a30b7 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -42,7 +42,11 @@ IndexName, ) from weaviate.collections.classes.tenants import Tenant -from weaviate.exceptions import UnexpectedStatusCodeError, WeaviateInvalidInputError +from weaviate.exceptions import ( + UnexpectedStatusCodeError, + WeaviateInvalidInputError, + WeaviateUnsupportedFeatureError, +) from integration.conftest import retry_on_http_error @@ -2200,3 +2204,473 @@ def test_delete_property_index( assert config.properties[0].index_range_filters is False assert config.properties[0].index_searchable is _index_searchable assert config.properties[0].index_filterable is _index_filterable + + +def test_property_text_analyzer_ascii_fold_version_gate( + collection_factory: CollectionFactory, +) -> None: + """On Weaviate < 1.37 the client must raise before sending the request.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_at_least(1, 37, 0): + pytest.skip("Version gate only applies to Weaviate < 1.37.0") + + with pytest.raises(WeaviateUnsupportedFeatureError): + collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", 
+ data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(ascii_fold=True), + ), + ], + ) + + +def test_collection_stopword_presets(collection_factory: CollectionFactory) -> None: + """User-defined stopword presets apply to properties that reference them. + + Properties can reference user-defined presets via text_analyzer.stopword_preset, + and built-in presets can coexist with user-defined ones. + """ + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + # User-defined French preset. + Property( + name="title_fr", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), + ), + # Built-in English preset, set per property. + Property( + name="title_en", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset=StopwordsPreset.EN), + ), + # No stopword override → uses the collection-level default. 
+ Property( + name="plain", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + ), + ], + ) + + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + + title_fr = next(p for p in config.properties if p.name == "title_fr") + title_en = next(p for p in config.properties if p.name == "title_en") + plain = next(p for p in config.properties if p.name == "plain") + assert title_fr.text_analyzer is not None + assert title_fr.text_analyzer.stopword_preset == "fr" + assert title_en.text_analyzer is not None + assert title_en.text_analyzer.stopword_preset == "en" + assert plain.text_analyzer is None + + +def test_collection_stopword_presets_update(collection_factory: CollectionFactory) -> None: + """Updating a stopword preset is reflected in the config.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le"]}, + ), + properties=[ + Property( + name="title_fr", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), + ), + ], + ) + + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le"]} + + collection.config.update( + inverted_index_config=Reconfigure.inverted_index( + stopword_presets={"fr": ["la"]}, + ), + ) + + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["la"]} + + +def test_collection_stopword_presets_remove_in_use_is_rejected( + collection_factory: CollectionFactory, +) -> None: + """The server rejects removing a stopword preset still referenced by a property.""" + dummy = collection_factory("dummy") + if 
dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + Property( + name="title_fr", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), + ), + ], + ) + + with pytest.raises(UnexpectedStatusCodeError): + collection.config.update( + inverted_index_config=Reconfigure.inverted_index(stopword_presets={}), + ) + + # The original preset must still be present after the rejected update. + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + + +def test_inverted_index_stopword_presets_version_gate( + collection_factory: CollectionFactory, +) -> None: + """On Weaviate < 1.37 the client must raise before sending the request.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_at_least(1, 37, 0): + pytest.skip("Version gate only applies to Weaviate < 1.37.0") + + with pytest.raises(WeaviateUnsupportedFeatureError): + collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la"]}, + ), + ) + + +def test_collection_stopword_presets_remove_unused_is_allowed( + collection_factory: CollectionFactory, +) -> None: + """Removing a preset that no property references must succeed.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={ + "fr": ["le", "la", "les"], + "es": ["el", "la", 
"los"], + }, + ), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), + ), + ], + ) + + # Drop only 'es' (unused). 'fr' is still referenced by title. + collection.config.update( + inverted_index_config=Reconfigure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + ) + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + + +def test_collection_stopword_presets_remove_referenced_by_nested_property_is_rejected( + collection_factory: CollectionFactory, +) -> None: + """A removed preset still referenced by a nested property must be rejected by the server.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + Property( + name="doc", + data_type=DataType.OBJECT, + nested_properties=[ + Property( + name="body", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), + ), + ], + ), + ], + ) + + with pytest.raises(UnexpectedStatusCodeError): + collection.config.update( + inverted_index_config=Reconfigure.inverted_index(stopword_presets={}), + ) + + # The original preset must still be present after the rejected update. 
+ config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + + +def test_collection_user_defined_stopword_preset_overrides_builtin( + collection_factory: CollectionFactory, +) -> None: + """A user-defined preset named 'en' is accepted and reflected in the config.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"en": ["hello"]}, + ), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="en"), + ), + ], + ) + + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"en": ["hello"]} + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.stopword_preset == "en" + + +def test_property_text_analyzer_combined_ascii_fold_and_stopword_preset( + collection_factory: CollectionFactory, +) -> None: + """A single property may combine ascii_fold and stopword_preset.""" + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer( + ascii_fold=True, + stopword_preset=StopwordsPreset.EN, + ), + ), + ], + ) + + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert 
title.text_analyzer.stopword_preset == "en" + + +def test_property_text_analyzer_ascii_fold_immutable( + collection_factory: CollectionFactory, +) -> None: + """The asciiFold setting is immutable on an existing property. + + Adding a new property via add_property is the only way to introduce a different + analyzer; the original property's analyzer cannot be mutated. + """ + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(ascii_fold=True, ascii_fold_ignore=["é"]), + ), + ], + ) + + # The config exposes the original ignore list and there's no client API + # surface to mutate text_analyzer on an existing property — it can only be + # set at create time. + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold_ignore == ["é"] + + # Adding a *new* property with a different analyzer is allowed. + collection.config.add_property( + Property( + name="title2", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]), + ), + ) + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + title2 = next(p for p in config.properties if p.name == "title2") + # Original property's analyzer is unchanged. + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold_ignore == ["é"] + # New property has its own analyzer. 
+ assert title2.text_analyzer is not None + assert title2.text_analyzer.ascii_fold_ignore == ["ñ"] + + +def test_stopwords_roundtrip_from_dict(collection_factory: CollectionFactory) -> None: + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopwords_additions=["a"], + stopwords_preset=StopwordsPreset.EN, + stopwords_removals=["the"], + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer( + ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr" + ), + ), + ], + ) + config = collection.config.get() + assert config.inverted_index_config.stopwords.preset == StopwordsPreset.EN + assert config.inverted_index_config.stopwords.removals == ["the"] + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + assert title.text_analyzer.stopword_preset == "fr" + + name = f"TestStopwordsRoundtrip{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_dict(config.to_dict()) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) + + +def test_stopword_presets_roundtrip_from_dict( + collection_factory: CollectionFactory, +) -> None: + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + 
pytest.skip("stopword_presets requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + inverted_index_config=Configure.inverted_index( + stopword_presets={"fr": ["le", "la", "les"]}, + ), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), + ), + ], + ) + + config = collection.config.get() + assert config.inverted_index_config.stopword_presets == {"fr": ["le", "la", "les"]} + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.stopword_preset == "fr" + + name = f"TestPresetRoundtrip{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_dict(config.to_dict()) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) + + +def test_text_analyzer_roundtrip_from_dict( + collection_factory: CollectionFactory, +) -> None: + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("text_analyzer requires Weaviate >= 1.37.0") + + collection = collection_factory( + vectorizer_config=Configure.Vectorizer.none(), + properties=[ + Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer( + ascii_fold=True, + ascii_fold_ignore=["é"], + stopword_preset=StopwordsPreset.EN, + ), + ), + ], + ) + + config = collection.config.get() + title = next(p for p in config.properties if p.name == "title") + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + assert title.text_analyzer.stopword_preset == "en" + + name = 
f"TestAnalyzerRoundtrip{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_dict(config.to_dict()) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) diff --git a/integration/test_collection_query_profile.py b/integration/test_collection_query_profile.py new file mode 100644 index 000000000..da93b90f5 --- /dev/null +++ b/integration/test_collection_query_profile.py @@ -0,0 +1,253 @@ +import re +from typing import Any + +import pytest + +from weaviate.collections import Collection +from weaviate.collections.classes.config import DataType, Property +from weaviate.collections.classes.data import DataObject +from weaviate.collections.classes.grpc import GroupBy, MetadataQuery +from weaviate.collections.classes.internal import SearchProfileReturn +from integration.conftest import CollectionFactory + +GO_DURATION_RE = re.compile(r"[\d.]+(ns|µs|ms|s|m|h)") + + +def assert_go_duration(value: str, label: str = "") -> None: + """Assert that a string looks like a Go duration (e.g. 
'1.234ms', '5.458µs').""" + assert GO_DURATION_RE.fullmatch(value), ( + f"Expected Go duration format for {label!r}, got {value!r}" + ) + + +def assert_common_profile(profile: SearchProfileReturn) -> None: + """Assertions shared by every search profile regardless of type.""" + assert len(profile.details) > 0, "Profile details should not be empty" + assert "total_took" in profile.details + assert_go_duration(profile.details["total_took"], "total_took") + for key, value in profile.details.items(): + assert isinstance(key, str) and key != "" + assert isinstance(value, str) and value != "" + + +def _create_and_populate(collection_factory: CollectionFactory) -> Collection[Any, Any]: + collection = collection_factory( + properties=[Property(name="text", data_type=DataType.TEXT)], + ) + if collection._connection._weaviate_version.is_lower_than(1, 36, 9): + pytest.skip("Query profiling requires Weaviate >= 1.36.9") + collection.data.insert_many( + [ + DataObject(properties={"text": "hello world"}, vector=[1.0, 0.0, 0.0]), + DataObject(properties={"text": "goodbye world"}, vector=[0.0, 1.0, 0.0]), + DataObject(properties={"text": "foo bar baz"}, vector=[0.0, 0.0, 1.0]), + ] + ) + return collection + + +def test_fetch_objects_with_query_profile(collection_factory: CollectionFactory) -> None: + """Test that query profiling works with fetch_objects (object lookup).""" + collection = _create_and_populate(collection_factory) + result = collection.query.fetch_objects( + return_metadata=MetadataQuery(query_profile=True), + ) + assert len(result.objects) == 3 + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert shard.name != "" + assert shard.node != "" + + assert "object" in shard.searches + assert "vector" not in shard.searches + assert "keyword" not in shard.searches + assert_common_profile(shard.searches["object"]) + + +def test_near_vector_with_query_profile(collection_factory: 
CollectionFactory) -> None: + """Test that query profiling works with near_vector search.""" + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery(query_profile=True, distance=True), + limit=2, + ) + assert len(result.objects) == 2 + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "vector" in shard.searches + assert "keyword" not in shard.searches + assert "object" not in shard.searches + vector_profile = shard.searches["vector"] + assert_common_profile(vector_profile) + + assert "vector_search_took" in vector_profile.details + assert_go_duration(vector_profile.details["vector_search_took"], "vector_search_took") + + assert "hnsw_flat_search" in vector_profile.details + assert vector_profile.details["hnsw_flat_search"] in ("true", "false") + + layer_keys = [k for k in vector_profile.details if k.startswith("knn_search_layer_")] + assert len(layer_keys) > 0, "Expected at least one knn_search_layer_*_took key" + for k in layer_keys: + assert_go_duration(vector_profile.details[k], k) + + assert "objects_took" in vector_profile.details + assert_go_duration(vector_profile.details["objects_took"], "objects_took") + + +def test_bm25_with_query_profile(collection_factory: CollectionFactory) -> None: + """Test that query profiling works with BM25 keyword search.""" + collection = _create_and_populate(collection_factory) + result = collection.query.bm25( + query="hello", + return_metadata=MetadataQuery(query_profile=True, score=True), + ) + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "keyword" in shard.searches + assert "vector" not in shard.searches + assert "object" not in shard.searches + keyword_profile = shard.searches["keyword"] + assert_common_profile(keyword_profile) + + assert 
"kwd_method" in keyword_profile.details + assert keyword_profile.details["kwd_method"] != "" + + assert "kwd_time" in keyword_profile.details + assert_go_duration(keyword_profile.details["kwd_time"], "kwd_time") + + assert "kwd_1_tok_time" in keyword_profile.details + assert_go_duration(keyword_profile.details["kwd_1_tok_time"], "kwd_1_tok_time") + + assert "kwd_6_res_count" in keyword_profile.details + assert keyword_profile.details["kwd_6_res_count"].isdigit() + assert int(keyword_profile.details["kwd_6_res_count"]) >= 0 + + +def test_hybrid_with_query_profile(collection_factory: CollectionFactory) -> None: + """Test that query profiling works with hybrid search (both vector and keyword).""" + collection = _create_and_populate(collection_factory) + result = collection.query.hybrid( + query="hello", + vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery(query_profile=True), + limit=2, + ) + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "vector" in shard.searches, "Hybrid should produce a 'vector' profile" + assert "keyword" in shard.searches, "Hybrid should produce a 'keyword' profile" + assert "object" not in shard.searches + + assert_common_profile(shard.searches["vector"]) + assert "vector_search_took" in shard.searches["vector"].details + + assert_common_profile(shard.searches["keyword"]) + assert "kwd_method" in shard.searches["keyword"].details + + +def test_near_vector_group_by_with_query_profile( + collection_factory: CollectionFactory, +) -> None: + """Test that query profiling works with group_by.""" + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery(query_profile=True), + group_by=GroupBy(prop="text", objects_per_group=1, number_of_groups=3), + ) + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = 
result.query_profile.shards[0] + assert "vector" in shard.searches + assert_common_profile(shard.searches["vector"]) + + +def test_full_with_profile(collection_factory: CollectionFactory) -> None: + """Test that MetadataQuery.full_with_profile() returns profiling and all other metadata.""" + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery.full_with_profile(), + limit=1, + ) + assert len(result.objects) == 1 + obj = result.objects[0] + assert obj.metadata.distance is not None + assert obj.metadata.creation_time is not None + assert obj.metadata.last_update_time is not None + assert obj.metadata.score is not None + assert obj.metadata.explain_score is not None + + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + assert_common_profile(result.query_profile.shards[0].searches["vector"]) + + +def test_full_excludes_query_profile(collection_factory: CollectionFactory) -> None: + """Test that MetadataQuery.full() does not include query profiling.""" + collection = _create_and_populate(collection_factory) + result = collection.query.fetch_objects( + return_metadata=MetadataQuery.full(), + ) + assert result.query_profile is None + + +def test_no_query_profile_when_not_requested( + collection_factory: CollectionFactory, +) -> None: + """Test that query_profile is None when not requested.""" + collection = _create_and_populate(collection_factory) + result = collection.query.fetch_objects( + return_metadata=MetadataQuery(distance=True), + ) + assert result.query_profile is None + + +def test_query_profile_with_metadata_list( + collection_factory: CollectionFactory, +) -> None: + """Test that query profiling works when using list-style metadata.""" + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=["query_profile", "distance"], + limit=2, 
+ ) + assert result.query_profile is not None + assert len(result.query_profile.shards) > 0 + + shard = result.query_profile.shards[0] + assert "vector" in shard.searches + assert_common_profile(shard.searches["vector"]) + + +def test_query_profile_details_are_strings( + collection_factory: CollectionFactory, +) -> None: + """Test that all detail keys and values are non-empty strings.""" + collection = _create_and_populate(collection_factory) + result = collection.query.near_vector( + near_vector=[1.0, 0.0, 0.0], + return_metadata=MetadataQuery(query_profile=True), + limit=1, + ) + assert result.query_profile is not None + for shard in result.query_profile.shards: + assert len(shard.searches) > 0, "Shard should have at least one search profile" + for search_type, profile in shard.searches.items(): + assert isinstance(search_type, str) and search_type != "" + assert len(profile.details) > 0 + for key, value in profile.details.items(): + assert isinstance(key, str) and key != "" + assert isinstance(value, str) and value != "" diff --git a/integration/test_export.py b/integration/test_export.py new file mode 100644 index 000000000..71e3a146f --- /dev/null +++ b/integration/test_export.py @@ -0,0 +1,222 @@ +import time +import uuid +from typing import Generator, List, Union + +import pytest +from _pytest.fixtures import SubRequest + +import weaviate +from weaviate.collections.classes.config import DataType, Property +from weaviate.exceptions import UnexpectedStatusCodeException +from weaviate.export.export import ( + ExportFileFormat, + ExportStatus, + ExportStorage, +) + +from .conftest import _sanitize_collection_name + +pytestmark = pytest.mark.xdist_group(name="export") + +BACKEND = ExportStorage.FILESYSTEM + +COLLECTION_NAME = "ExportTestCollection" + +OBJECT_PROPS = [{"title": f"object {i}", "count": i} for i in range(5)] + +OBJECT_IDS = [ + "fd34ccf4-1a2a-47ad-8446-231839366c3f", + "2653442b-05d8-4fa3-b46a-d4a152eb63bc", + "55374edb-17de-487f-86cb-9a9fbc30823f", 
+ "124ff6aa-597f-44d0-8c13-62fbb1e66888", + "f787386e-7d1c-481f-b8c3-3dbfd8bbad85", +] + + +@pytest.fixture(scope="module") +def client() -> Generator[weaviate.WeaviateClient, None, None]: + client = weaviate.connect_to_local() + if client._connection._weaviate_version.is_lower_than(1, 37, 0): + client.close() + pytest.skip("Collection export is not supported in versions lower than 1.37.0") + client.collections.delete(COLLECTION_NAME) + + col = client.collections.create( + name=COLLECTION_NAME, + properties=[ + Property(name="title", data_type=DataType.TEXT), + Property(name="count", data_type=DataType.INT), + ], + ) + for i, props in enumerate(OBJECT_PROPS): + col.data.insert(properties=props, uuid=OBJECT_IDS[i]) + + yield client + client.collections.delete(COLLECTION_NAME) + client.close() + + +def unique_export_id(name: str) -> str: + """Generate a unique export ID based on the test name.""" + name = _sanitize_collection_name(name) + random_part = str(uuid.uuid4()).replace("-", "")[:12] + return name + random_part + + +def test_create_export_with_waiting(client: weaviate.WeaviateClient, request: SubRequest) -> None: + """Create an export with wait_for_completion=True.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=[COLLECTION_NAME], + wait_for_completion=True, + ) + assert resp.status == ExportStatus.SUCCESS + assert COLLECTION_NAME in resp.collections + + +def test_create_export_without_waiting( + client: weaviate.WeaviateClient, request: SubRequest +) -> None: + """Create an export without waiting, then poll status.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=[COLLECTION_NAME], + ) + assert resp.status in [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS] 
+ + # poll until done + while True: + status = client.export.get_status(export_id=export_id, backend=BACKEND) + assert status.status in [ + ExportStatus.STARTED, + ExportStatus.TRANSFERRING, + ExportStatus.SUCCESS, + ] + if status.status == ExportStatus.SUCCESS: + break + time.sleep(0.1) + + assert status.export_id.lower() == export_id.lower() + + +def test_get_export_status(client: weaviate.WeaviateClient, request: SubRequest) -> None: + """Check status of a completed export.""" + export_id = unique_export_id(request.node.name) + + client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=[COLLECTION_NAME], + wait_for_completion=True, + ) + + status = client.export.get_status(export_id=export_id, backend=BACKEND) + assert status.status == ExportStatus.SUCCESS + assert status.export_id.lower() == export_id.lower() + assert status.backend == BACKEND.value + + +def test_create_export_with_parquet_format( + client: weaviate.WeaviateClient, request: SubRequest +) -> None: + """Create an export explicitly specifying parquet format.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=[COLLECTION_NAME], + wait_for_completion=True, + ) + assert resp.status == ExportStatus.SUCCESS + + +@pytest.mark.parametrize("include", [[COLLECTION_NAME], COLLECTION_NAME]) +def test_create_export_include_as_str_and_list( + client: weaviate.WeaviateClient, include: Union[str, List[str]], request: SubRequest +) -> None: + """Verify include_collections accepts both str and list.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=include, + wait_for_completion=True, + ) + assert resp.status == ExportStatus.SUCCESS + assert COLLECTION_NAME in 
resp.collections + + +def test_cancel_export(client: weaviate.WeaviateClient, request: SubRequest) -> None: + """Cancel a running export.""" + export_id = unique_export_id(request.node.name) + + resp = client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=[COLLECTION_NAME], + ) + assert resp.status in [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS] + + client.export.cancel(export_id=export_id, backend=BACKEND) + + # verify it's cancelled or already completed (race condition) + start = time.time() + while time.time() - start < 5: + status = client.export.get_status(export_id=export_id, backend=BACKEND) + if status.status in [ExportStatus.CANCELED, ExportStatus.SUCCESS]: + break + time.sleep(0.1) + assert status.status in [ExportStatus.CANCELED, ExportStatus.SUCCESS] + + +def test_fail_on_non_existing_collection( + client: weaviate.WeaviateClient, request: SubRequest +) -> None: + """Fail export on non-existing collection.""" + export_id = unique_export_id(request.node.name) + with pytest.raises(UnexpectedStatusCodeException): + client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=["NonExistingCollection"], + wait_for_completion=True, + ) + + +def test_fail_on_both_include_and_exclude( + client: weaviate.WeaviateClient, request: SubRequest +) -> None: + """Fail when both include and exclude collections are set.""" + export_id = unique_export_id(request.node.name) + with pytest.raises(ValueError): + client.export.create( + export_id=export_id, + backend=BACKEND, + file_format=ExportFileFormat.PARQUET, + include_collections=COLLECTION_NAME, + exclude_collections="SomeOther", + ) + + +def test_fail_status_for_non_existing_export( + client: weaviate.WeaviateClient, request: SubRequest +) -> None: + """Fail checking status for non-existing export.""" + export_id = unique_export_id(request.node.name) + with 
pytest.raises(UnexpectedStatusCodeException): + client.export.get_status(export_id=export_id, backend=BACKEND) diff --git a/integration/test_rbac.py b/integration/test_rbac.py index d98d238a7..0f8657a2d 100644 --- a/integration/test_rbac.py +++ b/integration/test_rbac.py @@ -14,6 +14,7 @@ CollectionsPermissionOutput, DataPermissionOutput, GroupsPermissionOutput, + MCPPermissionOutput, NodesPermissionOutput, Role, ReplicatePermissionOutput, @@ -44,6 +45,7 @@ backups_permissions=[ BackupsPermissionOutput(collection="Test", actions={Actions.Backups.MANAGE}) ], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -62,6 +64,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -84,6 +87,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -104,6 +108,7 @@ DataPermissionOutput(collection="*", tenant="*", actions={Actions.Data.CREATE}) ], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -137,6 +142,7 @@ ), ], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -155,6 +161,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[ NodesPermissionOutput( verbosity="verbose", actions={Actions.Nodes.READ}, collection="Test" @@ -177,6 +184,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[ NodesPermissionOutput( verbosity="minimal", actions={Actions.Nodes.READ}, collection="*" @@ -203,6 +211,7 @@ ], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -221,6 +230,7 
@@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[ TenantsPermissionOutput( @@ -247,6 +257,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[ TenantsPermissionOutput( @@ -290,6 +301,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -310,6 +322,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[ @@ -355,6 +368,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -379,6 +393,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -403,6 +418,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], @@ -410,6 +426,48 @@ ), 32, # Minimum version for alias permissions ), + ( + Permissions.mcp(create=True, read=True, update=True), + Role( + name="MCPAll", + alias_permissions=[], + cluster_permissions=[], + users_permissions=[], + collections_permissions=[], + roles_permissions=[], + data_permissions=[], + backups_permissions=[], + mcp_permissions=[ + MCPPermissionOutput( + actions={Actions.MCP.CREATE, Actions.MCP.READ, Actions.MCP.UPDATE} + ) + ], + nodes_permissions=[], + tenants_permissions=[], + replicate_permissions=[], + groups_permissions=[], + ), + 37, # Minimum version for MCP permissions + ), + ( + Permissions.mcp(read=True), + Role( + name="MCPRead", + alias_permissions=[], + cluster_permissions=[], + users_permissions=[], + 
collections_permissions=[], + roles_permissions=[], + data_permissions=[], + backups_permissions=[], + mcp_permissions=[MCPPermissionOutput(actions={Actions.MCP.READ})], + nodes_permissions=[], + tenants_permissions=[], + replicate_permissions=[], + groups_permissions=[], + ), + 37, # Minimum version for MCP permissions + ), ( Permissions.Groups.oidc(group="MyGroup", read=True), Role( @@ -421,6 +479,7 @@ roles_permissions=[], data_permissions=[], backups_permissions=[], + mcp_permissions=[], nodes_permissions=[], tenants_permissions=[], replicate_permissions=[], diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py new file mode 100644 index 000000000..d2d46916d --- /dev/null +++ b/integration/test_tokenize.py @@ -0,0 +1,512 @@ +"""Integration tests for the tokenization module. + +These tests cover the client's responsibilities: +- Correct serialization of inputs (enums, TextAnalyzerConfigCreate, StopwordsCreate) +- Correct deserialization of responses into the TokenizeResult object +- Client-side validation (TextAnalyzerConfigCreate, stopwords/stopword_presets mutex) +- Version gate (>= 1.37.0) +- Both sync and async client paths + +Server-side behavior this client relies on: +- Word tokenization defaults to preset "en" when no stopword config is sent. +- Both endpoints return only ``indexed`` and ``query``. +- ``stopwords`` and ``stopword_presets`` are mutually exclusive on the generic + endpoint — the server rejects requests that set both. 
+""" + +from typing import AsyncGenerator, Generator + +import pytest +import pytest_asyncio + +import weaviate +from weaviate.classes.tokenization import ( + StopwordsCreate, + StopwordsPreset, + TextAnalyzerConfigCreate, + Tokenization, + TokenizeResult, +) +from weaviate.config import AdditionalConfig +from weaviate.exceptions import WeaviateUnsupportedFeatureError + + +@pytest.fixture(scope="module") +def client() -> Generator[weaviate.WeaviateClient, None, None]: + c = weaviate.connect_to_local( + additional_config=AdditionalConfig(timeout=(60, 120)), + ) + yield c + c.close() + + +@pytest.fixture(autouse=False) +def require_1_37(client: weaviate.WeaviateClient) -> None: + if client._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("Tokenization requires Weaviate >= 1.37.0") + + +@pytest_asyncio.fixture +async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]: + c = weaviate.use_async_with_local( + additional_config=AdditionalConfig(timeout=(60, 120)), + ) + await c.connect() + yield c + await c.close() + + +@pytest.fixture +def recipe_collection(client: weaviate.WeaviateClient) -> Generator: + """Collection with a `recipe` word-tokenized property and an en + ["quick"] stopwords config.""" + name = "TestTokenizeRecipe" + client.collections.delete(name) + client.collections.create_from_dict( + { + "class": name, + "vectorizer": "none", + "invertedIndexConfig": { + "stopwords": {"preset": "en", "additions": ["quick"]}, + }, + "properties": [ + {"name": "recipe", "dataType": ["text"], "tokenization": "word"}, + ], + } + ) + try: + yield client.collections.get(name) + finally: + client.collections.delete(name) + + +# --------------------------------------------------------------------------- +# Serialization +# --------------------------------------------------------------------------- + + +@pytest.mark.usefixtures("require_1_37") +class TestSerialization: + """Verify the client correctly serializes different input 
forms.""" + + @pytest.mark.parametrize( + "tokenization,text,expected_indexed,expected_query", + [ + # "the" is an English stopword — filtered from the query output + # by the server's default "en" preset for word tokenization. + ( + Tokenization.WORD, + "The quick brown fox", + ["the", "quick", "brown", "fox"], + ["quick", "brown", "fox"], + ), + # Non-word tokenizations do not apply the default "en" preset. + ( + Tokenization.LOWERCASE, + "Hello World Test", + ["hello", "world", "test"], + ["hello", "world", "test"], + ), + ( + Tokenization.WHITESPACE, + "Hello World Test", + ["Hello", "World", "Test"], + ["Hello", "World", "Test"], + ), + (Tokenization.FIELD, " Hello World ", ["Hello World"], ["Hello World"]), + (Tokenization.TRIGRAM, "Hello", ["hel", "ell", "llo"], ["hel", "ell", "llo"]), + ], + ) + def test_tokenization_enum( + self, + client: weaviate.WeaviateClient, + tokenization: Tokenization, + text: str, + expected_indexed: list, + expected_query: list, + ) -> None: + result = client.tokenization.text(text=text, tokenization=tokenization) + assert isinstance(result, TokenizeResult) + assert result.indexed == expected_indexed + assert result.query == expected_query + + @pytest.mark.parametrize( + "call_kwargs,expected_indexed,expected_query", + [ + ( + {"text": "The quick brown fox"}, + ["the", "quick", "brown", "fox"], + ["quick", "brown", "fox"], + ), + ( + { + "text": "The quick brown fox", + "analyzer_config": TextAnalyzerConfigCreate( + stopword_preset=StopwordsPreset.NONE + ), + }, + ["the", "quick", "brown", "fox"], + ["the", "quick", "brown", "fox"], + ), + ( + { + "text": "L'école est fermée", + "analyzer_config": TextAnalyzerConfigCreate(ascii_fold=True), + }, + ["l", "ecole", "est", "fermee"], + ["l", "ecole", "est", "fermee"], + ), + ( + { + "text": "L'école est fermée", + "analyzer_config": TextAnalyzerConfigCreate( + ascii_fold=True, ascii_fold_ignore=["é"] + ), + }, + ["l", "école", "est", "fermée"], + ["l", "école", "est", "fermée"], + ), 
+ ( + { + "text": "The quick brown fox", + "analyzer_config": TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.EN), + }, + ["the", "quick", "brown", "fox"], + ["quick", "brown", "fox"], + ), + ( + { + "text": "The quick brown fox", + "analyzer_config": TextAnalyzerConfigCreate(stopword_preset="en"), + }, + ["the", "quick", "brown", "fox"], + ["quick", "brown", "fox"], + ), + ( + { + "text": "The école est fermée", + "analyzer_config": TextAnalyzerConfigCreate( + ascii_fold=True, + ascii_fold_ignore=["é"], + stopword_preset=StopwordsPreset.EN, + ), + }, + ["the", "école", "est", "fermée"], + ["école", "est", "fermée"], + ), + ( + { + "text": "the quick brown fox", + "stopwords": StopwordsCreate( + preset=StopwordsPreset.EN, additions=["quick"], removals=None + ), + }, + ["the", "quick", "brown", "fox"], + ["brown", "fox"], + ), + ( + { + "text": "the quick hello world", + "stopwords": StopwordsCreate(preset=None, additions=["hello"], removals=None), + }, + ["the", "quick", "hello", "world"], + ["quick", "world"], + ), + ( + { + "text": "the quick is fast", + "stopwords": StopwordsCreate(preset=None, additions=None, removals=["the"]), + }, + ["the", "quick", "is", "fast"], + ["the", "quick", "fast"], + ), + ( + { + "text": "hello world test", + "analyzer_config": TextAnalyzerConfigCreate(stopword_preset="custom"), + "stopword_presets": {"custom": ["test"]}, + }, + ["hello", "world", "test"], + ["hello", "world"], + ), + ( + { + "text": "the quick hello world", + "stopword_presets": {"en": ["hello"]}, + }, + ["the", "quick", "hello", "world"], + ["the", "quick", "world"], + ), + ], + ids=[ + "default_en_applied_for_word", + "opt_out_of_default_en", + "ascii_fold", + "ascii_fold_with_ignore", + "stopword_preset_enum", + "stopword_preset_string", + "ascii_fold_combined_with_stopwords", + "stopwords_fallback", + "stopwords_additions_default_preset_to_en", + "stopwords_removals_default_preset_to_en", + "stopword_presets_named_reference", + 
"stopword_presets_override_builtin_en", + ], + ) + def test_text_tokenize( + self, + client: weaviate.WeaviateClient, + call_kwargs: dict, + expected_indexed: list, + expected_query: list, + ) -> None: + result = client.tokenization.text(tokenization=Tokenization.WORD, **call_kwargs) + assert isinstance(result, TokenizeResult) + assert result.indexed == expected_indexed + assert result.query == expected_query + + def test_text_from_collection_config( + self, client: weaviate.WeaviateClient, recipe_collection + ) -> None: + """Values round-tripped through config.get() feed back into tokenization.text().""" + config = recipe_collection.config.get() + recipe = next(p for p in config.properties if p.name == "recipe") + stopwords = config.inverted_index_config.stopwords + result = client.tokenization.text( + text="the quick brown fox", + tokenization=recipe.tokenization, + stopwords=stopwords, + ) + assert result.indexed == ["the", "quick", "brown", "fox"] + assert result.query == ["brown", "fox"] + + def test_property_and_generic_endpoints_agree( + self, client: weaviate.WeaviateClient, recipe_collection + ) -> None: + """Property endpoint (server resolves config from schema) produces the same indexed/query as the generic endpoint fed the same config.""" + config = recipe_collection.config.get() + recipe = next(p for p in config.properties if p.name == "recipe") + stopwords = config.inverted_index_config.stopwords + + text = "the quick brown fox" + via_property = client.tokenization.for_property( + collection=recipe_collection.name, property_name="recipe", text=text + ) + via_generic = client.tokenization.text( + text=text, + tokenization=recipe.tokenization, + stopwords=stopwords, + ) + + assert via_property.indexed == via_generic.indexed + assert via_property.query == via_generic.query + + +# --------------------------------------------------------------------------- +# Deserialization +# --------------------------------------------------------------------------- + + 
+@pytest.mark.usefixtures("require_1_37") +class TestDeserialization: + """Verify the client correctly deserializes response fields into TokenizeResult.""" + + def test_property_result_shape(self, client: weaviate.WeaviateClient) -> None: + """Property endpoint response deserializes into TokenizeResult — server resolves tokenization from the property's schema.""" + client.collections.delete("TestDeserPropTypes") + try: + client.collections.create_from_dict( + { + "class": "TestDeserPropTypes", + "vectorizer": "none", + "properties": [ + { + "name": "tag", + "dataType": ["text"], + "tokenization": "field", + }, + ], + } + ) + result = client.tokenization.for_property( + collection="TestDeserPropTypes", property_name="tag", text=" Hello World " + ) + assert isinstance(result, TokenizeResult) + assert result.indexed == ["Hello World"] + finally: + client.collections.delete("TestDeserPropTypes") + + +# --------------------------------------------------------------------------- +# Client-side validation +# --------------------------------------------------------------------------- + + +class TestClientSideValidation: + """Verify that client-side validation rejects invalid input before hitting the server.""" + + @pytest.mark.parametrize( + "kwargs", + [ + {"ascii_fold": False, "ascii_fold_ignore": ["é"]}, + {"ascii_fold_ignore": ["é"]}, + ], + ids=["explicit_false", "default"], + ) + def test_ascii_fold_ignore_without_fold_raises(self, kwargs: dict) -> None: + with pytest.raises(ValueError, match="asciiFoldIgnore"): + TextAnalyzerConfigCreate(**kwargs) + + @pytest.mark.parametrize( + "kwargs,expected", + [ + ( + {"ascii_fold": True, "ascii_fold_ignore": ["é", "ñ"]}, + {"asciiFold": True, "asciiFoldIgnore": ["é", "ñ"]}, + ), + ( + {"ascii_fold": True}, + {"asciiFold": True, "asciiFoldIgnore": None}, + ), + ( + {"stopword_preset": "en"}, + {"stopwordPreset": "en"}, + ), + ( + {}, + {"asciiFold": None, "asciiFoldIgnore": None, "stopwordPreset": None}, + ), + ], + 
ids=["fold_with_ignore", "fold_without_ignore", "stopword_preset_only", "empty"], + ) + def test_valid_config(self, kwargs: dict, expected: dict) -> None: + cfg = TextAnalyzerConfigCreate(**kwargs) + for attr, value in expected.items(): + assert getattr(cfg, attr) == value + + def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateClient) -> None: + """Client rejects the mutex violation locally with ValueError, before sending the request (which the server would also reject with 422).""" + if client._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("Tokenization requires Weaviate >= 1.37.0") + with pytest.raises(ValueError, match="mutually exclusive"): + client.tokenization.text( + text="hello", + tokenization=Tokenization.WORD, + stopwords=StopwordsCreate(preset=StopwordsPreset.EN, additions=None, removals=None), + stopword_presets={"custom": ["hello"]}, + ) + + @pytest.mark.parametrize( + "stopword_presets,match", + [ + ({"custom": "hello"}, "must be a list of strings"), + ( + { + "custom": StopwordsCreate( + preset=StopwordsPreset.EN, additions=None, removals=None + ), + }, + "must be a list of strings", + ), + ({"custom": ["hello", 123]}, "must contain only strings"), + ], + ids=["str_value", "pydantic_model_value", "non_string_element"], + ) + def test_stopword_presets_invalid_shape_raises( + self, + client: weaviate.WeaviateClient, + stopword_presets: dict, + match: str, + ) -> None: + """Client rejects malformed stopword_presets values locally before sending — str would silently split into characters; a pydantic model would serialize to field tuples.""" + if client._connection._weaviate_version.is_lower_than(1, 37, 0): + pytest.skip("Tokenization requires Weaviate >= 1.37.0") + with pytest.raises(ValueError, match=match): + client.tokenization.text( + text="hello", + tokenization=Tokenization.WORD, + stopword_presets=stopword_presets, + ) + + +# 
--------------------------------------------------------------------------- +# Version gate +# --------------------------------------------------------------------------- + + +class TestVersionGate: + """On Weaviate < 1.37 the client must raise before sending the request.""" + + def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None: + if client._connection._weaviate_version.is_at_least(1, 37, 0): + pytest.skip("Version gate only applies to Weaviate < 1.37.0") + with pytest.raises(WeaviateUnsupportedFeatureError): + client.tokenization.text(text="hello", tokenization=Tokenization.WORD) + + def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None: + if client._connection._weaviate_version.is_at_least(1, 37, 0): + pytest.skip("Version gate only applies to Weaviate < 1.37.0") + with pytest.raises(WeaviateUnsupportedFeatureError): + client.tokenization.for_property(collection="Any", property_name="title", text="hello") + + +# --------------------------------------------------------------------------- +# Async client +# --------------------------------------------------------------------------- + + +@pytest.mark.usefixtures("require_1_37") +class TestAsyncClient: + """Verify tokenization.text() and tokenization.for_property() work through the async client.""" + + @pytest.mark.asyncio + async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None: + result = await async_client.tokenization.text( + text="The quick brown fox", + tokenization=Tokenization.WORD, + ) + assert isinstance(result, TokenizeResult) + assert result.indexed == ["the", "quick", "brown", "fox"] + # default "en" applied server-side. 
+ assert result.query == ["quick", "brown", "fox"] + + @pytest.mark.asyncio + async def test_text_with_stopwords_fallback( + self, async_client: weaviate.WeaviateAsyncClient + ) -> None: + sw = StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None) + result = await async_client.tokenization.text( + text="the quick brown fox", + tokenization=Tokenization.WORD, + stopwords=sw, + ) + assert result.indexed == ["the", "quick", "brown", "fox"] + assert result.query == ["brown", "fox"] + + @pytest.mark.asyncio + async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None: + await async_client.collections.delete("TestAsyncPropTokenize") + try: + await async_client.collections.create_from_dict( + { + "class": "TestAsyncPropTokenize", + "vectorizer": "none", + "properties": [ + { + "name": "title", + "dataType": ["text"], + "tokenization": "word", + "textAnalyzer": {"stopwordPreset": "en"}, + }, + ], + } + ) + result = await async_client.tokenization.for_property( + collection="TestAsyncPropTokenize", + property_name="title", + text="The quick brown fox", + ) + assert isinstance(result, TokenizeResult) + assert result.indexed == ["the", "quick", "brown", "fox"] + assert result.query == ["quick", "brown", "fox"] + finally: + await async_client.collections.delete("TestAsyncPropTokenize") diff --git a/pytest.ini b/pytest.ini index f87230abf..71337d9e9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,4 +3,4 @@ addopts = -m 'not profiling' --benchmark-skip -l --capture=sys --max-worker-rest markers = profiling: marks tests that can be profiled timeout: marks tests with a custom timeout in seconds (default: 300) -asyncio_default_fixture_loop_scope = function \ No newline at end of file +asyncio_default_fixture_loop_scope = function diff --git a/requirements-test.txt b/requirements-test.txt index c267bab24..cfa1996c5 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,6 +3,7 @@ pytest-cov==6.2.1 
pytest-asyncio==1.3.0 pytest-benchmark==5.1.0 pytest-profiling==1.8.1 +pytest-timeout==2.4.0 coverage==7.10.7 pytest-xdist==3.7.0 werkzeug==3.1.6 diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 523ddc980..84bba4a63 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -4,18 +4,21 @@ from pydantic import ValidationError from weaviate.collections.classes.config import ( - _AsyncReplicationConfig, - _ReplicationConfig, - _ReplicationConfigUpdate, Configure, DataType, Property, Reconfigure, ReferenceProperty, + StopwordsPreset, + Tokenization, Vectorizers, + _AsyncReplicationConfig, _CollectionConfigCreate, _GenerativeProvider, + _ReplicationConfig, + _ReplicationConfigUpdate, _RerankerProvider, + _TextAnalyzerConfigCreate, _VectorizerConfigCreate, _ReplicationConfigCreate, ReplicationDeletionStrategy, @@ -1337,6 +1340,10 @@ def test_config_create_with_properties( name="blob", data_type=DataType.BLOB, ), + Property( + name="blob_hash", + data_type=DataType.BLOB_HASH, + ), Property( name="phone_number", data_type=DataType.PHONE_NUMBER, @@ -1400,6 +1407,10 @@ def test_config_create_with_properties( "dataType": ["blob"], "name": "blob", }, + { + "dataType": ["blobHash"], + "name": "blob_hash", + }, { "dataType": ["phoneNumber"], "name": "phone_number", @@ -3021,3 +3032,156 @@ def test_nested_property_with_id_name_is_allowed() -> None: ], ) assert prop.nestedProperties[0].name == "id" + + +class Test_TextAnalyzerConfigCreate: + def test_property_without_text_analyzer_omits_key(self) -> None: + prop = Property(name="title", data_type=DataType.TEXT) + assert "textAnalyzer" not in prop._to_dict() + + def test_property_with_ascii_fold_only(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + text_analyzer=Configure.text_analyzer(ascii_fold=True), + ) + assert prop._to_dict()["textAnalyzer"] == {"asciiFold": True} + + def test_property_with_ascii_fold_and_ignore(self) -> None: + prop = 
Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(ascii_fold=True, ascii_fold_ignore=["é", "ñ"]), + ) + out = prop._to_dict() + assert out["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["é", "ñ"], + } + assert out["tokenization"] == "word" + + def test_text_analyzer_rejects_ignore_without_ascii_fold(self) -> None: + with pytest.raises(ValidationError): + _TextAnalyzerConfigCreate(ascii_fold_ignore=["é"]) + + def test_nested_property_with_text_analyzer(self) -> None: + prop = Property( + name="meta", + data_type=DataType.OBJECT, + nested_properties=[ + Property( + name="title", + data_type=DataType.TEXT, + text_analyzer=Configure.text_analyzer(ascii_fold=True, ascii_fold_ignore=["ñ"]), + ), + ], + ) + out = prop._to_dict() + assert out["nestedProperties"][0]["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["ñ"], + } + + def test_text_analyzer_rejects_wrong_types(self) -> None: + with pytest.raises(ValidationError): + _TextAnalyzerConfigCreate(ascii_fold="yes") # type: ignore[arg-type] + with pytest.raises(ValidationError): + _TextAnalyzerConfigCreate(ascii_fold_ignore="é") + + def test_text_analyzer_stopword_preset_builtin_enum(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset=StopwordsPreset.EN), + ) + assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "en"} + + def test_text_analyzer_stopword_preset_user_defined_string(self) -> None: + prop = Property( + name="title_fr", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), + ) + assert prop._to_dict()["textAnalyzer"] == {"stopwordPreset": "fr"} + + def test_text_analyzer_combined_ascii_fold_and_stopword_preset(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + 
tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer( + ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset="fr" + ), + ) + assert prop._to_dict()["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["é"], + "stopwordPreset": "fr", + } + + def test_text_analyzer_stopword_preset_only_omits_other_keys(self) -> None: + prop = Property( + name="title", + data_type=DataType.TEXT, + tokenization=Tokenization.WORD, + text_analyzer=Configure.text_analyzer(stopword_preset="fr"), + ) + out = prop._to_dict() + assert "asciiFold" not in out["textAnalyzer"] + assert "asciiFoldIgnore" not in out["textAnalyzer"] + + +class TestInvertedIndexStopwordPresets: + def test_configure_inverted_index_with_stopword_presets(self) -> None: + ic = Configure.inverted_index( + stopword_presets={ + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + }, + ) + out = ic._to_dict() + assert out["stopwordPresets"] == { + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + } + + def test_configure_inverted_index_without_stopword_presets_omits_key(self) -> None: + ic = Configure.inverted_index() + assert "stopwordPresets" not in ic._to_dict() + + def test_reconfigure_inverted_index_merges_stopword_presets(self) -> None: + rc = Reconfigure.inverted_index(stopword_presets={"fr": ["le", "la"]}) + existing = { + "stopwords": {"preset": "en", "additions": None, "removals": None}, + "bm25": {"b": 0.75, "k1": 1.2}, + "cleanupIntervalSeconds": 60, + } + merged = rc.merge_with_existing(existing) + assert merged["stopwordPresets"] == {"fr": ["le", "la"]} + # other fields untouched + assert merged["stopwords"]["preset"] == "en" + assert merged["bm25"]["b"] == 0.75 + + def test_reconfigure_inverted_index_replaces_existing_stopword_presets(self) -> None: + rc = Reconfigure.inverted_index(stopword_presets={"fr": ["le"]}) + existing = { + "stopwords": {"preset": "en", "additions": None, "removals": None}, + "stopwordPresets": {"fr": ["le", "la", "les"], "es": ["el"]}, + 
} + merged = rc.merge_with_existing(existing) + # The new value fully replaces the prior dict (this matches the server-side + # PUT semantics — see test_tokenize.py::test_remove_unused_preset_is_allowed). + assert merged["stopwordPresets"] == {"fr": ["le"]} + + def test_reconfigure_inverted_index_without_stopword_presets_leaves_existing(self) -> None: + rc = Reconfigure.inverted_index(bm25_b=0.7, bm25_k1=1.1) + existing = { + "stopwords": {"preset": "en", "additions": None, "removals": None}, + "bm25": {"b": 0.75, "k1": 1.2}, + "stopwordPresets": {"fr": ["le", "la"]}, + } + merged = rc.merge_with_existing(existing) + assert merged["stopwordPresets"] == {"fr": ["le", "la"]} diff --git a/test/collection/test_config_methods.py b/test/collection/test_config_methods.py index fbc33b702..2e40acacc 100644 --- a/test/collection/test_config_methods.py +++ b/test/collection/test_config_methods.py @@ -1,4 +1,9 @@ -from weaviate.collections.classes.config_methods import _collection_configs_simple_from_json +from weaviate.collections.classes.config_methods import ( + _collection_config_from_json, + _collection_configs_simple_from_json, + _nested_properties_from_config, + _properties_from_config, +) def test_collection_config_simple_from_json_with_none_vectorizer_config() -> None: @@ -68,3 +73,175 @@ def test_collection_config_simple_from_json_with_none_vectorizer_config() -> Non assert "default" in vec_config assert vec_config["default"].vectorizer.model == {} assert vec_config["default"].vectorizer.source_properties is None + + +def _make_text_prop(name: str, **extra) -> dict: + base = { + "name": name, + "dataType": ["text"], + "indexFilterable": True, + "indexSearchable": True, + "indexRangeFilters": False, + "tokenization": "word", + } + base.update(extra) + return base + + +def test_properties_from_config_parses_text_analyzer() -> None: + schema = { + "vectorizer": "none", + "properties": [ + _make_text_prop( + "title", + textAnalyzer={"asciiFold": True, "asciiFoldIgnore": 
["é"]}, + ), + _make_text_prop("body"), + ], + } + props = _properties_from_config(schema) + title = next(p for p in props if p.name == "title") + body = next(p for p in props if p.name == "body") + + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + + assert body.text_analyzer is None + + # The dataclass round-trips back to the wire format. + assert title.to_dict()["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["é"], + } + assert "textAnalyzer" not in body.to_dict() + + +def test_properties_from_config_text_analyzer_omitted_when_no_ascii_fold() -> None: + """If the server response omits asciiFold, the client treats text_analyzer as unset.""" + schema = { + "vectorizer": "none", + "properties": [ + # Server response with textAnalyzer present but no asciiFold key + _make_text_prop("title", textAnalyzer={"asciiFoldIgnore": ["é"]}), + ], + } + title = _properties_from_config(schema)[0] + assert title.text_analyzer is None + + +def test_nested_properties_from_config_parses_text_analyzer() -> None: + nested = _nested_properties_from_config( + [ + _make_text_prop( + "title", + textAnalyzer={"asciiFold": True, "asciiFoldIgnore": ["ñ"]}, + ), + ] + ) + assert nested[0].text_analyzer is not None + assert nested[0].text_analyzer.ascii_fold is True + assert nested[0].text_analyzer.ascii_fold_ignore == ["ñ"] + assert nested[0].to_dict()["textAnalyzer"] == { + "asciiFold": True, + "asciiFoldIgnore": ["ñ"], + } + + +def test_properties_from_config_parses_stopword_preset_only() -> None: + """A property with only stopwordPreset (no asciiFold) must still produce a text_analyzer.""" + schema = { + "vectorizer": "none", + "properties": [ + _make_text_prop("title", textAnalyzer={"stopwordPreset": "fr"}), + ], + } + title = _properties_from_config(schema)[0] + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is False + assert 
title.text_analyzer.ascii_fold_ignore is None + assert title.text_analyzer.stopword_preset == "fr" + + +def test_properties_from_config_parses_combined_text_analyzer() -> None: + schema = { + "vectorizer": "none", + "properties": [ + _make_text_prop( + "title", + textAnalyzer={ + "asciiFold": True, + "asciiFoldIgnore": ["é"], + "stopwordPreset": "fr", + }, + ), + ], + } + title = _properties_from_config(schema)[0] + assert title.text_analyzer is not None + assert title.text_analyzer.ascii_fold is True + assert title.text_analyzer.ascii_fold_ignore == ["é"] + assert title.text_analyzer.stopword_preset == "fr" + + +def _full_schema(class_name: str, **inverted_overrides) -> dict: + inverted = { + "bm25": {"b": 0.75, "k1": 1.2}, + "cleanupIntervalSeconds": 60, + "stopwords": {"preset": "en", "additions": None, "removals": None}, + } + inverted.update(inverted_overrides) + return { + "class": class_name, + "vectorizer": "none", + "properties": [], + "invertedIndexConfig": inverted, + "replicationConfig": {"factor": 1, "deletionStrategy": "NoAutomatedResolution"}, + "shardingConfig": { + "virtualPerPhysical": 128, + "desiredCount": 1, + "actualCount": 1, + "desiredVirtualCount": 128, + "actualVirtualCount": 128, + "key": "_id", + "strategy": "hash", + "function": "murmur3", + }, + "vectorIndexType": "hnsw", + "vectorIndexConfig": { + "skip": False, + "cleanupIntervalSeconds": 300, + "maxConnections": 64, + "efConstruction": 128, + "ef": -1, + "dynamicEfMin": 100, + "dynamicEfMax": 500, + "dynamicEfFactor": 8, + "vectorCacheMaxObjects": 1000000000000, + "flatSearchCutoff": 40000, + "distance": "cosine", + }, + } + + +def test_collection_config_parses_stopword_presets() -> None: + """The inverted index config exposes stopwordPresets when present in the schema.""" + schema = _full_schema( + "TestStopwordPresets", + stopwordPresets={ + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + }, + ) + full = _collection_config_from_json(schema) + assert 
full.inverted_index_config.stopword_presets == { + "fr": ["le", "la", "les"], + "es": ["el", "la", "los"], + } + + +def test_collection_config_stopword_presets_absent() -> None: + """If the server response omits stopwordPresets, the parsed value is None.""" + schema = _full_schema("TestNoStopwordPresets") + full = _collection_config_from_json(schema) + assert full.inverted_index_config.stopword_presets is None diff --git a/weaviate/__init__.py b/weaviate/__init__.py index 562b142bc..2e7e5e58b 100644 --- a/weaviate/__init__.py +++ b/weaviate/__init__.py @@ -21,6 +21,7 @@ embedded, exceptions, outputs, + tokenization, types, ) from .client import Client, WeaviateAsyncClient, WeaviateClient @@ -67,6 +68,7 @@ "embedded", "exceptions", "outputs", + "tokenization", "types", "use_async_with_custom", "use_async_with_embedded", diff --git a/weaviate/backup/executor.py b/weaviate/backup/executor.py index b50515a2f..ea29e4512 100644 --- a/weaviate/backup/executor.py +++ b/weaviate/backup/executor.py @@ -88,6 +88,16 @@ def create( wait_for_completion=wait_for_completion, ) + if ( + incremental_base_backup_id is not None + and self._connection._weaviate_version.is_lower_than(1, 37, 0) + ): + raise WeaviateUnsupportedFeatureError( + "Incremental backups", + str(self._connection._weaviate_version), + "1.37.0", + ) + payload: dict = { "id": backup_id, "include": include_collections, diff --git a/weaviate/classes/__init__.py b/weaviate/classes/__init__.py index 467a17f37..69af5d920 100644 --- a/weaviate/classes/__init__.py +++ b/weaviate/classes/__init__.py @@ -5,6 +5,7 @@ batch, config, data, + export, generate, generics, init, @@ -12,6 +13,7 @@ rbac, replication, tenants, + tokenization, ) # noqa: F401 from .config import ConsistencyLevel @@ -22,11 +24,13 @@ "config", "ConsistencyLevel", "data", + "export", "generate", "generics", "init", "query", "tenants", + "tokenization", "rbac", "replication", ] diff --git a/weaviate/classes/config.py b/weaviate/classes/config.py index 
ce1faf993..c154062d3 100644 --- a/weaviate/classes/config.py +++ b/weaviate/classes/config.py @@ -11,7 +11,10 @@ ReferenceProperty, ReplicationDeletionStrategy, Rerankers, + StopwordsCreate, StopwordsPreset, + TextAnalyzerConfig, + TextAnalyzerConfigCreate, Tokenization, VectorDistances, ) @@ -38,7 +41,10 @@ "PQEncoderType", "ReferenceProperty", "Rerankers", + "StopwordsCreate", "StopwordsPreset", + "TextAnalyzerConfig", + "TextAnalyzerConfigCreate", "Tokenization", "Vectorizers", "VectorDistances", diff --git a/weaviate/classes/export.py b/weaviate/classes/export.py new file mode 100644 index 000000000..d14fb3f07 --- /dev/null +++ b/weaviate/classes/export.py @@ -0,0 +1,9 @@ +from weaviate.export.export import ( + ExportFileFormat, + ExportStorage, +) + +__all__ = [ + "ExportFileFormat", + "ExportStorage", +] diff --git a/weaviate/classes/tokenization.py b/weaviate/classes/tokenization.py new file mode 100644 index 000000000..0e89fc64b --- /dev/null +++ b/weaviate/classes/tokenization.py @@ -0,0 +1,17 @@ +from weaviate.collections.classes.config import ( + StopwordsConfig, + StopwordsCreate, + StopwordsPreset, + TextAnalyzerConfigCreate, + Tokenization, +) +from weaviate.tokenization.models import TokenizeResult + +__all__ = [ + "StopwordsConfig", + "StopwordsCreate", + "StopwordsPreset", + "TextAnalyzerConfigCreate", + "Tokenization", + "TokenizeResult", +] diff --git a/weaviate/client.py b/weaviate/client.py index d7f9080f4..fe5ad17fe 100644 --- a/weaviate/client.py +++ b/weaviate/client.py @@ -20,8 +20,10 @@ from .connect.v4 import ConnectionAsync, ConnectionSync from .debug import _Debug, _DebugAsync from .embedded import EmbeddedOptions +from .export import _Export, _ExportAsync from .groups import _Groups, _GroupsAsync from .rbac import _Roles, _RolesAsync +from .tokenization import _Tokenization, _TokenizationAsync from .types import NUMBER from .users import _Users, _UsersAsync @@ -76,12 +78,14 @@ def __init__( ) self.alias = _AliasAsync(self._connection) 
self.backup = _BackupAsync(self._connection) + self.export = _ExportAsync(self._connection) self.batch = _BatchClientWrapperAsync(self._connection) self.cluster = _ClusterAsync(self._connection) self.collections = _CollectionsAsync(self._connection) self.debug = _DebugAsync(self._connection) self.groups = _GroupsAsync(self._connection) self.roles = _RolesAsync(self._connection) + self.tokenization = _TokenizationAsync(self._connection) self.users = _UsersAsync(self._connection) async def __aenter__(self) -> "WeaviateAsyncClient": @@ -152,11 +156,13 @@ def __init__( consistency_level=None, ) self.backup = _Backup(self._connection) + self.export = _Export(self._connection) self.cluster = _Cluster(self._connection) self.collections = collections self.debug = _Debug(self._connection) self.groups = _Groups(self._connection) self.roles = _Roles(self._connection) + self.tokenization = _Tokenization(self._connection) self.users = _Users(self._connection) def __enter__(self) -> "WeaviateClient": diff --git a/weaviate/client.pyi b/weaviate/client.pyi index 9b32af15f..d7b99eba6 100644 --- a/weaviate/client.pyi +++ b/weaviate/client.pyi @@ -20,7 +20,9 @@ from .backup import _Backup, _BackupAsync from .cluster import _Cluster, _ClusterAsync from .collections.batch.client import _BatchClientWrapper, _BatchClientWrapperAsync from .debug import _Debug, _DebugAsync +from .export import _Export, _ExportAsync from .rbac import _Roles, _RolesAsync +from .tokenization import _Tokenization, _TokenizationAsync from .types import NUMBER TIMEOUT_TYPE = Union[Tuple[NUMBER, NUMBER], NUMBER] @@ -29,12 +31,14 @@ class WeaviateAsyncClient(_WeaviateClientExecutor[ConnectionAsync]): _connection: ConnectionAsync alias: _AliasAsync backup: _BackupAsync + export: _ExportAsync batch: _BatchClientWrapperAsync collections: _CollectionsAsync cluster: _ClusterAsync debug: _DebugAsync groups: _GroupsAsync roles: _RolesAsync + tokenization: _TokenizationAsync users: _UsersAsync async def close(self) -> 
None: ... @@ -52,12 +56,14 @@ class WeaviateClient(_WeaviateClientExecutor[ConnectionSync]): _connection: ConnectionSync alias: _Alias backup: _Backup + export: _Export batch: _BatchClientWrapper collections: _Collections cluster: _Cluster debug: _Debug groups: _Groups roles: _Roles + tokenization: _Tokenization users: _Users def close(self) -> None: ... diff --git a/weaviate/collections/aggregations/hybrid/async_.pyi b/weaviate/collections/aggregations/hybrid/async_.pyi index 0bf5a2aca..335dd53d0 100644 --- a/weaviate/collections/aggregations/hybrid/async_.pyi +++ b/weaviate/collections/aggregations/hybrid/async_.pyi @@ -19,7 +19,7 @@ class _HybridAsync(_HybridExecutor[ConnectionAsync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -36,7 +36,7 @@ class _HybridAsync(_HybridExecutor[ConnectionAsync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -53,7 +53,7 @@ class _HybridAsync(_HybridExecutor[ConnectionAsync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, diff --git a/weaviate/collections/aggregations/hybrid/executor.py b/weaviate/collections/aggregations/hybrid/executor.py index 0bcd3a289..4c3f882e6 100644 --- a/weaviate/collections/aggregations/hybrid/executor.py +++ b/weaviate/collections/aggregations/hybrid/executor.py @@ -22,7 +22,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -40,7 +40,7 @@ def 
hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -58,7 +58,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -75,7 +75,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, diff --git a/weaviate/collections/aggregations/hybrid/sync.pyi b/weaviate/collections/aggregations/hybrid/sync.pyi index 336e5b8d4..ff9374b7f 100644 --- a/weaviate/collections/aggregations/hybrid/sync.pyi +++ b/weaviate/collections/aggregations/hybrid/sync.pyi @@ -19,7 +19,7 @@ class _Hybrid(_HybridExecutor[ConnectionSync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -36,7 +36,7 @@ class _Hybrid(_HybridExecutor[ConnectionSync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, @@ -53,7 +53,7 @@ class _Hybrid(_HybridExecutor[ConnectionSync]): self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[List[float]] = None, query_properties: Optional[List[str]] = None, object_limit: Optional[int] = None, diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 45cbfd0e5..43d86375d 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -1,5 
+1,6 @@ import datetime from dataclasses import dataclass +from dataclasses import fields as _dataclass_fields from typing import ( Any, ClassVar, @@ -14,7 +15,14 @@ cast, ) -from pydantic import AnyHttpUrl, Field, TypeAdapter, ValidationInfo, field_validator +from pydantic import ( + AnyHttpUrl, + Field, + TypeAdapter, + ValidationInfo, + field_validator, + model_validator, +) from typing_extensions import TypeAlias from typing_extensions import deprecated as typing_deprecated @@ -141,6 +149,7 @@ class DataType(str, BaseEnum): UUID_ARRAY: UUID array data type. GEO_COORDINATES: Geo coordinates data type. BLOB: Blob data type. + BLOB_HASH: Blob hash data type. PHONE_NUMBER: Phone number data type. OBJECT: Object data type. OBJECT_ARRAY: Object array data type. @@ -160,6 +169,7 @@ class DataType(str, BaseEnum): UUID_ARRAY = "uuid[]" GEO_COORDINATES = "geoCoordinates" BLOB = "blob" + BLOB_HASH = "blobHash" PHONE_NUMBER = "phoneNumber" OBJECT = "object" OBJECT_ARRAY = "object[]" @@ -379,12 +389,14 @@ class _InvertedIndexConfigCreate(_ConfigCreateModel): indexPropertyLength: Optional[bool] indexNullState: Optional[bool] stopwords: _StopwordsCreate + stopwordPresets: Optional[Dict[str, List[str]]] = None class _InvertedIndexConfigUpdate(_ConfigUpdateModel): bm25: Optional[_BM25ConfigUpdate] cleanupIntervalSeconds: Optional[int] stopwords: Optional[_StopwordsUpdate] + stopwordPresets: Optional[Dict[str, List[str]]] = None class _MultiTenancyConfigCreate(_ConfigCreateModel): @@ -1636,6 +1648,26 @@ class _StopwordsConfig(_ConfigBase): StopwordsConfig = _StopwordsConfig +StopwordsCreate = _StopwordsCreate + +# Invariant: the read-side dataclass (_StopwordsConfig) and the write-side +# pydantic model (_StopwordsCreate) must carry the same set of field names so +# that values round-tripped from ``collection.config.get()`` can flow back into +# ``tokenization.text()`` without silent data loss. 
If a field is added to one +# but not the other, importing this module fails loudly; the read→write +# conversion in ``weaviate/tokenization/executor.py::_TokenizationExecutor.text`` +# depends on this parity. +_read_fields = {f.name for f in _dataclass_fields(_StopwordsConfig)} +_write_fields = set(_StopwordsCreate.model_fields.keys()) +if _read_fields != _write_fields: + raise RuntimeError( + "_StopwordsConfig / _StopwordsCreate field drift detected — " + f"read-only={_read_fields - _write_fields}, " + f"write-only={_write_fields - _read_fields}. " + "Update both classes together, or adapt the read→write conversion in " + "weaviate/tokenization/executor.py::_TokenizationExecutor.text." + ) +del _read_fields, _write_fields @dataclass @@ -1646,6 +1678,7 @@ class _InvertedIndexConfig(_ConfigBase): index_property_length: bool index_timestamps: bool stopwords: StopwordsConfig + stopword_presets: Optional[Dict[str, List[str]]] = None InvertedIndexConfig = _InvertedIndexConfig @@ -1670,6 +1703,16 @@ class _PropertyVectorizerConfig: PropertyVectorizerConfig = _PropertyVectorizerConfig +@dataclass +class _TextAnalyzerConfig(_ConfigBase): + ascii_fold: bool + ascii_fold_ignore: Optional[List[str]] + stopword_preset: Optional[str] + + +TextAnalyzerConfig = _TextAnalyzerConfig + + @dataclass class _NestedProperty(_ConfigBase): data_type: DataType @@ -1678,6 +1721,7 @@ class _NestedProperty(_ConfigBase): index_searchable: bool name: str nested_properties: Optional[List["NestedProperty"]] + text_analyzer: Optional[_TextAnalyzerConfig] tokenization: Optional[Tokenization] def to_dict(self) -> Dict[str, Any]: @@ -1711,6 +1755,7 @@ class _Property(_PropertyBase): index_range_filters: bool index_searchable: bool nested_properties: Optional[List[NestedProperty]] + text_analyzer: Optional[_TextAnalyzerConfig] tokenization: Optional[Tokenization] vectorizer_config: Optional[PropertyVectorizerConfig] vectorizer: Optional[str] @@ -1723,6 +1768,8 @@ def to_dict(self) -> Dict[str, Any]: 
out["indexSearchable"] = self.index_searchable out["indexRangeFilters"] = self.index_range_filters out["tokenization"] = self.tokenization.value if self.tokenization else None + if self.text_analyzer is not None: + out["textAnalyzer"] = self.text_analyzer.to_dict() if self.nested_properties is not None and len(self.nested_properties) > 0: out["nestedProperties"] = [np.to_dict() for np in self.nested_properties] module_config: Dict[str, Any] = {} @@ -2160,6 +2207,47 @@ class _ShardStatus: ShardStatus = _ShardStatus +class _TextAnalyzerConfigCreate(_ConfigCreateModel): + """Text analysis options for a property. + + Configures per-property text analysis for `text` and `text[]` properties that use an + inverted index (searchable or filterable). Supports ASCII folding (accent/diacritic + handling) and selecting a stopword preset that overrides the collection-level + `invertedIndexConfig.stopwords` setting for this property only. + + Attributes: + ascii_fold: If True, accent/diacritic marks are folded to their base characters + during indexing and search (e.g. 'école' matches 'ecole'). If omitted, the + field is not sent to the server and the server default (False) applies. + ascii_fold_ignore: Optional list of characters that should be excluded from + ASCII folding (e.g. ['é'] keeps 'é' from being folded to 'e'). If omitted, + the field is not sent to the server. + stopword_preset: Stopword preset name. Overrides the collection-level + `invertedIndexConfig.stopwords` for this property. Only applies to + properties using `Tokenization.WORD`. Accepts a built-in preset + (`StopwordsPreset.EN` or `StopwordsPreset.NONE`) or the name of a + user-defined preset declared in + `Configure.inverted_index(stopword_presets=...)`. + + All settings are immutable after the property is created. 
+ """ + + asciiFold: Optional[bool] = Field(default=None, alias="ascii_fold") + asciiFoldIgnore: Optional[List[str]] = Field(default=None, alias="ascii_fold_ignore") + stopwordPreset: Optional[Union[StopwordsPreset, str]] = Field( + default=None, alias="stopword_preset" + ) + + @model_validator(mode="after") + def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate": + if self.asciiFold is not True and self.asciiFoldIgnore is not None: + raise ValueError("asciiFoldIgnore cannot be set when asciiFold is not enabled") + return self + + +TextAnalyzerConfigCreate = _TextAnalyzerConfigCreate + + class Property(_ConfigCreateModel): """This class defines the structure of a data property that a collection can have within Weaviate. @@ -2172,6 +2260,9 @@ class Property(_ConfigCreateModel): index_searchable: Whether the property should be searchable in the inverted index. nested_properties: nested properties for data type OBJECT and OBJECT_ARRAY`. skip_vectorization: Whether to skip vectorization of the property. Defaults to `False`. + text_analyzer: Text analysis options for the property. Configures ASCII folding + behavior for text and text[] properties using an inverted index. Immutable + after the property is created. tokenization: The tokenization method to use for the inverted index. Defaults to `None`. vectorize_property_name: Whether to vectorize the property name. Defaults to `True`. 
""" @@ -2186,6 +2277,7 @@ class Property(_ConfigCreateModel): default=None, alias="nested_properties" ) skip_vectorization: bool = Field(default=False) + textAnalyzer: Optional[_TextAnalyzerConfigCreate] = Field(default=None, alias="text_analyzer") tokenization: Optional[Tokenization] = Field(default=None) vectorize_property_name: bool = Field(default=True) @@ -2217,6 +2309,8 @@ def _to_dict( if isinstance(self.nestedProperties, list) else [self.nestedProperties._to_dict()] ) + if self.textAnalyzer is not None: + ret_dict["textAnalyzer"] = self.textAnalyzer._to_dict() return ret_dict @@ -2566,6 +2660,30 @@ class Configure: ObjectTTL = _ObjectTTL Replication = _Replication + @staticmethod + def text_analyzer( + ascii_fold: Optional[bool] = None, + ascii_fold_ignore: Optional[List[str]] = None, + stopword_preset: Optional[Union[StopwordsPreset, str]] = None, + ) -> _TextAnalyzerConfigCreate: + """Create a text analyzer config for a property. + + Args: + ascii_fold: If True, accent/diacritic marks are folded to their base + characters during indexing and search (e.g. 'école' matches 'ecole'). + ascii_fold_ignore: Optional list of characters that should be excluded + from ASCII folding (e.g. ``['é']`` keeps 'é' from being folded to + 'e'). Requires ``ascii_fold=True``. + stopword_preset: Stopword preset name to override the collection-level + stopwords for this property. Accepts a ``StopwordsPreset`` or a + user-defined preset name. 
+ """ + return _TextAnalyzerConfigCreate( + ascii_fold=ascii_fold, + ascii_fold_ignore=ascii_fold_ignore, + stopword_preset=stopword_preset, + ) + @staticmethod def inverted_index( bm25_b: Optional[float] = None, @@ -2577,11 +2695,17 @@ def inverted_index( stopwords_preset: Optional[StopwordsPreset] = None, stopwords_additions: Optional[List[str]] = None, stopwords_removals: Optional[List[str]] = None, + stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> _InvertedIndexConfigCreate: """Create an `InvertedIndexConfigCreate` object to be used when defining the configuration of the keyword searching algorithm of Weaviate. Args: - See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for details! + stopword_presets: User-defined named stopword lists keyed by preset name. Each value + is a flat list of stopword strings. A preset can be referenced from a property's + `text_analyzer.stopword_preset` to override the collection-level stopwords for + that property only. Requires Weaviate >= 1.37.0. + + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for details on the other parameters. """ # noqa: D417 (missing argument descriptions in the docstring) if bm25_b is None and bm25_k1 is not None or bm25_k1 is None and bm25_b is not None: raise ValueError("bm25_b and bm25_k1 must be specified together") @@ -2601,6 +2725,7 @@ def inverted_index( additions=stopwords_additions, removals=stopwords_removals, ), + stopwordPresets=stopword_presets, ) @staticmethod @@ -2875,13 +3000,19 @@ def inverted_index( stopwords_additions: Optional[List[str]] = None, stopwords_preset: Optional[StopwordsPreset] = None, stopwords_removals: Optional[List[str]] = None, + stopword_presets: Optional[Dict[str, List[str]]] = None, ) -> _InvertedIndexConfigUpdate: """Create an `InvertedIndexConfigUpdate` object. 
Use this method when defining the `inverted_index_config` argument in `collection.update()`. Args: - See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for a more detailed view! + stopword_presets: User-defined named stopword lists keyed by preset name. Each value + is a flat list of stopword strings. Passing this replaces the entire user-defined + stopword preset map for the collection. Removing a preset still referenced by a + property is rejected by the server. Requires Weaviate >= 1.37.0. + + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#configure-the-inverted-index) for details on the other parameters. """ # noqa: D417 (missing argument descriptions in the docstring) return _InvertedIndexConfigUpdate( bm25=_BM25ConfigUpdate(b=bm25_b, k1=bm25_k1), @@ -2891,6 +3022,7 @@ def inverted_index( additions=stopwords_additions, removals=stopwords_removals, ), + stopwordPresets=stopword_presets, ) @staticmethod diff --git a/weaviate/collections/classes/config_base.py b/weaviate/collections/classes/config_base.py index fc696fdfb..aa572795e 100644 --- a/weaviate/collections/classes/config_base.py +++ b/weaviate/collections/classes/config_base.py @@ -29,7 +29,7 @@ def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]: continue if isinstance(val, Enum): schema[cls_field] = str(val.value) - elif isinstance(val, (int, float, bool, str, list)): + elif isinstance(val, (int, float, bool, str, list, dict)): schema[cls_field] = val elif isinstance(val, _QuantizerConfigUpdate): quantizers = ["pq", "bq", "sq"] diff --git a/weaviate/collections/classes/config_methods.py b/weaviate/collections/classes/config_methods.py index c150394f1..691cf208d 100644 --- a/weaviate/collections/classes/config_methods.py +++ b/weaviate/collections/classes/config_methods.py @@ -39,6 +39,7 @@ _ShardingConfig, _SQConfig, _StopwordsConfig, + _TextAnalyzerConfig, _VectorIndexConfigDynamic, 
_VectorIndexConfigFlat, _VectorIndexConfigHFresh, @@ -356,6 +357,7 @@ def _collection_config_from_json(schema: Dict[str, Any]) -> _CollectionConfig: additions=schema["invertedIndexConfig"]["stopwords"]["additions"], removals=schema["invertedIndexConfig"]["stopwords"]["removals"], ), + stopword_presets=schema["invertedIndexConfig"].get("stopwordPresets"), ), multi_tenancy_config=_MultiTenancyConfig( enabled=schema.get("multiTenancyConfig", {}).get("enabled", False), @@ -462,6 +464,21 @@ def _collection_configs_simple_from_json( return dict(sorted(configs.items())) +def _text_analyzer_from_config(prop: Dict[str, Any]) -> Optional[_TextAnalyzerConfig]: + ta = prop.get("textAnalyzer") + if ta is None: + return None + # The server normalizes an empty TextAnalyzer to nil (see usecases/schema/validation.go), + # so the only meaningful signal is the presence of one of the configured fields. + if "asciiFold" not in ta and "stopwordPreset" not in ta: + return None + return _TextAnalyzerConfig( + ascii_fold=ta.get("asciiFold", False), + ascii_fold_ignore=ta.get("asciiFoldIgnore"), + stopword_preset=ta.get("stopwordPreset"), + ) + + def _nested_properties_from_config(props: List[Dict[str, Any]]) -> List[_NestedProperty]: return [ _NestedProperty( @@ -475,6 +492,7 @@ def _nested_properties_from_config(props: List[Dict[str, Any]]) -> List[_NestedP if prop.get("nestedProperties") is not None else None ), + text_analyzer=_text_analyzer_from_config(prop), tokenization=( Tokenization(prop["tokenization"]) if prop.get("tokenization") is not None else None ), @@ -497,6 +515,7 @@ def _properties_from_config(schema: Dict[str, Any]) -> List[_Property]: if prop.get("nestedProperties") is not None else None ), + text_analyzer=_text_analyzer_from_config(prop), tokenization=( Tokenization(prop["tokenization"]) if prop.get("tokenization") is not None else None ), diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index bff0e35ca..bdcc53dd9 100644 --- 
a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -90,10 +90,32 @@ class MetadataQuery(_WeaviateInput): score: bool = Field(default=False) explain_score: bool = Field(default=False) is_consistent: bool = Field(default=False) + query_profile: bool = Field(default=False) @classmethod def full(cls) -> "MetadataQuery": - """Return a MetadataQuery with all fields set to True.""" + """Return a MetadataQuery with all fields set to True. + + NOTE: `query_profile` is excluded because it adds performance overhead. + Use `full_with_profile()` to include it. + """ + return cls( + creation_time=True, + last_update_time=True, + distance=True, + certainty=True, + score=True, + explain_score=True, + is_consistent=True, + ) + + @classmethod + def full_with_profile(cls) -> "MetadataQuery": + """Return a MetadataQuery with all fields set to True, including query profiling. + + Query profiling adds per-shard execution timing breakdowns to the response + but has performance overhead. Requires Weaviate >= 1.36.9. 
+ """ return cls( creation_time=True, last_update_time=True, @@ -102,6 +124,7 @@ def full(cls) -> "MetadataQuery": score=True, explain_score=True, is_consistent=True, + query_profile=True, ) @@ -117,6 +140,7 @@ class _MetadataQuery: explain_score: bool = False is_consistent: bool = False vectors: Optional[List[str]] = None + query_profile: bool = False @classmethod def from_public( @@ -138,6 +162,7 @@ def from_public( score=public.score, explain_score=public.explain_score, is_consistent=public.is_consistent, + query_profile=public.query_profile, ) ) @@ -152,6 +177,7 @@ def from_public( "score", "explain_score", "is_consistent", + "query_profile", ] ], MetadataQuery, diff --git a/weaviate/collections/classes/internal.py b/weaviate/collections/classes/internal.py index f4d542f66..55a69c5cd 100644 --- a/weaviate/collections/classes/internal.py +++ b/weaviate/collections/classes/internal.py @@ -90,6 +90,29 @@ def _is_empty(self) -> bool: ) +@dataclass +class SearchProfileReturn: + """Profiling details for a single search type within a shard.""" + + details: Dict[str, str] + + +@dataclass +class ShardProfileReturn: + """Profiling data for a single shard.""" + + name: str + node: str + searches: Dict[str, SearchProfileReturn] + + +@dataclass +class QueryProfileReturn: + """Per-shard query profiling data returned when `query_profile=True` is set in metadata.""" + + shards: List[ShardProfileReturn] + + @dataclass class GroupByMetadataReturn: """Metadata of an object returned by a group by query.""" @@ -210,6 +233,7 @@ class GenerativeReturn(Generic[P, R]): __generated: Optional[str] objects: List[GenerativeObject[P, R]] generative: Optional[GenerativeGrouped] + query_profile: Optional[QueryProfileReturn] # init required because of nuances of dataclass when defining @property generated and private var __generated def __init__( @@ -217,10 +241,12 @@ def __init__( generated: Optional[str], objects: List[GenerativeObject[P, R]], generative: Optional[GenerativeGrouped], + 
query_profile: Optional[QueryProfileReturn] = None, ) -> None: self.__generated = generated self.objects = objects self.generative = generative + self.query_profile = query_profile @property @deprecated( @@ -257,6 +283,7 @@ class GenerativeGroupByReturn(Generic[P, R]): objects: List[GroupByObject[P, R]] groups: Dict[str, GenerativeGroup[P, R]] generated: Optional[str] + query_profile: Optional[QueryProfileReturn] = None @dataclass @@ -265,6 +292,7 @@ class GroupByReturn(Generic[P, R]): objects: List[GroupByObject[P, R]] groups: Dict[str, Group[P, R]] + query_profile: Optional[QueryProfileReturn] = None @dataclass @@ -272,6 +300,7 @@ class QueryReturn(Generic[P, R]): """The return type of a query within the `.query` namespace of a collection.""" objects: List[Object[P, R]] + query_profile: Optional[QueryProfileReturn] = None _GQLEntryReturnType: TypeAlias = Dict[str, List[Dict[str, Any]]] diff --git a/weaviate/collections/collections/executor.py b/weaviate/collections/collections/executor.py index 8497cdf51..2a733356c 100644 --- a/weaviate/collections/collections/executor.py +++ b/weaviate/collections/collections/executor.py @@ -44,13 +44,14 @@ _check_references_generic, ) from weaviate.collections.collection import Collection, CollectionAsync +from weaviate.collections.config.executor import _any_property_has_text_analyzer from weaviate.connect import executor from weaviate.connect.v4 import ( ConnectionAsync, ConnectionType, _ExpectedStatusCodes, ) -from weaviate.exceptions import WeaviateInvalidInputError +from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError from weaviate.util import _capitalize_first_letter, _decode_json_response_dict from weaviate.validator import _validate_input, _ValidateArgument from weaviate.warnings import _Warnings @@ -213,6 +214,23 @@ def create( _Warnings.vectorizer_config_in_config_create() if vector_index_config is not None: _Warnings.vector_index_config_in_config_create() + if properties is not 
None and _any_property_has_text_analyzer(properties): + if not self._connection._weaviate_version.is_at_least(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Property text_analyzer (asciiFold / stopword_preset)", + str(self._connection._weaviate_version), + "1.37.0", + ) + if ( + inverted_index_config is not None + and inverted_index_config.stopwordPresets is not None + and not self._connection._weaviate_version.is_at_least(1, 37, 0) + ): + raise WeaviateUnsupportedFeatureError( + "InvertedIndexConfig stopword_presets", + str(self._connection._weaviate_version), + "1.37.0", + ) try: config = _CollectionConfigCreate( description=description, diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index c95cba5a3..103ab70ac 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -6,6 +6,7 @@ List, Literal, Optional, + Sequence, Tuple, Union, cast, @@ -53,6 +54,7 @@ from weaviate.connect.v4 import ConnectionAsync, ConnectionType, _ExpectedStatusCodes from weaviate.exceptions import ( WeaviateInvalidInputError, + WeaviateUnsupportedFeatureError, ) from weaviate.util import ( _capitalize_first_letter, @@ -63,6 +65,20 @@ from weaviate.warnings import _Warnings +def _any_property_has_text_analyzer(properties: Sequence[Property]) -> bool: + return any(_property_has_text_analyzer(p) for p in properties) + + +def _property_has_text_analyzer(prop: Property) -> bool: + if prop.textAnalyzer is not None: + return True + nested = prop.nestedProperties + if nested is None: + return False + nested_list = nested if isinstance(nested, list) else [nested] + return any(_property_has_text_analyzer(np) for np in nested_list) + + class _ConfigCollectionExecutor(Generic[ConnectionType]): def __init__( self, @@ -199,6 +215,16 @@ def update( ), ): _Warnings.vectorizer_config_in_config_update() + if ( + inverted_index_config is not None + and inverted_index_config.stopwordPresets is not None + and not 
self._connection._weaviate_version.is_at_least(1, 37, 0) + ): + raise WeaviateUnsupportedFeatureError( + "InvertedIndexConfig stopword_presets", + str(self._connection._weaviate_version), + "1.37.0", + ) try: config = _CollectionConfigUpdate( description=description, @@ -244,6 +270,15 @@ async def _execute() -> None: return executor.result(resp(schema)) def __add_property(self, additional_property: PropertyType) -> executor.Result[None]: + if isinstance(additional_property, Property) and _property_has_text_analyzer( + additional_property + ): + if not self._connection._weaviate_version.is_at_least(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Property text_analyzer (asciiFold)", + str(self._connection._weaviate_version), + "1.37.0", + ) path = f"/schema/{self._name}/properties" obj = additional_property._to_dict() diff --git a/weaviate/collections/grpc/query.py b/weaviate/collections/grpc/query.py index 1ac014ada..d635a3f2f 100644 --- a/weaviate/collections/grpc/query.py +++ b/weaviate/collections/grpc/query.py @@ -521,6 +521,7 @@ def _metadata_to_grpc(self, metadata: _MetadataQuery) -> search_get_pb2.Metadata score=metadata.score, is_consistent=metadata.is_consistent, vectors=metadata.vectors, + query_profile=metadata.query_profile, ) def __resolve_property(self, prop: QueryNested) -> search_get_pb2.ObjectPropertiesRequest: diff --git a/weaviate/collections/grpc/shared.py b/weaviate/collections/grpc/shared.py index 593811fb3..714db67cb 100644 --- a/weaviate/collections/grpc/shared.py +++ b/weaviate/collections/grpc/shared.py @@ -686,11 +686,16 @@ def _parse_hybrid( else: vector_bytes = vector_bytes_tmp + use_alpha_param = self._weaviate_version.is_at_least( + 1, 36, 6 + ) # TODO: change to 1.36.7 once it's released return ( base_search_pb2.Hybrid( properties=properties, query=query, - alpha=float(alpha) if alpha is not None else None, + alpha=None if use_alpha_param else (alpha if alpha is not None else 0.7), + alpha_param=alpha if use_alpha_param else 
None, + use_alpha_param=use_alpha_param, fusion_type=( cast( base_search_pb2.Hybrid.FusionType, diff --git a/weaviate/collections/queries/base_executor.py b/weaviate/collections/queries/base_executor.py index e721d9edb..f3eb36399 100644 --- a/weaviate/collections/queries/base_executor.py +++ b/weaviate/collections/queries/base_executor.py @@ -40,9 +40,12 @@ GroupByReturn, MetadataReturn, Object, + QueryProfileReturn, QueryReturn, ReturnProperties, ReturnReferences, + SearchProfileReturn, + ShardProfileReturn, WeaviateProperties, _CrossReference, _extract_properties_from_data_model, @@ -53,7 +56,7 @@ from weaviate.collections.grpc.query import _QueryGRPC from weaviate.collections.grpc.shared import _ByteOps, _Unpack from weaviate.connect.v4 import ConnectionType -from weaviate.exceptions import WeaviateInvalidInputError +from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError from weaviate.proto.v1 import base_pb2, generative_pb2, properties_pb2, search_get_pb2 from weaviate.types import INCLUDE_VECTOR from weaviate.util import ( @@ -452,6 +455,25 @@ def __result_to_group_by_object( belongs_to_group=group_name, ) + def __extract_query_profile( + self, res: search_get_pb2.SearchReply + ) -> Optional[QueryProfileReturn]: + if not res.HasField("query_profile"): + return None + return QueryProfileReturn( + shards=[ + ShardProfileReturn( + name=shard.name, + node=shard.node, + searches={ + key: SearchProfileReturn(details=dict(profile.details)) + for key, profile in shard.searches.items() + }, + ) + for shard in res.query_profile.shards + ] + ) + def _result_to_query_return( self, res: search_get_pb2.SearchReply, @@ -461,7 +483,8 @@ def _result_to_query_return( objects=[ self.__result_to_query_object(obj.properties, obj.metadata, options) for obj in res.results - ] + ], + query_profile=self.__extract_query_profile(res), ) def _result_to_generative_query_return( @@ -480,6 +503,7 @@ def _result_to_generative_query_return( 
generative=self.__extract_generative_grouped_from_generative( res.generative_grouped_results ), + query_profile=self.__extract_query_profile(res), ) def _result_to_generative_return( @@ -507,7 +531,11 @@ def _result_to_groupby_return( objects_group_by: List[GroupByObject] = [ obj for group in groups.values() for obj in group.objects ] - return GroupByReturn(objects=objects_group_by, groups=groups) + return GroupByReturn( + objects=objects_group_by, + groups=groups, + query_profile=self.__extract_query_profile(res), + ) def _result_to_generative_groupby_return( self, @@ -537,6 +565,7 @@ def _result_to_generative_groupby_return( generated=( res.generative_grouped_result if res.generative_grouped_result != "" else None ), + query_profile=self.__extract_query_profile(res), ) def _result_to_query_or_groupby_return( @@ -615,6 +644,13 @@ def _parse_return_metadata( ret_md = cast(MetadataQuery, return_metadata) else: ret_md = MetadataQuery(**{str(prop): True for prop in return_metadata}) + + if ret_md is not None and ret_md.query_profile: + if self._connection._weaviate_version.is_lower_than(1, 36, 9): + raise WeaviateUnsupportedFeatureError( + "Query profiling", str(self._connection._weaviate_version), "1.36.9" + ) + return _MetadataQuery.from_public(ret_md, include_vector) def _parse_return_references( diff --git a/weaviate/collections/queries/hybrid/generate/async_.pyi b/weaviate/collections/queries/hybrid/generate/async_.pyi index 259f5787b..8c9faec33 100644 --- a/weaviate/collections/queries/hybrid/generate/async_.pyi +++ b/weaviate/collections/queries/hybrid/generate/async_.pyi @@ -42,7 +42,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = 
None, @@ -69,7 +69,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -96,7 +96,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -123,7 +123,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -150,7 +150,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -177,7 +177,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: 
Optional[HybridFusion] = None, @@ -204,7 +204,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -231,7 +231,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -258,7 +258,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -285,7 +285,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -312,7 +312,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = 
None, fusion_type: Optional[HybridFusion] = None, @@ -339,7 +339,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -366,7 +366,7 @@ class _HybridGenerateAsync( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/generate/executor.py b/weaviate/collections/queries/hybrid/generate/executor.py index f516f0b5e..37bfb1b69 100644 --- a/weaviate/collections/queries/hybrid/generate/executor.py +++ b/weaviate/collections/queries/hybrid/generate/executor.py @@ -53,7 +53,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -81,7 +81,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -109,7 +109,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, 
grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -137,7 +137,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -165,7 +165,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -193,7 +193,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -223,7 +223,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -251,7 +251,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, 
generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -279,7 +279,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -307,7 +307,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -335,7 +335,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -363,7 +363,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -392,7 +392,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, 
- alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -421,7 +421,7 @@ def hybrid( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/generate/sync.pyi b/weaviate/collections/queries/hybrid/generate/sync.pyi index d67103519..f48966fb5 100644 --- a/weaviate/collections/queries/hybrid/generate/sync.pyi +++ b/weaviate/collections/queries/hybrid/generate/sync.pyi @@ -41,7 +41,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -68,7 +68,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -95,7 +95,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, 
fusion_type: Optional[HybridFusion] = None, @@ -122,7 +122,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -149,7 +149,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -176,7 +176,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -203,7 +203,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -230,7 +230,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, 
fusion_type: Optional[HybridFusion] = None, @@ -257,7 +257,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -284,7 +284,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -311,7 +311,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -338,7 +338,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -365,7 +365,7 @@ class _HybridGenerate( grouped_task: Union[str, _GroupedTask, None] = None, grouped_properties: Optional[List[str]] = None, generative_provider: Optional[_GenerativeConfigRuntime] = None, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, 
fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/query/async_.pyi b/weaviate/collections/queries/hybrid/query/async_.pyi index 346231bf0..9336f6ffe 100644 --- a/weaviate/collections/queries/hybrid/query/async_.pyi +++ b/weaviate/collections/queries/hybrid/query/async_.pyi @@ -34,7 +34,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -57,7 +57,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -80,7 +80,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -103,7 +103,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -126,7 +126,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -149,7 +149,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -172,7 +172,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER 
= 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -195,7 +195,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -218,7 +218,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -241,7 +241,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -264,7 +264,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -287,7 +287,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -310,7 +310,7 @@ class _HybridQueryAsync( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/query/executor.py b/weaviate/collections/queries/hybrid/query/executor.py index 4c2090d76..213d8b6e0 100644 --- a/weaviate/collections/queries/hybrid/query/executor.py +++ 
b/weaviate/collections/queries/hybrid/query/executor.py @@ -45,7 +45,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -69,7 +69,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -93,7 +93,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -117,7 +117,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -141,7 +141,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -165,7 +165,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -191,7 +191,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -215,7 +215,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, 
query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -239,7 +239,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -263,7 +263,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -287,7 +287,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -311,7 +311,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -336,7 +336,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -361,7 +361,7 @@ def hybrid( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/collections/queries/hybrid/query/sync.pyi b/weaviate/collections/queries/hybrid/query/sync.pyi index 584c61263..01b4a84f8 100644 --- a/weaviate/collections/queries/hybrid/query/sync.pyi +++ b/weaviate/collections/queries/hybrid/query/sync.pyi @@ -34,7 +34,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, 
vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -57,7 +57,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -80,7 +80,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -103,7 +103,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -126,7 +126,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -149,7 +149,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -172,7 +172,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -195,7 +195,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -218,7 +218,7 @@ class _HybridQuery( 
self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -241,7 +241,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -264,7 +264,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -287,7 +287,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, @@ -310,7 +310,7 @@ class _HybridQuery( self, query: Optional[str], *, - alpha: NUMBER = 0.7, + alpha: Optional[NUMBER] = None, vector: Optional[HybridVectorType] = None, query_properties: Optional[List[str]] = None, fusion_type: Optional[HybridFusion] = None, diff --git a/weaviate/connect/v4.py b/weaviate/connect/v4.py index adac4be38..56ece8ca2 100644 --- a/weaviate/connect/v4.py +++ b/weaviate/connect/v4.py @@ -1019,8 +1019,8 @@ def grpc_batch_stream( self, requests: Generator[batch_pb2.BatchStreamRequest, None, None], ) -> Generator[batch_pb2.BatchStreamReply, None, None]: - assert self.grpc_stub is not None try: + assert self.grpc_stub is not None for msg in self.grpc_stub.BatchStream( request_iterator=requests, timeout=self.timeout_config.stream, diff --git a/weaviate/exceptions.py b/weaviate/exceptions.py index 2a5b429d5..ce0fe6f7e 100644 --- a/weaviate/exceptions.py +++ b/weaviate/exceptions.py @@ -141,6 +141,14 @@ class BackupCanceledError(WeaviateBaseError): """Backup 
canceled Exception.""" +class ExportFailedError(WeaviateBaseError): + """Export Failed Exception.""" + + +class ExportCanceledError(WeaviateBaseError): + """Export Canceled Exception.""" + + class EmptyResponseError(WeaviateBaseError): """Occurs when an HTTP request unexpectedly returns an empty response.""" diff --git a/weaviate/export/__init__.py b/weaviate/export/__init__.py new file mode 100644 index 000000000..91de2d448 --- /dev/null +++ b/weaviate/export/__init__.py @@ -0,0 +1,7 @@ +"""Module for collection export operations.""" + +from .async_ import _ExportAsync +from .executor import ExportStorage +from .sync import _Export + +__all__ = ["ExportStorage", "_ExportAsync", "_Export"] diff --git a/weaviate/export/async_.py b/weaviate/export/async_.py new file mode 100644 index 000000000..8bd1e3c44 --- /dev/null +++ b/weaviate/export/async_.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionAsync +from weaviate.export.executor import _ExportExecutor + + +@executor.wrap("async") +class _ExportAsync(_ExportExecutor[ConnectionAsync]): + pass diff --git a/weaviate/export/async_.pyi b/weaviate/export/async_.pyi new file mode 100644 index 000000000..4f1b82ced --- /dev/null +++ b/weaviate/export/async_.pyi @@ -0,0 +1,37 @@ +from typing import List, Literal, Union, overload + +from weaviate.connect.v4 import ConnectionAsync +from weaviate.export.export import ( + ExportCreateReturn, + ExportFileFormat, + ExportStatusReturn, + ExportStorage, +) + +from .executor import _ExportExecutor + +class _ExportAsync(_ExportExecutor[ConnectionAsync]): + @overload + async def create( + self, + *, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[True], + ) -> ExportStatusReturn: ... 
+ @overload + async def create( + self, + *, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[False] = False, + ) -> ExportCreateReturn: ... + async def get_status(self, *, export_id: str, backend: ExportStorage) -> ExportStatusReturn: ... + async def cancel(self, *, export_id: str, backend: ExportStorage) -> bool: ... diff --git a/weaviate/export/executor.py b/weaviate/export/executor.py new file mode 100644 index 000000000..516273c73 --- /dev/null +++ b/weaviate/export/executor.py @@ -0,0 +1,338 @@ +"""Export class definition.""" + +import asyncio +import time +from typing import Generic, List, Literal, Tuple, Union, overload + +from httpx import Response + +from weaviate.backup.backup import STORAGE_NAMES +from weaviate.connect import executor +from weaviate.connect.v4 import ( + Connection, + ConnectionAsync, + ConnectionType, + _ExpectedStatusCodes, +) +from weaviate.exceptions import ( + EmptyResponseException, + ExportCanceledError, + ExportFailedError, + WeaviateUnsupportedFeatureError, +) +from weaviate.export.export import ( + ExportCreateReturn, + ExportFileFormat, + ExportStatus, + ExportStatusReturn, + ExportStorage, +) +from weaviate.util import ( + _capitalize_first_letter, + _decode_json_response_dict, +) + + +class _ExportExecutor(Generic[ConnectionType]): + def __init__(self, connection: Connection): + self._connection = connection + + @overload + def create( + self, + *, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[True], + ) -> executor.Result[ExportStatusReturn]: ... 
+ + @overload + def create( + self, + *, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[False] = False, + ) -> executor.Result[ExportCreateReturn]: ... + + def create( + self, + *, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: bool = False, + ) -> executor.Result[Union[ExportCreateReturn, ExportStatusReturn]]: + """Create an export of all/per collection Weaviate objects. + + Args: + export_id: The identifier name of the export. + backend: The backend storage where to create the export. + file_format: The file format of the export (e.g. ExportFileFormat.PARQUET). + include_collections: The collection/list of collections to be included in the export. If not specified all + collections will be included. Either `include_collections` or `exclude_collections` can be set. + exclude_collections: The collection/list of collections to be excluded in the export. + Either `include_collections` or `exclude_collections` can be set. + wait_for_completion: Whether to wait until the export is done. By default False. + + Returns: + An `ExportCreateReturn` when `wait_for_completion=False`, or an `ExportStatusReturn` + when `wait_for_completion=True` and the export completes successfully. + + Raises: + weaviate.exceptions.UnexpectedStatusCodeError: If weaviate reports a non-OK status. + TypeError: One of the arguments have a wrong type. 
+ """ + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Collection export", + str(self._connection._weaviate_version), + "1.37.0", + ) + ( + export_id, + backend, + include_collections, + exclude_collections, + ) = _get_and_validate_create_arguments( + export_id=export_id, + backend=backend, + include_classes=include_collections, + exclude_classes=exclude_collections, + wait_for_completion=wait_for_completion, + ) + + payload: dict = { + "id": export_id, + "file_format": file_format.value, + } + if include_collections: + payload["include"] = include_collections + if exclude_collections: + payload["exclude"] = exclude_collections + + path = f"/export/{backend.value}" + + if isinstance(self._connection, ConnectionAsync): + + async def _execute() -> Union[ExportCreateReturn, ExportStatusReturn]: + res = await executor.aresult( + self._connection.post( + path=path, + weaviate_object=payload, + error_msg="Export creation failed due to connection error.", + ) + ) + create_status = _decode_json_response_dict(res, "Export creation") + assert create_status is not None + if wait_for_completion: + while True: + status = await executor.aresult( + self.get_status( + export_id=export_id, + backend=backend, + ) + ) + if status.status == ExportStatus.SUCCESS: + return status + if status.status == ExportStatus.FAILED: + raise ExportFailedError(f"Export failed with error: {status.error}") + if status.status == ExportStatus.CANCELED: + raise ExportCanceledError( + f"Export was canceled with error: {status.error}" + ) + await asyncio.sleep(1) + return ExportCreateReturn(**create_status) + + return _execute() + + res = executor.result( + self._connection.post( + path=path, + weaviate_object=payload, + error_msg="Export creation failed due to connection error.", + ) + ) + create_status = _decode_json_response_dict(res, "Export creation") + assert create_status is not None + if wait_for_completion: + while True: + status = 
executor.result( + self.get_status( + export_id=export_id, + backend=backend, + ) + ) + if status.status == ExportStatus.SUCCESS: + return status + if status.status == ExportStatus.FAILED: + raise ExportFailedError(f"Export failed with error: {status.error}") + if status.status == ExportStatus.CANCELED: + raise ExportCanceledError(f"Export was canceled with error: {status.error}") + time.sleep(1) + return ExportCreateReturn(**create_status) + + def get_status( + self, + *, + export_id: str, + backend: ExportStorage, + ) -> executor.Result[ExportStatusReturn]: + """Check the status of an export. + + Args: + export_id: The identifier name of the export. + backend: The backend storage where the export was created. + + Returns: + An `ExportStatusReturn` object that contains the export status response. + """ + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Collection export", + str(self._connection._weaviate_version), + "1.37.0", + ) + export_id, backend = _get_and_validate_get_status( + export_id=export_id, + backend=backend, + ) + + url_path = f"/export/{backend.value}/{export_id}" + + def resp(res: Response) -> ExportStatusReturn: + typed_response = _decode_json_response_dict(res, "Export status check") + if typed_response is None: + raise EmptyResponseException() + return ExportStatusReturn(**typed_response) + + return executor.execute( + response_callback=resp, + method=self._connection.get, + path=url_path, + error_msg="Export status check failed due to connection error.", + ) + + def cancel( + self, + *, + export_id: str, + backend: ExportStorage, + ) -> executor.Result[bool]: + """Cancel a running export. + + Args: + export_id: The identifier name of the export. + backend: The backend storage where the export was created. + + Returns: + True if the export was cancelled, False if the export had already finished. 
+ """ + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Collection export", + str(self._connection._weaviate_version), + "1.37.0", + ) + export_id, backend = _get_and_validate_get_status( + export_id=export_id, + backend=backend, + ) + url_path = f"/export/{backend.value}/{export_id}" + + def resp(res: Response) -> bool: + if res.status_code == 204: + return True + # 409 means export already finished — not an error, just already done + if res.status_code == 409: + return False + return False + + return executor.execute( + response_callback=resp, + method=self._connection.delete, + path=url_path, + error_msg="Export cancel failed due to connection error.", + status_codes=_ExpectedStatusCodes(ok_in=[204, 409], error="cancel export"), + ) + + +def _get_and_validate_create_arguments( + export_id: str, + backend: Union[str, ExportStorage], + include_classes: Union[List[str], str, None], + exclude_classes: Union[List[str], str, None], + wait_for_completion: bool, +) -> Tuple[str, ExportStorage, List[str], List[str]]: + if not isinstance(export_id, str): + raise TypeError(f"'export_id' must be of type str. Given type: {type(export_id)}.") + export_id = export_id.lower() + if isinstance(backend, str): + try: + backend = ExportStorage(backend.lower()) + except ValueError: + raise ValueError( + f"'backend' must have one of these values: {STORAGE_NAMES}. Given value: {backend}." + ) + + if not isinstance(wait_for_completion, bool): + raise TypeError( + f"'wait_for_completion' must be of type bool. Given type: {type(wait_for_completion)}." + ) + + if include_classes is not None: + if isinstance(include_classes, str): + include_classes = [include_classes] + elif not isinstance(include_classes, list): + raise TypeError( + "'include_collections' must be of type str, list of str or None. " + f"Given type: {type(include_classes)}." 
+ ) + else: + include_classes = [] + + if exclude_classes is not None: + if isinstance(exclude_classes, str): + exclude_classes = [exclude_classes] + elif not isinstance(exclude_classes, list): + raise TypeError( + "'exclude_collections' must be of type str, list of str or None. " + f"Given type: {type(exclude_classes)}." + ) + else: + exclude_classes = [] + + if include_classes and exclude_classes: + raise ValueError( + "Either 'include_collections' OR 'exclude_collections' can be set, not both." + ) + + include_classes = [_capitalize_first_letter(cls) for cls in include_classes] + exclude_classes = [_capitalize_first_letter(cls) for cls in exclude_classes] + + return (export_id, backend, include_classes, exclude_classes) + + +def _get_and_validate_get_status( + export_id: str, backend: Union[str, ExportStorage] +) -> Tuple[str, ExportStorage]: + if not isinstance(export_id, str): + raise TypeError(f"'export_id' must be of type str. Given type: {type(export_id)}.") + export_id = export_id.lower() + if isinstance(backend, str): + try: + backend = ExportStorage(backend.lower()) + except ValueError: + raise ValueError( + f"'backend' must have one of these values: {STORAGE_NAMES}. Given value: {backend}." 
+ ) + + return (export_id, backend) diff --git a/weaviate/export/export.py b/weaviate/export/export.py new file mode 100644 index 000000000..fe94afa5c --- /dev/null +++ b/weaviate/export/export.py @@ -0,0 +1,71 @@ +"""Export models and enums.""" + +from datetime import datetime +from enum import Enum +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + +from weaviate.backup.backup import BackupStorage + +ExportStorage = BackupStorage + + +class ExportFileFormat(str, Enum): + """Which file format should be used for the export.""" + + PARQUET = "parquet" + + +class ExportStatus(str, Enum): + """The status of an export.""" + + STARTED = "STARTED" + TRANSFERRING = "TRANSFERRING" + SUCCESS = "SUCCESS" + FAILED = "FAILED" + CANCELED = "CANCELED" + + +class ShardExportStatus(str, Enum): + """The status of an individual shard export.""" + + TRANSFERRING = "TRANSFERRING" + SUCCESS = "SUCCESS" + FAILED = "FAILED" + SKIPPED = "SKIPPED" + + +class ShardProgress(BaseModel): + """Progress of a single shard export.""" + + status: ShardExportStatus + objects_exported: int = Field(alias="objectsExported", default=0) + error: Optional[str] = None + skip_reason: Optional[str] = Field(alias="skipReason", default=None) + + model_config = {"populate_by_name": True} + + +class ExportCreateReturn(BaseModel): + """Return type of the export creation method.""" + + export_id: str = Field(alias="id") + backend: str + path: str + status: ExportStatus + started_at: Optional[datetime] = Field(alias="startedAt", default=None) + collections: List[str] = Field(default_factory=list, alias="classes") + + model_config = {"populate_by_name": True} + + +class ExportStatusReturn(ExportCreateReturn): + """Return type of the export status method.""" + + completed_at: Optional[datetime] = Field(alias="completedAt", default=None) + shard_status: Optional[Dict[str, Dict[str, ShardProgress]]] = Field( + alias="shardStatus", default=None + ) + error: Optional[str] = None + 
took_in_ms: Optional[int] = Field(alias="tookInMs", default=None) diff --git a/weaviate/export/sync.py b/weaviate/export/sync.py new file mode 100644 index 000000000..0510601f8 --- /dev/null +++ b/weaviate/export/sync.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionSync +from weaviate.export.executor import _ExportExecutor + + +@executor.wrap("sync") +class _Export(_ExportExecutor[ConnectionSync]): + pass diff --git a/weaviate/export/sync.pyi b/weaviate/export/sync.pyi new file mode 100644 index 000000000..afe7284e2 --- /dev/null +++ b/weaviate/export/sync.pyi @@ -0,0 +1,39 @@ +from typing import List, Literal, Union, overload + +from weaviate.connect.v4 import ( + ConnectionSync, +) +from weaviate.export.export import ( + ExportCreateReturn, + ExportFileFormat, + ExportStatusReturn, + ExportStorage, +) + +from .executor import _ExportExecutor + +class _Export(_ExportExecutor[ConnectionSync]): + @overload + def create( + self, + *, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[True], + ) -> ExportStatusReturn: ... + @overload + def create( + self, + *, + export_id: str, + backend: ExportStorage, + file_format: ExportFileFormat, + include_collections: Union[List[str], str, None] = None, + exclude_collections: Union[List[str], str, None] = None, + wait_for_completion: Literal[False] = False, + ) -> ExportCreateReturn: ... + def get_status(self, *, export_id: str, backend: ExportStorage) -> ExportStatusReturn: ... + def cancel(self, *, export_id: str, backend: ExportStorage) -> bool: ... diff --git a/weaviate/outputs/__init__.py b/weaviate/outputs/__init__.py index 62193fc35..75cb031e0 100644 --- a/weaviate/outputs/__init__.py +++ b/weaviate/outputs/__init__.py @@ -1,4 +1,17 @@ -from . 
import aggregate, backup, batch, cluster, config, data, query, replication, tenants, users +from . import ( + aggregate, + backup, + batch, + cluster, + config, + data, + export, + query, + replication, + tenants, + tokenization, + users, +) __all__ = [ "aggregate", @@ -7,8 +20,10 @@ "cluster", "config", "data", + "export", "query", "replication", "tenants", + "tokenization", "users", ] diff --git a/weaviate/outputs/export.py b/weaviate/outputs/export.py new file mode 100644 index 000000000..de0554de4 --- /dev/null +++ b/weaviate/outputs/export.py @@ -0,0 +1,17 @@ +from weaviate.export.export import ( + ExportCreateReturn, + ExportStatus, + ExportStatusReturn, + ExportStorage, + ShardExportStatus, + ShardProgress, +) + +__all__ = [ + "ExportCreateReturn", + "ExportStatus", + "ExportStatusReturn", + "ExportStorage", + "ShardExportStatus", + "ShardProgress", +] diff --git a/weaviate/outputs/query.py b/weaviate/outputs/query.py index 42ede14b3..625d669d8 100644 --- a/weaviate/outputs/query.py +++ b/weaviate/outputs/query.py @@ -32,11 +32,14 @@ Object, ObjectSingleReturn, QueryNearMediaReturnType, + QueryProfileReturn, QueryReturn, QueryReturnType, QuerySingleReturn, ReferenceInput, ReferenceInputs, + SearchProfileReturn, + ShardProfileReturn, ) from weaviate.collections.classes.types import ( GeoCoordinate, @@ -75,11 +78,14 @@ "GenerativeGroup", "PhoneNumberType", "QueryNearMediaReturnType", + "QueryProfileReturn", "QueryReturnType", "QueryReturn", "QuerySingleReturn", "ReferenceInput", "ReferenceInputs", + "SearchProfileReturn", + "ShardProfileReturn", "Sorting", "TargetVectorJoinType", "WeaviateField", diff --git a/weaviate/outputs/tokenization.py b/weaviate/outputs/tokenization.py new file mode 100644 index 000000000..0854f8b0d --- /dev/null +++ b/weaviate/outputs/tokenization.py @@ -0,0 +1,5 @@ +from weaviate.tokenization.models import TokenizeResult + +__all__ = [ + "TokenizeResult", +] diff --git a/weaviate/proto/v1/v4216/v1/base_search_pb2.py 
b/weaviate/proto/v1/v4216/v1/base_search_pb2.py index 5767fdb57..3abd15ebf 100644 --- a/weaviate/proto/v1/v4216/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v4216/v1/base_search_pb2.py @@ -14,7 +14,7 @@ from weaviate.proto.v1.v4216.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xd0\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b 
\x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operator\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 
\x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 
\x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 
\x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\x97\x05\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x17\n\x0fuse_alpha_param\x18\r \x01(\x08\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -26,6 +26,8 @@ _VECTORFORTARGET.fields_by_name['vector_bytes']._serialized_options = b'\030\001' _HYBRID.fields_by_name['vector']._options = None _HYBRID.fields_by_name['vector']._serialized_options = b'\030\001' + _HYBRID.fields_by_name['alpha']._options = None + _HYBRID.fields_by_name['alpha']._serialized_options = b'\030\001' _HYBRID.fields_by_name['vector_bytes']._options = None _HYBRID.fields_by_name['vector_bytes']._serialized_options = b'\030\001' _HYBRID.fields_by_name['target_vectors']._options = None @@ -56,8 +58,8 @@ _NEARTHERMALSEARCH.fields_by_name['target_vectors']._serialized_options = b'\030\001' _NEARIMUSEARCH.fields_by_name['target_vectors']._options = None _NEARIMUSEARCH.fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3337 - _globals['_COMBINATIONMETHOD']._serialized_end=3575 + _globals['_COMBINATIONMETHOD']._serialized_start=3408 + _globals['_COMBINATIONMETHOD']._serialized_end=3646 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -69,31 +71,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - _globals['_HYBRID']._serialized_end=1178 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1043 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1140 - _globals['_NEARVECTOR']._serialized_start=1181 - _globals['_NEARVECTOR']._serialized_end=1610 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1529 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1583 - _globals['_NEAROBJECT']._serialized_start=1613 - _globals['_NEAROBJECT']._serialized_end=1778 - _globals['_NEARTEXTSEARCH']._serialized_start=1781 - _globals['_NEARTEXTSEARCH']._serialized_end=2149 - 
_globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2042 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2096 - _globals['_NEARIMAGESEARCH']._serialized_start=2152 - _globals['_NEARIMAGESEARCH']._serialized_end=2325 - _globals['_NEARAUDIOSEARCH']._serialized_start=2328 - _globals['_NEARAUDIOSEARCH']._serialized_end=2501 - _globals['_NEARVIDEOSEARCH']._serialized_start=2504 - _globals['_NEARVIDEOSEARCH']._serialized_end=2677 - _globals['_NEARDEPTHSEARCH']._serialized_start=2680 - _globals['_NEARDEPTHSEARCH']._serialized_end=2853 - _globals['_NEARTHERMALSEARCH']._serialized_start=2856 - _globals['_NEARTHERMALSEARCH']._serialized_end=3033 - _globals['_NEARIMUSEARCH']._serialized_start=3036 - _globals['_NEARIMUSEARCH']._serialized_end=3205 - _globals['_BM25']._serialized_start=3207 - _globals['_BM25']._serialized_end=3334 + _globals['_HYBRID']._serialized_end=1249 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1098 + _globals['_HYBRID_FUSIONTYPE']._serialized_end=1195 + _globals['_NEARVECTOR']._serialized_start=1252 + _globals['_NEARVECTOR']._serialized_end=1681 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1600 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1654 + _globals['_NEAROBJECT']._serialized_start=1684 + _globals['_NEAROBJECT']._serialized_end=1849 + _globals['_NEARTEXTSEARCH']._serialized_start=1852 + _globals['_NEARTEXTSEARCH']._serialized_end=2220 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2113 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2167 + _globals['_NEARIMAGESEARCH']._serialized_start=2223 + _globals['_NEARIMAGESEARCH']._serialized_end=2396 + _globals['_NEARAUDIOSEARCH']._serialized_start=2399 + _globals['_NEARAUDIOSEARCH']._serialized_end=2572 + _globals['_NEARVIDEOSEARCH']._serialized_start=2575 + _globals['_NEARVIDEOSEARCH']._serialized_end=2748 + _globals['_NEARDEPTHSEARCH']._serialized_start=2751 + _globals['_NEARDEPTHSEARCH']._serialized_end=2924 + 
_globals['_NEARTHERMALSEARCH']._serialized_start=2927 + _globals['_NEARTHERMALSEARCH']._serialized_end=3104 + _globals['_NEARIMUSEARCH']._serialized_start=3107 + _globals['_NEARIMUSEARCH']._serialized_end=3276 + _globals['_BM25']._serialized_start=3278 + _globals['_BM25']._serialized_end=3405 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi b/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi index bac1b47b8..440631c88 100644 --- a/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v4216/v1/base_search_pb2.pyi @@ -67,7 +67,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... class Hybrid(_message.Message): - __slots__ = ["query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "vector_distance", "vectors"] + __slots__ = ["query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "use_alpha_param", "vector_distance", "vectors"] class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = [] FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -87,6 +87,8 @@ class Hybrid(_message.Message): NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] + ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] + USE_ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -100,9 +102,11 @@ class Hybrid(_message.Message): near_vector: NearVector targets: Targets bm25_search_operator: SearchOperatorOptions + alpha_param: float + use_alpha_param: bool vector_distance: float vectors: 
_containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... + def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., use_alpha_param: bool = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
class NearVector(_message.Message): __slots__ = ["vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors"] diff --git a/weaviate/proto/v1/v4216/v1/search_get_pb2.py b/weaviate/proto/v1/v4216/v1/search_get_pb2.py index 7885e57d6..78f20076e 100644 --- a/weaviate/proto/v1/v4216/v1/search_get_pb2.py +++ b/weaviate/proto/v1/v4216/v1/search_get_pb2.py @@ -17,7 +17,7 @@ from weaviate.proto.v1.v4216.v1 import properties_pb2 as v1_dot_properties__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 
\x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 
\x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c 
\x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xf4\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\x12\x15\n\rquery_profile\x18\x0b 
\x01(\x08\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xf7\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x12\x35\n\rquery_profile\x18\x06 \x01(\x0b\x32\x19.weaviate.v1.QueryProfileH\x02\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_resultsB\x10\n\x0e_query_profile\"\x9e\x03\n\x0cQueryProfile\x12\x36\n\x06shards\x18\x01 \x03(\x0b\x32&.weaviate.v1.QueryProfile.ShardProfile\x1a\x86\x01\n\rSearchProfile\x12\x45\n\x07\x64\x65tails\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.SearchProfile.DetailsEntry\x1a.\n\x0c\x44\x65tailsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xcc\x01\n\x0cShardProfile\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04node\x18\x02 
\x01(\t\x12\x46\n\x08searches\x18\x03 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.ShardProfile.SearchesEntry\x1aX\n\rSearchesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x36\n\x05value\x18\x02 \x01(\x0b\x32\'.weaviate.v1.QueryProfile.SearchProfile:\x02\x38\x01\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f 
\x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -31,6 +31,10 @@ _SEARCHREQUEST.fields_by_name['uses_125_api']._serialized_options = b'\030\001' _SEARCHREPLY.fields_by_name['generative_grouped_result']._options = None _SEARCHREPLY.fields_by_name['generative_grouped_result']._serialized_options = b'\030\001' + _QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY._options = None + _QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY._serialized_options = b'8\001' + _QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY._options = None + _QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY._serialized_options = b'8\001' _GROUPBYRESULT.fields_by_name['generative']._options = None _GROUPBYRESULT.fields_by_name['generative']._serialized_options = b'\030\001' 
_METADATARESULT.fields_by_name['vector']._options = None @@ -46,27 +50,37 @@ _globals['_SORTBY']._serialized_start=1632 _globals['_SORTBY']._serialized_end=1673 _globals['_METADATAREQUEST']._serialized_start=1676 - _globals['_METADATAREQUEST']._serialized_end=1897 - _globals['_PROPERTIESREQUEST']._serialized_start=1900 - _globals['_PROPERTIESREQUEST']._serialized_end=2109 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2112 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2251 - _globals['_REFPROPERTIESREQUEST']._serialized_start=2254 - _globals['_REFPROPERTIESREQUEST']._serialized_end=2431 - _globals['_RERANK']._serialized_start=2433 - _globals['_RERANK']._serialized_end=2489 - _globals['_SEARCHREPLY']._serialized_start=2492 - _globals['_SEARCHREPLY']._serialized_end=2794 - _globals['_RERANKREPLY']._serialized_start=2796 - _globals['_RERANKREPLY']._serialized_end=2824 - _globals['_GROUPBYRESULT']._serialized_start=2827 - _globals['_GROUPBYRESULT']._serialized_end=3188 - _globals['_SEARCHRESULT']._serialized_start=3191 - _globals['_SEARCHRESULT']._serialized_end=3374 - _globals['_METADATARESULT']._serialized_start=3377 - _globals['_METADATARESULT']._serialized_end=4008 - _globals['_PROPERTIESRESULT']._serialized_start=4011 - _globals['_PROPERTIESRESULT']._serialized_end=4275 - _globals['_REFPROPERTIESRESULT']._serialized_start=4277 - _globals['_REFPROPERTIESRESULT']._serialized_end=4368 + _globals['_METADATAREQUEST']._serialized_end=1920 + _globals['_PROPERTIESREQUEST']._serialized_start=1923 + _globals['_PROPERTIESREQUEST']._serialized_end=2132 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2135 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2274 + _globals['_REFPROPERTIESREQUEST']._serialized_start=2277 + _globals['_REFPROPERTIESREQUEST']._serialized_end=2454 + _globals['_RERANK']._serialized_start=2456 + _globals['_RERANK']._serialized_end=2512 + _globals['_SEARCHREPLY']._serialized_start=2515 + 
_globals['_SEARCHREPLY']._serialized_end=2890 + _globals['_QUERYPROFILE']._serialized_start=2893 + _globals['_QUERYPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_start=2966 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_end=3100 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_start=3054 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_end=3100 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_start=3103 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_start=3219 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_end=3307 + _globals['_RERANKREPLY']._serialized_start=3309 + _globals['_RERANKREPLY']._serialized_end=3337 + _globals['_GROUPBYRESULT']._serialized_start=3340 + _globals['_GROUPBYRESULT']._serialized_end=3701 + _globals['_SEARCHRESULT']._serialized_start=3704 + _globals['_SEARCHRESULT']._serialized_end=3887 + _globals['_METADATARESULT']._serialized_start=3890 + _globals['_METADATARESULT']._serialized_end=4521 + _globals['_PROPERTIESRESULT']._serialized_start=4524 + _globals['_PROPERTIESRESULT']._serialized_end=4788 + _globals['_REFPROPERTIESRESULT']._serialized_start=4790 + _globals['_REFPROPERTIESRESULT']._serialized_end=4881 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v4216/v1/search_get_pb2.pyi b/weaviate/proto/v1/v4216/v1/search_get_pb2.pyi index 9dd1ee0d0..a0300e672 100644 --- a/weaviate/proto/v1/v4216/v1/search_get_pb2.pyi +++ b/weaviate/proto/v1/v4216/v1/search_get_pb2.pyi @@ -88,7 +88,7 @@ class SortBy(_message.Message): def __init__(self, ascending: bool = ..., path: _Optional[_Iterable[str]] = ...) -> None: ... 
class MetadataRequest(_message.Message): - __slots__ = ["uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors"] + __slots__ = ["uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors", "query_profile"] UUID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] CREATION_TIME_UNIX_FIELD_NUMBER: _ClassVar[int] @@ -99,6 +99,7 @@ class MetadataRequest(_message.Message): EXPLAIN_SCORE_FIELD_NUMBER: _ClassVar[int] IS_CONSISTENT_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] uuid: bool vector: bool creation_time_unix: bool @@ -109,7 +110,8 @@ class MetadataRequest(_message.Message): explain_score: bool is_consistent: bool vectors: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ...) -> None: ... + query_profile: bool + def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ..., query_profile: bool = ...) -> None: ... class PropertiesRequest(_message.Message): __slots__ = ["non_ref_properties", "ref_properties", "object_properties", "return_all_nonref_properties"] @@ -154,18 +156,54 @@ class Rerank(_message.Message): def __init__(self, property: _Optional[str] = ..., query: _Optional[str] = ...) -> None: ... 
class SearchReply(_message.Message): - __slots__ = ["took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results"] + __slots__ = ["took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results", "query_profile"] TOOK_FIELD_NUMBER: _ClassVar[int] RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULT_FIELD_NUMBER: _ClassVar[int] GROUP_BY_RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULTS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] took: float results: _containers.RepeatedCompositeFieldContainer[SearchResult] generative_grouped_result: str group_by_results: _containers.RepeatedCompositeFieldContainer[GroupByResult] generative_grouped_results: _generative_pb2.GenerativeResult - def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ...) -> None: ... + query_profile: QueryProfile + def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ..., query_profile: _Optional[_Union[QueryProfile, _Mapping]] = ...) -> None: ... + +class QueryProfile(_message.Message): + __slots__ = ["shards"] + class SearchProfile(_message.Message): + __slots__ = ["details"] + class DetailsEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) 
-> None: ... + DETAILS_FIELD_NUMBER: _ClassVar[int] + details: _containers.ScalarMap[str, str] + def __init__(self, details: _Optional[_Mapping[str, str]] = ...) -> None: ... + class ShardProfile(_message.Message): + __slots__ = ["name", "node", "searches"] + class SearchesEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: QueryProfile.SearchProfile + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[QueryProfile.SearchProfile, _Mapping]] = ...) -> None: ... + NAME_FIELD_NUMBER: _ClassVar[int] + NODE_FIELD_NUMBER: _ClassVar[int] + SEARCHES_FIELD_NUMBER: _ClassVar[int] + name: str + node: str + searches: _containers.MessageMap[str, QueryProfile.SearchProfile] + def __init__(self, name: _Optional[str] = ..., node: _Optional[str] = ..., searches: _Optional[_Mapping[str, QueryProfile.SearchProfile]] = ...) -> None: ... + SHARDS_FIELD_NUMBER: _ClassVar[int] + shards: _containers.RepeatedCompositeFieldContainer[QueryProfile.ShardProfile] + def __init__(self, shards: _Optional[_Iterable[_Union[QueryProfile.ShardProfile, _Mapping]]] = ...) -> None: ... 
class RerankReply(_message.Message): __slots__ = ["score"] diff --git a/weaviate/proto/v1/v5261/v1/base_search_pb2.py b/weaviate/proto/v1/v5261/v1/base_search_pb2.py index cde241b1e..2658b6cc5 100644 --- a/weaviate/proto/v1/v5261/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v5261/v1/base_search_pb2.py @@ -15,7 +15,7 @@ from weaviate.proto.v1.v5261.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xd0\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n 
\x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operator\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 
\x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 
\x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\x97\x05\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x17\n\x0fuse_alpha_param\x18\r \x01(\x08\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -27,6 +27,8 @@ _globals['_VECTORFORTARGET'].fields_by_name['vector_bytes']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['vector']._loaded_options = None _globals['_HYBRID'].fields_by_name['vector']._serialized_options = b'\030\001' + _globals['_HYBRID'].fields_by_name['alpha']._loaded_options = None + _globals['_HYBRID'].fields_by_name['alpha']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['vector_bytes']._loaded_options = None _globals['_HYBRID'].fields_by_name['vector_bytes']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['target_vectors']._loaded_options = None @@ -57,8 +59,8 @@ _globals['_NEARTHERMALSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._loaded_options = None _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3337 - _globals['_COMBINATIONMETHOD']._serialized_end=3575 + _globals['_COMBINATIONMETHOD']._serialized_start=3408 + _globals['_COMBINATIONMETHOD']._serialized_end=3646 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -70,31 +72,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - _globals['_HYBRID']._serialized_end=1178 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1043 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1140 - _globals['_NEARVECTOR']._serialized_start=1181 - _globals['_NEARVECTOR']._serialized_end=1610 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1529 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1583 - _globals['_NEAROBJECT']._serialized_start=1613 - 
_globals['_NEAROBJECT']._serialized_end=1778 - _globals['_NEARTEXTSEARCH']._serialized_start=1781 - _globals['_NEARTEXTSEARCH']._serialized_end=2149 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2042 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2096 - _globals['_NEARIMAGESEARCH']._serialized_start=2152 - _globals['_NEARIMAGESEARCH']._serialized_end=2325 - _globals['_NEARAUDIOSEARCH']._serialized_start=2328 - _globals['_NEARAUDIOSEARCH']._serialized_end=2501 - _globals['_NEARVIDEOSEARCH']._serialized_start=2504 - _globals['_NEARVIDEOSEARCH']._serialized_end=2677 - _globals['_NEARDEPTHSEARCH']._serialized_start=2680 - _globals['_NEARDEPTHSEARCH']._serialized_end=2853 - _globals['_NEARTHERMALSEARCH']._serialized_start=2856 - _globals['_NEARTHERMALSEARCH']._serialized_end=3033 - _globals['_NEARIMUSEARCH']._serialized_start=3036 - _globals['_NEARIMUSEARCH']._serialized_end=3205 - _globals['_BM25']._serialized_start=3207 - _globals['_BM25']._serialized_end=3334 + _globals['_HYBRID']._serialized_end=1249 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1098 + _globals['_HYBRID_FUSIONTYPE']._serialized_end=1195 + _globals['_NEARVECTOR']._serialized_start=1252 + _globals['_NEARVECTOR']._serialized_end=1681 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1600 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1654 + _globals['_NEAROBJECT']._serialized_start=1684 + _globals['_NEAROBJECT']._serialized_end=1849 + _globals['_NEARTEXTSEARCH']._serialized_start=1852 + _globals['_NEARTEXTSEARCH']._serialized_end=2220 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2113 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2167 + _globals['_NEARIMAGESEARCH']._serialized_start=2223 + _globals['_NEARIMAGESEARCH']._serialized_end=2396 + _globals['_NEARAUDIOSEARCH']._serialized_start=2399 + _globals['_NEARAUDIOSEARCH']._serialized_end=2572 + _globals['_NEARVIDEOSEARCH']._serialized_start=2575 + 
_globals['_NEARVIDEOSEARCH']._serialized_end=2748 + _globals['_NEARDEPTHSEARCH']._serialized_start=2751 + _globals['_NEARDEPTHSEARCH']._serialized_end=2924 + _globals['_NEARTHERMALSEARCH']._serialized_start=2927 + _globals['_NEARTHERMALSEARCH']._serialized_end=3104 + _globals['_NEARIMUSEARCH']._serialized_start=3107 + _globals['_NEARIMUSEARCH']._serialized_end=3276 + _globals['_BM25']._serialized_start=3278 + _globals['_BM25']._serialized_end=3405 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi b/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi index 5f1871ac7..b89f04bfe 100644 --- a/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v5261/v1/base_search_pb2.pyi @@ -67,7 +67,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... class Hybrid(_message.Message): - __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "vector_distance", "vectors") + __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "use_alpha_param", "vector_distance", "vectors") class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = () FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -87,6 +87,8 @@ class Hybrid(_message.Message): NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] + ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] + USE_ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -100,9 +102,11 @@ class Hybrid(_message.Message): near_vector: NearVector 
targets: Targets bm25_search_operator: SearchOperatorOptions + alpha_param: float + use_alpha_param: bool vector_distance: float vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... + def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., use_alpha_param: bool = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
class NearVector(_message.Message): __slots__ = ("vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors") diff --git a/weaviate/proto/v1/v5261/v1/search_get_pb2.py b/weaviate/proto/v1/v5261/v1/search_get_pb2.py index 04356c19f..7686973af 100644 --- a/weaviate/proto/v1/v5261/v1/search_get_pb2.py +++ b/weaviate/proto/v1/v5261/v1/search_get_pb2.py @@ -18,7 +18,7 @@ from weaviate.proto.v1.v5261.v1 import properties_pb2 as v1_dot_properties__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 
\x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 
\x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c 
\x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xf4\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\x12\x15\n\rquery_profile\x18\x0b 
\x01(\x08\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xf7\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x12\x35\n\rquery_profile\x18\x06 \x01(\x0b\x32\x19.weaviate.v1.QueryProfileH\x02\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_resultsB\x10\n\x0e_query_profile\"\x9e\x03\n\x0cQueryProfile\x12\x36\n\x06shards\x18\x01 \x03(\x0b\x32&.weaviate.v1.QueryProfile.ShardProfile\x1a\x86\x01\n\rSearchProfile\x12\x45\n\x07\x64\x65tails\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.SearchProfile.DetailsEntry\x1a.\n\x0c\x44\x65tailsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xcc\x01\n\x0cShardProfile\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04node\x18\x02 
\x01(\t\x12\x46\n\x08searches\x18\x03 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.ShardProfile.SearchesEntry\x1aX\n\rSearchesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x36\n\x05value\x18\x02 \x01(\x0b\x32\'.weaviate.v1.QueryProfile.SearchProfile:\x02\x38\x01\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f 
\x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -32,6 +32,10 @@ _globals['_SEARCHREQUEST'].fields_by_name['uses_125_api']._serialized_options = b'\030\001' _globals['_SEARCHREPLY'].fields_by_name['generative_grouped_result']._loaded_options = None _globals['_SEARCHREPLY'].fields_by_name['generative_grouped_result']._serialized_options = b'\030\001' + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._loaded_options = None + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_options = b'8\001' + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._loaded_options = None + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_options = b'8\001' 
_globals['_GROUPBYRESULT'].fields_by_name['generative']._loaded_options = None _globals['_GROUPBYRESULT'].fields_by_name['generative']._serialized_options = b'\030\001' _globals['_METADATARESULT'].fields_by_name['vector']._loaded_options = None @@ -47,27 +51,37 @@ _globals['_SORTBY']._serialized_start=1632 _globals['_SORTBY']._serialized_end=1673 _globals['_METADATAREQUEST']._serialized_start=1676 - _globals['_METADATAREQUEST']._serialized_end=1897 - _globals['_PROPERTIESREQUEST']._serialized_start=1900 - _globals['_PROPERTIESREQUEST']._serialized_end=2109 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2112 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2251 - _globals['_REFPROPERTIESREQUEST']._serialized_start=2254 - _globals['_REFPROPERTIESREQUEST']._serialized_end=2431 - _globals['_RERANK']._serialized_start=2433 - _globals['_RERANK']._serialized_end=2489 - _globals['_SEARCHREPLY']._serialized_start=2492 - _globals['_SEARCHREPLY']._serialized_end=2794 - _globals['_RERANKREPLY']._serialized_start=2796 - _globals['_RERANKREPLY']._serialized_end=2824 - _globals['_GROUPBYRESULT']._serialized_start=2827 - _globals['_GROUPBYRESULT']._serialized_end=3188 - _globals['_SEARCHRESULT']._serialized_start=3191 - _globals['_SEARCHRESULT']._serialized_end=3374 - _globals['_METADATARESULT']._serialized_start=3377 - _globals['_METADATARESULT']._serialized_end=4008 - _globals['_PROPERTIESRESULT']._serialized_start=4011 - _globals['_PROPERTIESRESULT']._serialized_end=4275 - _globals['_REFPROPERTIESRESULT']._serialized_start=4277 - _globals['_REFPROPERTIESRESULT']._serialized_end=4368 + _globals['_METADATAREQUEST']._serialized_end=1920 + _globals['_PROPERTIESREQUEST']._serialized_start=1923 + _globals['_PROPERTIESREQUEST']._serialized_end=2132 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2135 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2274 + _globals['_REFPROPERTIESREQUEST']._serialized_start=2277 + 
_globals['_REFPROPERTIESREQUEST']._serialized_end=2454 + _globals['_RERANK']._serialized_start=2456 + _globals['_RERANK']._serialized_end=2512 + _globals['_SEARCHREPLY']._serialized_start=2515 + _globals['_SEARCHREPLY']._serialized_end=2890 + _globals['_QUERYPROFILE']._serialized_start=2893 + _globals['_QUERYPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_start=2966 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_end=3100 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_start=3054 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_end=3100 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_start=3103 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_start=3219 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_end=3307 + _globals['_RERANKREPLY']._serialized_start=3309 + _globals['_RERANKREPLY']._serialized_end=3337 + _globals['_GROUPBYRESULT']._serialized_start=3340 + _globals['_GROUPBYRESULT']._serialized_end=3701 + _globals['_SEARCHRESULT']._serialized_start=3704 + _globals['_SEARCHRESULT']._serialized_end=3887 + _globals['_METADATARESULT']._serialized_start=3890 + _globals['_METADATARESULT']._serialized_end=4521 + _globals['_PROPERTIESRESULT']._serialized_start=4524 + _globals['_PROPERTIESRESULT']._serialized_end=4788 + _globals['_REFPROPERTIESRESULT']._serialized_start=4790 + _globals['_REFPROPERTIESRESULT']._serialized_end=4881 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v5261/v1/search_get_pb2.pyi b/weaviate/proto/v1/v5261/v1/search_get_pb2.pyi index 4a28237d9..783ca3564 100644 --- a/weaviate/proto/v1/v5261/v1/search_get_pb2.pyi +++ b/weaviate/proto/v1/v5261/v1/search_get_pb2.pyi @@ -88,7 +88,7 @@ class SortBy(_message.Message): def __init__(self, ascending: bool = ..., path: _Optional[_Iterable[str]] = ...) -> None: ... 
class MetadataRequest(_message.Message): - __slots__ = ("uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors") + __slots__ = ("uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors", "query_profile") UUID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] CREATION_TIME_UNIX_FIELD_NUMBER: _ClassVar[int] @@ -99,6 +99,7 @@ class MetadataRequest(_message.Message): EXPLAIN_SCORE_FIELD_NUMBER: _ClassVar[int] IS_CONSISTENT_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] uuid: bool vector: bool creation_time_unix: bool @@ -109,7 +110,8 @@ class MetadataRequest(_message.Message): explain_score: bool is_consistent: bool vectors: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ...) -> None: ... + query_profile: bool + def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ..., query_profile: bool = ...) -> None: ... class PropertiesRequest(_message.Message): __slots__ = ("non_ref_properties", "ref_properties", "object_properties", "return_all_nonref_properties") @@ -154,18 +156,54 @@ class Rerank(_message.Message): def __init__(self, property: _Optional[str] = ..., query: _Optional[str] = ...) -> None: ... 
class SearchReply(_message.Message): - __slots__ = ("took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results") + __slots__ = ("took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results", "query_profile") TOOK_FIELD_NUMBER: _ClassVar[int] RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULT_FIELD_NUMBER: _ClassVar[int] GROUP_BY_RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULTS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] took: float results: _containers.RepeatedCompositeFieldContainer[SearchResult] generative_grouped_result: str group_by_results: _containers.RepeatedCompositeFieldContainer[GroupByResult] generative_grouped_results: _generative_pb2.GenerativeResult - def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ...) -> None: ... + query_profile: QueryProfile + def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ..., query_profile: _Optional[_Union[QueryProfile, _Mapping]] = ...) -> None: ... + +class QueryProfile(_message.Message): + __slots__ = ("shards",) + class SearchProfile(_message.Message): + __slots__ = ("details",) + class DetailsEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) 
-> None: ... + DETAILS_FIELD_NUMBER: _ClassVar[int] + details: _containers.ScalarMap[str, str] + def __init__(self, details: _Optional[_Mapping[str, str]] = ...) -> None: ... + class ShardProfile(_message.Message): + __slots__ = ("name", "node", "searches") + class SearchesEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: QueryProfile.SearchProfile + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[QueryProfile.SearchProfile, _Mapping]] = ...) -> None: ... + NAME_FIELD_NUMBER: _ClassVar[int] + NODE_FIELD_NUMBER: _ClassVar[int] + SEARCHES_FIELD_NUMBER: _ClassVar[int] + name: str + node: str + searches: _containers.MessageMap[str, QueryProfile.SearchProfile] + def __init__(self, name: _Optional[str] = ..., node: _Optional[str] = ..., searches: _Optional[_Mapping[str, QueryProfile.SearchProfile]] = ...) -> None: ... + SHARDS_FIELD_NUMBER: _ClassVar[int] + shards: _containers.RepeatedCompositeFieldContainer[QueryProfile.ShardProfile] + def __init__(self, shards: _Optional[_Iterable[_Union[QueryProfile.ShardProfile, _Mapping]]] = ...) -> None: ... 
class RerankReply(_message.Message): __slots__ = ("score",) diff --git a/weaviate/proto/v1/v6300/v1/base_search_pb2.py b/weaviate/proto/v1/v6300/v1/base_search_pb2.py index cbf099302..9a19ea0ec 100644 --- a/weaviate/proto/v1/v6300/v1/base_search_pb2.py +++ b/weaviate/proto/v1/v6300/v1/base_search_pb2.py @@ -25,7 +25,7 @@ from weaviate.proto.v1.v6300.v1 import base_pb2 as v1_dot_base__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 \x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\xd0\x04\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\r\n\x05\x61lpha\x18\x04 \x01(\x02\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n 
\x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operator\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t \x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 
\x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 
\x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14v1/base_search.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\"2\n\x10WeightsForTarget\x12\x0e\n\x06target\x18\x01 \x01(\t\x12\x0e\n\x06weight\x18\x02 \x01(\x02\"\x98\x01\n\x07Targets\x12\x16\n\x0etarget_vectors\x18\x01 \x03(\t\x12\x33\n\x0b\x63ombination\x18\x02 \x01(\x0e\x32\x1e.weaviate.v1.CombinationMethod\x12:\n\x13weights_for_targets\x18\x04 \x03(\x0b\x32\x1d.weaviate.v1.WeightsForTargetJ\x04\x08\x03\x10\x04\"`\n\x0fVectorForTarget\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x18\n\x0cvector_bytes\x18\x02 \x01(\x0c\x42\x02\x18\x01\x12%\n\x07vectors\x18\x03 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"\xe1\x01\n\x15SearchOperatorOptions\x12=\n\x08operator\x18\x01 \x01(\x0e\x32+.weaviate.v1.SearchOperatorOptions.Operator\x12$\n\x17minimum_or_tokens_match\x18\x02 
\x01(\x05H\x00\x88\x01\x01\"G\n\x08Operator\x12\x18\n\x14OPERATOR_UNSPECIFIED\x10\x00\x12\x0f\n\x0bOPERATOR_OR\x10\x01\x12\x10\n\x0cOPERATOR_AND\x10\x02\x42\x1a\n\x18_minimum_or_tokens_match\"\x97\x05\n\x06Hybrid\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12\x12\n\x06vector\x18\x03 \x03(\x02\x42\x02\x18\x01\x12\x11\n\x05\x61lpha\x18\x04 \x01(\x02\x42\x02\x18\x01\x12\x33\n\x0b\x66usion_type\x18\x05 \x01(\x0e\x32\x1e.weaviate.v1.Hybrid.FusionType\x12\x18\n\x0cvector_bytes\x18\x06 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x07 \x03(\tB\x02\x18\x01\x12.\n\tnear_text\x18\x08 \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearch\x12,\n\x0bnear_vector\x18\t \x01(\x0b\x32\x17.weaviate.v1.NearVector\x12%\n\x07targets\x18\n \x01(\x0b\x32\x14.weaviate.v1.Targets\x12\x45\n\x14\x62m25_search_operator\x18\x0b \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x01\x88\x01\x01\x12\x18\n\x0b\x61lpha_param\x18\x0c \x01(\x02H\x02\x88\x01\x01\x12\x17\n\x0fuse_alpha_param\x18\r \x01(\x08\x12\x19\n\x0fvector_distance\x18\x14 \x01(\x02H\x00\x12%\n\x07vectors\x18\x15 \x03(\x0b\x32\x14.weaviate.v1.Vectors\"a\n\nFusionType\x12\x1b\n\x17\x46USION_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12\x46USION_TYPE_RANKED\x10\x01\x12\x1e\n\x1a\x46USION_TYPE_RELATIVE_SCORE\x10\x02\x42\x0b\n\tthresholdB\x17\n\x15_bm25_search_operatorB\x0e\n\x0c_alpha_param\"\xad\x03\n\nNearVector\x12\x12\n\x06vector\x18\x01 \x03(\x02\x42\x02\x18\x01\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x18\n\x0cvector_bytes\x18\x04 \x01(\x0c\x42\x02\x18\x01\x12\x1a\n\x0etarget_vectors\x18\x05 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x06 \x01(\x0b\x32\x14.weaviate.v1.Targets\x12K\n\x11vector_per_target\x18\x07 \x03(\x0b\x32,.weaviate.v1.NearVector.VectorPerTargetEntryB\x02\x18\x01\x12\x38\n\x12vector_for_targets\x18\x08 \x03(\x0b\x32\x1c.weaviate.v1.VectorForTarget\x12%\n\x07vectors\x18\t 
\x03(\x0b\x32\x14.weaviate.v1.Vectors\x1a\x36\n\x14VectorPerTargetEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x0c:\x02\x38\x01\x42\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa5\x01\n\nNearObject\x12\n\n\x02id\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xf0\x02\n\x0eNearTextSearch\x12\r\n\x05query\x18\x01 \x03(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x36\n\x07move_to\x18\x04 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x02\x88\x01\x01\x12\x38\n\tmove_away\x18\x05 \x01(\x0b\x32 .weaviate.v1.NearTextSearch.MoveH\x03\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x06 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x07 \x01(\x0b\x32\x14.weaviate.v1.Targets\x1a\x36\n\x04Move\x12\r\n\x05\x66orce\x18\x01 \x01(\x02\x12\x10\n\x08\x63oncepts\x18\x02 \x03(\t\x12\r\n\x05uuids\x18\x03 \x03(\tB\x0c\n\n_certaintyB\x0b\n\t_distanceB\n\n\x08_move_toB\x0c\n\n_move_away\"\xad\x01\n\x0fNearImageSearch\x12\r\n\x05image\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearAudioSearch\x12\r\n\x05\x61udio\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearVideoSearch\x12\r\n\x05video\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 
\x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xad\x01\n\x0fNearDepthSearch\x12\r\n\x05\x64\x65pth\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xb1\x01\n\x11NearThermalSearch\x12\x0f\n\x07thermal\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\xa9\x01\n\rNearIMUSearch\x12\x0b\n\x03imu\x18\x01 \x01(\t\x12\x16\n\tcertainty\x18\x02 \x01(\x01H\x00\x88\x01\x01\x12\x15\n\x08\x64istance\x18\x03 \x01(\x01H\x01\x88\x01\x01\x12\x1a\n\x0etarget_vectors\x18\x04 \x03(\tB\x02\x18\x01\x12%\n\x07targets\x18\x05 \x01(\x0b\x32\x14.weaviate.v1.TargetsB\x0c\n\n_certaintyB\x0b\n\t_distance\"\x7f\n\x04\x42M25\x12\r\n\x05query\x18\x01 \x01(\t\x12\x12\n\nproperties\x18\x02 \x03(\t\x12@\n\x0fsearch_operator\x18\x03 \x01(\x0b\x32\".weaviate.v1.SearchOperatorOptionsH\x00\x88\x01\x01\x42\x12\n\x10_search_operator*\xee\x01\n\x11\x43ombinationMethod\x12\"\n\x1e\x43OMBINATION_METHOD_UNSPECIFIED\x10\x00\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_SUM\x10\x01\x12\x1f\n\x1b\x43OMBINATION_METHOD_TYPE_MIN\x10\x02\x12#\n\x1f\x43OMBINATION_METHOD_TYPE_AVERAGE\x10\x03\x12*\n&COMBINATION_METHOD_TYPE_RELATIVE_SCORE\x10\x04\x12\"\n\x1e\x43OMBINATION_METHOD_TYPE_MANUAL\x10\x05\x42t\n#io.weaviate.client.grpc.protocol.v1B\x17WeaviateProtoBaseSearchZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -37,6 +37,8 @@ _globals['_VECTORFORTARGET'].fields_by_name['vector_bytes']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['vector']._loaded_options = None _globals['_HYBRID'].fields_by_name['vector']._serialized_options = b'\030\001' + _globals['_HYBRID'].fields_by_name['alpha']._loaded_options = None + _globals['_HYBRID'].fields_by_name['alpha']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['vector_bytes']._loaded_options = None _globals['_HYBRID'].fields_by_name['vector_bytes']._serialized_options = b'\030\001' _globals['_HYBRID'].fields_by_name['target_vectors']._loaded_options = None @@ -67,8 +69,8 @@ _globals['_NEARTHERMALSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._loaded_options = None _globals['_NEARIMUSEARCH'].fields_by_name['target_vectors']._serialized_options = b'\030\001' - _globals['_COMBINATIONMETHOD']._serialized_start=3337 - _globals['_COMBINATIONMETHOD']._serialized_end=3575 + _globals['_COMBINATIONMETHOD']._serialized_start=3408 + _globals['_COMBINATIONMETHOD']._serialized_end=3646 _globals['_WEIGHTSFORTARGET']._serialized_start=52 _globals['_WEIGHTSFORTARGET']._serialized_end=102 _globals['_TARGETS']._serialized_start=105 @@ -80,31 +82,31 @@ _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_start=484 _globals['_SEARCHOPERATOROPTIONS_OPERATOR']._serialized_end=555 _globals['_HYBRID']._serialized_start=586 - _globals['_HYBRID']._serialized_end=1178 - _globals['_HYBRID_FUSIONTYPE']._serialized_start=1043 - _globals['_HYBRID_FUSIONTYPE']._serialized_end=1140 - _globals['_NEARVECTOR']._serialized_start=1181 - _globals['_NEARVECTOR']._serialized_end=1610 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1529 - _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1583 - _globals['_NEAROBJECT']._serialized_start=1613 - 
_globals['_NEAROBJECT']._serialized_end=1778 - _globals['_NEARTEXTSEARCH']._serialized_start=1781 - _globals['_NEARTEXTSEARCH']._serialized_end=2149 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2042 - _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2096 - _globals['_NEARIMAGESEARCH']._serialized_start=2152 - _globals['_NEARIMAGESEARCH']._serialized_end=2325 - _globals['_NEARAUDIOSEARCH']._serialized_start=2328 - _globals['_NEARAUDIOSEARCH']._serialized_end=2501 - _globals['_NEARVIDEOSEARCH']._serialized_start=2504 - _globals['_NEARVIDEOSEARCH']._serialized_end=2677 - _globals['_NEARDEPTHSEARCH']._serialized_start=2680 - _globals['_NEARDEPTHSEARCH']._serialized_end=2853 - _globals['_NEARTHERMALSEARCH']._serialized_start=2856 - _globals['_NEARTHERMALSEARCH']._serialized_end=3033 - _globals['_NEARIMUSEARCH']._serialized_start=3036 - _globals['_NEARIMUSEARCH']._serialized_end=3205 - _globals['_BM25']._serialized_start=3207 - _globals['_BM25']._serialized_end=3334 + _globals['_HYBRID']._serialized_end=1249 + _globals['_HYBRID_FUSIONTYPE']._serialized_start=1098 + _globals['_HYBRID_FUSIONTYPE']._serialized_end=1195 + _globals['_NEARVECTOR']._serialized_start=1252 + _globals['_NEARVECTOR']._serialized_end=1681 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_start=1600 + _globals['_NEARVECTOR_VECTORPERTARGETENTRY']._serialized_end=1654 + _globals['_NEAROBJECT']._serialized_start=1684 + _globals['_NEAROBJECT']._serialized_end=1849 + _globals['_NEARTEXTSEARCH']._serialized_start=1852 + _globals['_NEARTEXTSEARCH']._serialized_end=2220 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_start=2113 + _globals['_NEARTEXTSEARCH_MOVE']._serialized_end=2167 + _globals['_NEARIMAGESEARCH']._serialized_start=2223 + _globals['_NEARIMAGESEARCH']._serialized_end=2396 + _globals['_NEARAUDIOSEARCH']._serialized_start=2399 + _globals['_NEARAUDIOSEARCH']._serialized_end=2572 + _globals['_NEARVIDEOSEARCH']._serialized_start=2575 + 
_globals['_NEARVIDEOSEARCH']._serialized_end=2748 + _globals['_NEARDEPTHSEARCH']._serialized_start=2751 + _globals['_NEARDEPTHSEARCH']._serialized_end=2924 + _globals['_NEARTHERMALSEARCH']._serialized_start=2927 + _globals['_NEARTHERMALSEARCH']._serialized_end=3104 + _globals['_NEARIMUSEARCH']._serialized_start=3107 + _globals['_NEARIMUSEARCH']._serialized_end=3276 + _globals['_BM25']._serialized_start=3278 + _globals['_BM25']._serialized_end=3405 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi b/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi index 80abcb05d..67a53e25a 100644 --- a/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi +++ b/weaviate/proto/v1/v6300/v1/base_search_pb2.pyi @@ -68,7 +68,7 @@ class SearchOperatorOptions(_message.Message): def __init__(self, operator: _Optional[_Union[SearchOperatorOptions.Operator, str]] = ..., minimum_or_tokens_match: _Optional[int] = ...) -> None: ... class Hybrid(_message.Message): - __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "vector_distance", "vectors") + __slots__ = ("query", "properties", "vector", "alpha", "fusion_type", "vector_bytes", "target_vectors", "near_text", "near_vector", "targets", "bm25_search_operator", "alpha_param", "use_alpha_param", "vector_distance", "vectors") class FusionType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): __slots__ = () FUSION_TYPE_UNSPECIFIED: _ClassVar[Hybrid.FusionType] @@ -88,6 +88,8 @@ class Hybrid(_message.Message): NEAR_VECTOR_FIELD_NUMBER: _ClassVar[int] TARGETS_FIELD_NUMBER: _ClassVar[int] BM25_SEARCH_OPERATOR_FIELD_NUMBER: _ClassVar[int] + ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] + USE_ALPHA_PARAM_FIELD_NUMBER: _ClassVar[int] VECTOR_DISTANCE_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] query: str @@ -101,9 +103,11 @@ class Hybrid(_message.Message): near_vector: NearVector 
targets: Targets bm25_search_operator: SearchOperatorOptions + alpha_param: float + use_alpha_param: bool vector_distance: float vectors: _containers.RepeatedCompositeFieldContainer[_base_pb2.Vectors] - def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... + def __init__(self, query: _Optional[str] = ..., properties: _Optional[_Iterable[str]] = ..., vector: _Optional[_Iterable[float]] = ..., alpha: _Optional[float] = ..., fusion_type: _Optional[_Union[Hybrid.FusionType, str]] = ..., vector_bytes: _Optional[bytes] = ..., target_vectors: _Optional[_Iterable[str]] = ..., near_text: _Optional[_Union[NearTextSearch, _Mapping]] = ..., near_vector: _Optional[_Union[NearVector, _Mapping]] = ..., targets: _Optional[_Union[Targets, _Mapping]] = ..., bm25_search_operator: _Optional[_Union[SearchOperatorOptions, _Mapping]] = ..., alpha_param: _Optional[float] = ..., use_alpha_param: bool = ..., vector_distance: _Optional[float] = ..., vectors: _Optional[_Iterable[_Union[_base_pb2.Vectors, _Mapping]]] = ...) -> None: ... 
class NearVector(_message.Message): __slots__ = ("vector", "certainty", "distance", "vector_bytes", "target_vectors", "targets", "vector_per_target", "vector_for_targets", "vectors") diff --git a/weaviate/proto/v1/v6300/v1/search_get_pb2.py b/weaviate/proto/v1/v6300/v1/search_get_pb2.py index 88dfb5992..1b38e4fb6 100644 --- a/weaviate/proto/v1/v6300/v1/search_get_pb2.py +++ b/weaviate/proto/v1/v6300/v1/search_get_pb2.py @@ -28,7 +28,7 @@ from weaviate.proto.v1.v6300.v1 import properties_pb2 as v1_dot_properties__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xdd\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 
\x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xae\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_results\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 
\x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f \x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c 
\x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13v1/search_get.proto\x12\x0bweaviate.v1\x1a\rv1/base.proto\x1a\x14v1/base_search.proto\x1a\x13v1/generative.proto\x1a\x13v1/properties.proto\"\x9c\x0b\n\rSearchRequest\x12\x12\n\ncollection\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\n \x01(\t\x12=\n\x11\x63onsistency_level\x18\x0b \x01(\x0e\x32\x1d.weaviate.v1.ConsistencyLevelH\x00\x88\x01\x01\x12\x37\n\nproperties\x18\x14 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequestH\x01\x88\x01\x01\x12\x33\n\x08metadata\x18\x15 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequestH\x02\x88\x01\x01\x12+\n\x08group_by\x18\x16 \x01(\x0b\x32\x14.weaviate.v1.GroupByH\x03\x88\x01\x01\x12\r\n\x05limit\x18\x1e \x01(\r\x12\x0e\n\x06offset\x18\x1f \x01(\r\x12\x0f\n\x07\x61utocut\x18 \x01(\r\x12\r\n\x05\x61\x66ter\x18! \x01(\t\x12$\n\x07sort_by\x18\" \x03(\x0b\x32\x13.weaviate.v1.SortBy\x12*\n\x07\x66ilters\x18( \x01(\x0b\x32\x14.weaviate.v1.FiltersH\x04\x88\x01\x01\x12/\n\rhybrid_search\x18) \x01(\x0b\x32\x13.weaviate.v1.HybridH\x05\x88\x01\x01\x12+\n\x0b\x62m25_search\x18* \x01(\x0b\x32\x11.weaviate.v1.BM25H\x06\x88\x01\x01\x12\x31\n\x0bnear_vector\x18+ \x01(\x0b\x32\x17.weaviate.v1.NearVectorH\x07\x88\x01\x01\x12\x31\n\x0bnear_object\x18, \x01(\x0b\x32\x17.weaviate.v1.NearObjectH\x08\x88\x01\x01\x12\x33\n\tnear_text\x18- \x01(\x0b\x32\x1b.weaviate.v1.NearTextSearchH\t\x88\x01\x01\x12\x35\n\nnear_image\x18. 
\x01(\x0b\x32\x1c.weaviate.v1.NearImageSearchH\n\x88\x01\x01\x12\x35\n\nnear_audio\x18/ \x01(\x0b\x32\x1c.weaviate.v1.NearAudioSearchH\x0b\x88\x01\x01\x12\x35\n\nnear_video\x18\x30 \x01(\x0b\x32\x1c.weaviate.v1.NearVideoSearchH\x0c\x88\x01\x01\x12\x35\n\nnear_depth\x18\x31 \x01(\x0b\x32\x1c.weaviate.v1.NearDepthSearchH\r\x88\x01\x01\x12\x39\n\x0cnear_thermal\x18\x32 \x01(\x0b\x32\x1e.weaviate.v1.NearThermalSearchH\x0e\x88\x01\x01\x12\x31\n\x08near_imu\x18\x33 \x01(\x0b\x32\x1a.weaviate.v1.NearIMUSearchH\x0f\x88\x01\x01\x12\x36\n\ngenerative\x18< \x01(\x0b\x32\x1d.weaviate.v1.GenerativeSearchH\x10\x88\x01\x01\x12(\n\x06rerank\x18= \x01(\x0b\x32\x13.weaviate.v1.RerankH\x11\x88\x01\x01\x12\x18\n\x0cuses_123_api\x18\x64 \x01(\x08\x42\x02\x18\x01\x12\x18\n\x0cuses_125_api\x18\x65 \x01(\x08\x42\x02\x18\x01\x12\x14\n\x0cuses_127_api\x18\x66 \x01(\x08\x42\x14\n\x12_consistency_levelB\r\n\x0b_propertiesB\x0b\n\t_metadataB\x0b\n\t_group_byB\n\n\x08_filtersB\x10\n\x0e_hybrid_searchB\x0e\n\x0c_bm25_searchB\x0e\n\x0c_near_vectorB\x0e\n\x0c_near_objectB\x0c\n\n_near_textB\r\n\x0b_near_imageB\r\n\x0b_near_audioB\r\n\x0b_near_videoB\r\n\x0b_near_depthB\x0f\n\r_near_thermalB\x0b\n\t_near_imuB\r\n\x0b_generativeB\t\n\x07_rerank\"L\n\x07GroupBy\x12\x0c\n\x04path\x18\x01 \x03(\t\x12\x18\n\x10number_of_groups\x18\x02 \x01(\x05\x12\x19\n\x11objects_per_group\x18\x03 \x01(\x05\")\n\x06SortBy\x12\x11\n\tascending\x18\x01 \x01(\x08\x12\x0c\n\x04path\x18\x02 \x03(\t\"\xf4\x01\n\x0fMetadataRequest\x12\x0c\n\x04uuid\x18\x01 \x01(\x08\x12\x0e\n\x06vector\x18\x02 \x01(\x08\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x04 \x01(\x08\x12\x10\n\x08\x64istance\x18\x05 \x01(\x08\x12\x11\n\tcertainty\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x08\x12\x15\n\rexplain_score\x18\x08 \x01(\x08\x12\x15\n\ris_consistent\x18\t \x01(\x08\x12\x0f\n\x07vectors\x18\n \x03(\t\x12\x15\n\rquery_profile\x18\x0b 
\x01(\x08\"\xd1\x01\n\x11PropertiesRequest\x12\x1a\n\x12non_ref_properties\x18\x01 \x03(\t\x12\x39\n\x0eref_properties\x18\x02 \x03(\x0b\x32!.weaviate.v1.RefPropertiesRequest\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\x12$\n\x1creturn_all_nonref_properties\x18\x0b \x01(\x08\"\x8b\x01\n\x17ObjectPropertiesRequest\x12\x11\n\tprop_name\x18\x01 \x01(\t\x12\x1c\n\x14primitive_properties\x18\x02 \x03(\t\x12?\n\x11object_properties\x18\x03 \x03(\x0b\x32$.weaviate.v1.ObjectPropertiesRequest\"\xb1\x01\n\x14RefPropertiesRequest\x12\x1a\n\x12reference_property\x18\x01 \x01(\t\x12\x32\n\nproperties\x18\x02 \x01(\x0b\x32\x1e.weaviate.v1.PropertiesRequest\x12.\n\x08metadata\x18\x03 \x01(\x0b\x32\x1c.weaviate.v1.MetadataRequest\x12\x19\n\x11target_collection\x18\x04 \x01(\t\"8\n\x06Rerank\x12\x10\n\x08property\x18\x01 \x01(\t\x12\x12\n\x05query\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\x08\n\x06_query\"\xf7\x02\n\x0bSearchReply\x12\x0c\n\x04took\x18\x01 \x01(\x02\x12*\n\x07results\x18\x02 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12*\n\x19generative_grouped_result\x18\x03 \x01(\tB\x02\x18\x01H\x00\x88\x01\x01\x12\x34\n\x10group_by_results\x18\x04 \x03(\x0b\x32\x1a.weaviate.v1.GroupByResult\x12\x46\n\x1agenerative_grouped_results\x18\x05 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x01\x88\x01\x01\x12\x35\n\rquery_profile\x18\x06 \x01(\x0b\x32\x19.weaviate.v1.QueryProfileH\x02\x88\x01\x01\x42\x1c\n\x1a_generative_grouped_resultB\x1d\n\x1b_generative_grouped_resultsB\x10\n\x0e_query_profile\"\x9e\x03\n\x0cQueryProfile\x12\x36\n\x06shards\x18\x01 \x03(\x0b\x32&.weaviate.v1.QueryProfile.ShardProfile\x1a\x86\x01\n\rSearchProfile\x12\x45\n\x07\x64\x65tails\x18\x01 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.SearchProfile.DetailsEntry\x1a.\n\x0c\x44\x65tailsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\xcc\x01\n\x0cShardProfile\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04node\x18\x02 
\x01(\t\x12\x46\n\x08searches\x18\x03 \x03(\x0b\x32\x34.weaviate.v1.QueryProfile.ShardProfile.SearchesEntry\x1aX\n\rSearchesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x36\n\x05value\x18\x02 \x01(\x0b\x32\'.weaviate.v1.QueryProfile.SearchProfile:\x02\x38\x01\"\x1c\n\x0bRerankReply\x12\r\n\x05score\x18\x01 \x01(\x01\"\xe9\x02\n\rGroupByResult\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cmin_distance\x18\x02 \x01(\x02\x12\x14\n\x0cmax_distance\x18\x03 \x01(\x02\x12\x19\n\x11number_of_objects\x18\x04 \x01(\x03\x12*\n\x07objects\x18\x05 \x03(\x0b\x32\x19.weaviate.v1.SearchResult\x12-\n\x06rerank\x18\x06 \x01(\x0b\x32\x18.weaviate.v1.RerankReplyH\x00\x88\x01\x01\x12\x39\n\ngenerative\x18\x07 \x01(\x0b\x32\x1c.weaviate.v1.GenerativeReplyB\x02\x18\x01H\x01\x88\x01\x01\x12=\n\x11generative_result\x18\x08 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x02\x88\x01\x01\x42\t\n\x07_rerankB\r\n\x0b_generativeB\x14\n\x12_generative_result\"\xb7\x01\n\x0cSearchResult\x12\x31\n\nproperties\x18\x01 \x01(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12-\n\x08metadata\x18\x02 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12\x36\n\ngenerative\x18\x03 \x01(\x0b\x32\x1d.weaviate.v1.GenerativeResultH\x00\x88\x01\x01\x42\r\n\x0b_generative\"\xf7\x04\n\x0eMetadataResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x06vector\x18\x02 \x03(\x02\x42\x02\x18\x01\x12\x1a\n\x12\x63reation_time_unix\x18\x03 \x01(\x03\x12\"\n\x1a\x63reation_time_unix_present\x18\x04 \x01(\x08\x12\x1d\n\x15last_update_time_unix\x18\x05 \x01(\x03\x12%\n\x1dlast_update_time_unix_present\x18\x06 \x01(\x08\x12\x10\n\x08\x64istance\x18\x07 \x01(\x02\x12\x18\n\x10\x64istance_present\x18\x08 \x01(\x08\x12\x11\n\tcertainty\x18\t \x01(\x02\x12\x19\n\x11\x63\x65rtainty_present\x18\n \x01(\x08\x12\r\n\x05score\x18\x0b \x01(\x02\x12\x15\n\rscore_present\x18\x0c \x01(\x08\x12\x15\n\rexplain_score\x18\r \x01(\t\x12\x1d\n\x15\x65xplain_score_present\x18\x0e \x01(\x08\x12\x1a\n\ris_consistent\x18\x0f 
\x01(\x08H\x00\x88\x01\x01\x12\x16\n\ngenerative\x18\x10 \x01(\tB\x02\x18\x01\x12\x1e\n\x12generative_present\x18\x11 \x01(\x08\x42\x02\x18\x01\x12\x1d\n\x15is_consistent_present\x18\x12 \x01(\x08\x12\x14\n\x0cvector_bytes\x18\x13 \x01(\x0c\x12\x13\n\x0bid_as_bytes\x18\x14 \x01(\x0c\x12\x14\n\x0crerank_score\x18\x15 \x01(\x01\x12\x1c\n\x14rerank_score_present\x18\x16 \x01(\x08\x12%\n\x07vectors\x18\x17 \x03(\x0b\x32\x14.weaviate.v1.VectorsB\x10\n\x0e_is_consistent\"\x88\x02\n\x10PropertiesResult\x12\x33\n\tref_props\x18\x02 \x03(\x0b\x32 .weaviate.v1.RefPropertiesResult\x12\x19\n\x11target_collection\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x1b.weaviate.v1.MetadataResult\x12.\n\rnon_ref_props\x18\x0b \x01(\x0b\x32\x17.weaviate.v1.Properties\x12\x1b\n\x13ref_props_requested\x18\x0c \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x08\x10\tJ\x04\x08\t\x10\nJ\x04\x08\n\x10\x0b\"[\n\x13RefPropertiesResult\x12\x31\n\nproperties\x18\x01 \x03(\x0b\x32\x1d.weaviate.v1.PropertiesResult\x12\x11\n\tprop_name\x18\x02 \x01(\tBs\n#io.weaviate.client.grpc.protocol.v1B\x16WeaviateProtoSearchGetZ4github.com/weaviate/weaviate/grpc/generated;protocolb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -42,6 +42,10 @@ _globals['_SEARCHREQUEST'].fields_by_name['uses_125_api']._serialized_options = b'\030\001' _globals['_SEARCHREPLY'].fields_by_name['generative_grouped_result']._loaded_options = None _globals['_SEARCHREPLY'].fields_by_name['generative_grouped_result']._serialized_options = b'\030\001' + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._loaded_options = None + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_options = b'8\001' + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._loaded_options = None + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_options = b'8\001' 
_globals['_GROUPBYRESULT'].fields_by_name['generative']._loaded_options = None _globals['_GROUPBYRESULT'].fields_by_name['generative']._serialized_options = b'\030\001' _globals['_METADATARESULT'].fields_by_name['vector']._loaded_options = None @@ -57,27 +61,37 @@ _globals['_SORTBY']._serialized_start=1632 _globals['_SORTBY']._serialized_end=1673 _globals['_METADATAREQUEST']._serialized_start=1676 - _globals['_METADATAREQUEST']._serialized_end=1897 - _globals['_PROPERTIESREQUEST']._serialized_start=1900 - _globals['_PROPERTIESREQUEST']._serialized_end=2109 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2112 - _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2251 - _globals['_REFPROPERTIESREQUEST']._serialized_start=2254 - _globals['_REFPROPERTIESREQUEST']._serialized_end=2431 - _globals['_RERANK']._serialized_start=2433 - _globals['_RERANK']._serialized_end=2489 - _globals['_SEARCHREPLY']._serialized_start=2492 - _globals['_SEARCHREPLY']._serialized_end=2794 - _globals['_RERANKREPLY']._serialized_start=2796 - _globals['_RERANKREPLY']._serialized_end=2824 - _globals['_GROUPBYRESULT']._serialized_start=2827 - _globals['_GROUPBYRESULT']._serialized_end=3188 - _globals['_SEARCHRESULT']._serialized_start=3191 - _globals['_SEARCHRESULT']._serialized_end=3374 - _globals['_METADATARESULT']._serialized_start=3377 - _globals['_METADATARESULT']._serialized_end=4008 - _globals['_PROPERTIESRESULT']._serialized_start=4011 - _globals['_PROPERTIESRESULT']._serialized_end=4275 - _globals['_REFPROPERTIESRESULT']._serialized_start=4277 - _globals['_REFPROPERTIESRESULT']._serialized_end=4368 + _globals['_METADATAREQUEST']._serialized_end=1920 + _globals['_PROPERTIESREQUEST']._serialized_start=1923 + _globals['_PROPERTIESREQUEST']._serialized_end=2132 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_start=2135 + _globals['_OBJECTPROPERTIESREQUEST']._serialized_end=2274 + _globals['_REFPROPERTIESREQUEST']._serialized_start=2277 + 
_globals['_REFPROPERTIESREQUEST']._serialized_end=2454 + _globals['_RERANK']._serialized_start=2456 + _globals['_RERANK']._serialized_end=2512 + _globals['_SEARCHREPLY']._serialized_start=2515 + _globals['_SEARCHREPLY']._serialized_end=2890 + _globals['_QUERYPROFILE']._serialized_start=2893 + _globals['_QUERYPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_start=2966 + _globals['_QUERYPROFILE_SEARCHPROFILE']._serialized_end=3100 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_start=3054 + _globals['_QUERYPROFILE_SEARCHPROFILE_DETAILSENTRY']._serialized_end=3100 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_start=3103 + _globals['_QUERYPROFILE_SHARDPROFILE']._serialized_end=3307 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_start=3219 + _globals['_QUERYPROFILE_SHARDPROFILE_SEARCHESENTRY']._serialized_end=3307 + _globals['_RERANKREPLY']._serialized_start=3309 + _globals['_RERANKREPLY']._serialized_end=3337 + _globals['_GROUPBYRESULT']._serialized_start=3340 + _globals['_GROUPBYRESULT']._serialized_end=3701 + _globals['_SEARCHRESULT']._serialized_start=3704 + _globals['_SEARCHRESULT']._serialized_end=3887 + _globals['_METADATARESULT']._serialized_start=3890 + _globals['_METADATARESULT']._serialized_end=4521 + _globals['_PROPERTIESRESULT']._serialized_start=4524 + _globals['_PROPERTIESRESULT']._serialized_end=4788 + _globals['_REFPROPERTIESRESULT']._serialized_start=4790 + _globals['_REFPROPERTIESRESULT']._serialized_end=4881 # @@protoc_insertion_point(module_scope) diff --git a/weaviate/proto/v1/v6300/v1/search_get_pb2.pyi b/weaviate/proto/v1/v6300/v1/search_get_pb2.pyi index 8dd3cb881..f631fb396 100644 --- a/weaviate/proto/v1/v6300/v1/search_get_pb2.pyi +++ b/weaviate/proto/v1/v6300/v1/search_get_pb2.pyi @@ -89,7 +89,7 @@ class SortBy(_message.Message): def __init__(self, ascending: bool = ..., path: _Optional[_Iterable[str]] = ...) -> None: ... 
class MetadataRequest(_message.Message): - __slots__ = ("uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors") + __slots__ = ("uuid", "vector", "creation_time_unix", "last_update_time_unix", "distance", "certainty", "score", "explain_score", "is_consistent", "vectors", "query_profile") UUID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] CREATION_TIME_UNIX_FIELD_NUMBER: _ClassVar[int] @@ -100,6 +100,7 @@ class MetadataRequest(_message.Message): EXPLAIN_SCORE_FIELD_NUMBER: _ClassVar[int] IS_CONSISTENT_FIELD_NUMBER: _ClassVar[int] VECTORS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] uuid: bool vector: bool creation_time_unix: bool @@ -110,7 +111,8 @@ class MetadataRequest(_message.Message): explain_score: bool is_consistent: bool vectors: _containers.RepeatedScalarFieldContainer[str] - def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ...) -> None: ... + query_profile: bool + def __init__(self, uuid: bool = ..., vector: bool = ..., creation_time_unix: bool = ..., last_update_time_unix: bool = ..., distance: bool = ..., certainty: bool = ..., score: bool = ..., explain_score: bool = ..., is_consistent: bool = ..., vectors: _Optional[_Iterable[str]] = ..., query_profile: bool = ...) -> None: ... class PropertiesRequest(_message.Message): __slots__ = ("non_ref_properties", "ref_properties", "object_properties", "return_all_nonref_properties") @@ -155,18 +157,54 @@ class Rerank(_message.Message): def __init__(self, property: _Optional[str] = ..., query: _Optional[str] = ...) -> None: ... 
class SearchReply(_message.Message): - __slots__ = ("took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results") + __slots__ = ("took", "results", "generative_grouped_result", "group_by_results", "generative_grouped_results", "query_profile") TOOK_FIELD_NUMBER: _ClassVar[int] RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULT_FIELD_NUMBER: _ClassVar[int] GROUP_BY_RESULTS_FIELD_NUMBER: _ClassVar[int] GENERATIVE_GROUPED_RESULTS_FIELD_NUMBER: _ClassVar[int] + QUERY_PROFILE_FIELD_NUMBER: _ClassVar[int] took: float results: _containers.RepeatedCompositeFieldContainer[SearchResult] generative_grouped_result: str group_by_results: _containers.RepeatedCompositeFieldContainer[GroupByResult] generative_grouped_results: _generative_pb2.GenerativeResult - def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ...) -> None: ... + query_profile: QueryProfile + def __init__(self, took: _Optional[float] = ..., results: _Optional[_Iterable[_Union[SearchResult, _Mapping]]] = ..., generative_grouped_result: _Optional[str] = ..., group_by_results: _Optional[_Iterable[_Union[GroupByResult, _Mapping]]] = ..., generative_grouped_results: _Optional[_Union[_generative_pb2.GenerativeResult, _Mapping]] = ..., query_profile: _Optional[_Union[QueryProfile, _Mapping]] = ...) -> None: ... + +class QueryProfile(_message.Message): + __slots__ = ("shards",) + class SearchProfile(_message.Message): + __slots__ = ("details",) + class DetailsEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: str + def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) 
-> None: ... + DETAILS_FIELD_NUMBER: _ClassVar[int] + details: _containers.ScalarMap[str, str] + def __init__(self, details: _Optional[_Mapping[str, str]] = ...) -> None: ... + class ShardProfile(_message.Message): + __slots__ = ("name", "node", "searches") + class SearchesEntry(_message.Message): + __slots__ = ("key", "value") + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: QueryProfile.SearchProfile + def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[QueryProfile.SearchProfile, _Mapping]] = ...) -> None: ... + NAME_FIELD_NUMBER: _ClassVar[int] + NODE_FIELD_NUMBER: _ClassVar[int] + SEARCHES_FIELD_NUMBER: _ClassVar[int] + name: str + node: str + searches: _containers.MessageMap[str, QueryProfile.SearchProfile] + def __init__(self, name: _Optional[str] = ..., node: _Optional[str] = ..., searches: _Optional[_Mapping[str, QueryProfile.SearchProfile]] = ...) -> None: ... + SHARDS_FIELD_NUMBER: _ClassVar[int] + shards: _containers.RepeatedCompositeFieldContainer[QueryProfile.ShardProfile] + def __init__(self, shards: _Optional[_Iterable[_Union[QueryProfile.ShardProfile, _Mapping]]] = ...) -> None: ... 
class RerankReply(_message.Message): __slots__ = ("score",) diff --git a/weaviate/rbac/models.py b/weaviate/rbac/models.py index df5a230a5..8e0989542 100644 --- a/weaviate/rbac/models.py +++ b/weaviate/rbac/models.py @@ -252,6 +252,16 @@ def values() -> List[str]: return [action.value for action in BackupsAction] +class MCPAction(str, _Action, Enum): + CREATE = "create_mcp" + READ = "read_mcp" + UPDATE = "update_mcp" + + @staticmethod + def values() -> List[str]: + return [action.value for action in MCPAction] + + class ReplicateAction(str, _Action, Enum): CREATE = "create_replicate" READ = "read_replicate" @@ -407,6 +417,16 @@ def _to_weaviate(self) -> List[WeaviatePermission]: ] +class _MCPPermission(_Permission[MCPAction]): + def _to_weaviate(self) -> List[WeaviatePermission]: + return [ + { + "action": action, + } + for action in self.actions + ] + + class _ClusterPermission(_Permission[ClusterAction]): def _to_weaviate(self) -> List[WeaviatePermission]: return [ @@ -470,6 +490,10 @@ class BackupsPermissionOutput(_BackupsPermission): pass +class MCPPermissionOutput(_MCPPermission): + pass + + class NodesPermissionOutput(_NodesPermission): pass @@ -486,6 +510,7 @@ class TenantsPermissionOutput(_TenantsPermission): RolesPermissionOutput, UsersPermissionOutput, BackupsPermissionOutput, + MCPPermissionOutput, NodesPermissionOutput, TenantsPermissionOutput, ReplicatePermissionOutput, @@ -507,6 +532,7 @@ class Role(RoleBase): roles_permissions: List[RolesPermissionOutput] users_permissions: List[UsersPermissionOutput] backups_permissions: List[BackupsPermissionOutput] + mcp_permissions: List[MCPPermissionOutput] nodes_permissions: List[NodesPermissionOutput] tenants_permissions: List[TenantsPermissionOutput] replicate_permissions: List[ReplicatePermissionOutput] @@ -522,6 +548,7 @@ def permissions(self) -> List[PermissionsOutputType]: permissions.extend(self.roles_permissions) permissions.extend(self.users_permissions) permissions.extend(self.backups_permissions) + 
permissions.extend(self.mcp_permissions) permissions.extend(self.nodes_permissions) permissions.extend(self.tenants_permissions) permissions.extend(self.replicate_permissions) @@ -537,6 +564,7 @@ def _from_weaviate_role(cls, role: WeaviateRole) -> "Role": roles_permissions: List[RolesPermissionOutput] = [] data_permissions: List[DataPermissionOutput] = [] backups_permissions: List[BackupsPermissionOutput] = [] + mcp_permissions: List[MCPPermissionOutput] = [] nodes_permissions: List[NodesPermissionOutput] = [] tenants_permissions: List[TenantsPermissionOutput] = [] replicate_permissions: List[ReplicatePermissionOutput] = [] @@ -605,6 +633,10 @@ def _from_weaviate_role(cls, role: WeaviateRole) -> "Role": actions={BackupsAction(permission["action"])}, ) ) + elif permission["action"] in MCPAction.values(): + mcp_permissions.append( + MCPPermissionOutput(actions={MCPAction(permission["action"])}) + ) elif permission["action"] in NodesAction.values(): nodes = permission.get("nodes") if nodes is not None: @@ -658,6 +690,7 @@ def _from_weaviate_role(cls, role: WeaviateRole) -> "Role": groups_permissions=_join_permissions(groups_permissions), data_permissions=_join_permissions(data_permissions), backups_permissions=_join_permissions(backups_permissions), + mcp_permissions=_join_permissions(mcp_permissions), nodes_permissions=_join_permissions(nodes_permissions), tenants_permissions=_join_permissions(tenants_permissions), replicate_permissions=_join_permissions(replicate_permissions), @@ -710,6 +743,7 @@ class Actions: Cluster = ClusterAction Nodes = NodesAction Backups = BackupsAction + MCP = MCPAction Tenants = TenantsAction Users = UsersAction Replicate = ReplicateAction @@ -1020,6 +1054,21 @@ def backup( permissions.append(permission) return permissions + @staticmethod + def mcp( + *, create: bool = False, read: bool = False, update: bool = False + ) -> PermissionsCreateType: + actions: Set[MCPAction] = set() + if create: + actions.add(MCPAction.CREATE) + if read: + 
actions.add(MCPAction.READ) + if update: + actions.add(MCPAction.UPDATE) + if len(actions) > 0: + return [_MCPPermission(actions=actions)] + return [] + @staticmethod def cluster(*, read: bool = False) -> PermissionsCreateType: if read: diff --git a/weaviate/tokenization/__init__.py b/weaviate/tokenization/__init__.py new file mode 100644 index 000000000..ac0c34cbf --- /dev/null +++ b/weaviate/tokenization/__init__.py @@ -0,0 +1,7 @@ +"""Module for tokenization operations.""" + +from .async_ import _TokenizationAsync +from .models import TokenizeResult +from .sync import _Tokenization + +__all__ = ["_Tokenization", "_TokenizationAsync", "TokenizeResult"] diff --git a/weaviate/tokenization/async_.py b/weaviate/tokenization/async_.py new file mode 100644 index 000000000..5406a39dd --- /dev/null +++ b/weaviate/tokenization/async_.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionAsync +from weaviate.tokenization.executor import _TokenizationExecutor + + +@executor.wrap("async") +class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]): + pass diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi new file mode 100644 index 000000000..ba12abc2a --- /dev/null +++ b/weaviate/tokenization/async_.pyi @@ -0,0 +1,35 @@ +from typing import Dict, List, Optional, Union, overload + +from weaviate.collections.classes.config import ( + StopwordsConfig, + StopwordsCreate, + TextAnalyzerConfigCreate, + Tokenization, +) +from weaviate.connect.v4 import ConnectionAsync +from weaviate.tokenization.models import TokenizeResult + +from .executor import _TokenizationExecutor + +class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]): + @overload + async def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ..., + ) -> TokenizeResult: ... 
+ @overload + async def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopword_presets: Optional[Dict[str, List[str]]] = ..., + ) -> TokenizeResult: ... + async def for_property( + self, collection: str, property_name: str, text: str + ) -> TokenizeResult: ... diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py new file mode 100644 index 000000000..33f1c05f9 --- /dev/null +++ b/weaviate/tokenization/executor.py @@ -0,0 +1,231 @@ +"""Tokenize executor.""" + +from typing import Any, Dict, Generic, List, Optional, Union, overload + +from httpx import Response + +from weaviate.collections.classes.config import ( + StopwordsConfig, + StopwordsCreate, + TextAnalyzerConfigCreate, + Tokenization, +) +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes +from weaviate.exceptions import WeaviateUnsupportedFeatureError +from weaviate.tokenization.models import TokenizeResult +from weaviate.util import _capitalize_first_letter + + +class _TokenizationExecutor(Generic[ConnectionType]): + def __init__(self, connection: ConnectionType): + self._connection = connection + + def __check_version(self) -> None: + if self._connection._weaviate_version.is_lower_than(1, 37, 0): + raise WeaviateUnsupportedFeatureError( + "Tokenization", + str(self._connection._weaviate_version), + "1.37.0", + ) + + # Overloads make ``stopwords`` and ``stopword_presets`` mutually exclusive + # at type-check time. Passing both is additionally rejected at runtime with + # ``ValueError`` in the implementation below. ``stopwords`` accepts either a + # ``StopwordsCreate`` (the write-side shape) or a ``StopwordsConfig`` (the + # read-side shape returned by ``collection.config.get()``), so values round- + # tripped through config reads can be passed back in directly. 
+ @overload + def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ..., + ) -> executor.Result[TokenizeResult]: ... + + @overload + def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopword_presets: Optional[Dict[str, List[str]]] = ..., + ) -> executor.Result[TokenizeResult]: ... + + def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = None, + stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = None, + stopword_presets: Optional[Dict[str, List[str]]] = None, + ) -> executor.Result[TokenizeResult]: + """Tokenize text using the generic /v1/tokenize endpoint. + + For ``word`` tokenization the server defaults to the built-in ``en`` + stopword preset when no stopword configuration is supplied. Pass + ``analyzer_config=TextAnalyzerConfigCreate(stopword_preset="none")`` + or equivalent to opt out. + + Call patterns for stopword handling (``stopwords`` and + ``stopword_presets`` are mutually exclusive — pass at most one): + + 1. **No stopword config** — rely on the server default (``en`` for + word tokenization, none otherwise):: + + client.tokenization.text(text=..., tokenization=Tokenization.WORD) + + 2. **Apply a one-off stopwords block** via ``stopwords`` — the block + filters the query tokens directly, same shape as a collection's + ``invertedIndexConfig.stopwords``:: + + client.tokenization.text( + text=..., + tokenization=Tokenization.WORD, + stopwords=StopwordsCreate(preset=StopwordsPreset.EN, additions=["foo"]), + ) + + 3. **Register a named-preset catalog** via ``stopword_presets`` and + reference one by name from ``analyzer_config.stopword_preset``. 
+ The catalog can also override built-in presets such as ``en``:: + + client.tokenization.text( + text=..., + tokenization=Tokenization.WORD, + analyzer_config=TextAnalyzerConfigCreate(stopword_preset="custom"), + stopword_presets={"custom": ["foo", "bar"]}, + ) + + Args: + text: The text to tokenize. + tokenization: The tokenization method to use (e.g. ``Tokenization.WORD``). + analyzer_config: Text analyzer settings (ASCII folding, stopword + preset name), built via ``Configure.text_analyzer(...)``. + ``stopword_preset`` may reference a built-in preset + (``en`` / ``none``) or a name defined in ``stopword_presets``. + stopwords: One-off stopwords block applied directly to this request. + Mirrors the collection-level ``invertedIndexConfig.stopwords`` + shape — hence the rich model with preset / additions / removals. + Mutually exclusive with ``stopword_presets``. + stopword_presets: Named-preset catalog (name → word list). Mirrors + the property-level preset catalog — a plain mapping, since a + property only references a preset by name (via + ``analyzer_config.stopword_preset``) rather than carrying the + full stopwords block. Entries can override built-ins like + ``en``. Mutually exclusive with ``stopwords``. + + Returns: + A ``TokenizeResult`` with indexed and query token lists. The generic + endpoint does not echo request fields back in the response. + + Raises: + WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. + ValueError: If both ``stopwords`` and ``stopword_presets`` are passed, + or if any ``stopword_presets`` value is not a list/tuple of strings. 
+ """ + self.__check_version() + + if stopwords is not None and stopword_presets is not None: + raise ValueError("stopwords and stopword_presets are mutually exclusive; pass only one") + + payload: Dict[str, Any] = { + "text": text, + "tokenization": tokenization.value, + } + + if analyzer_config is not None: + ac_dict = analyzer_config._to_dict() + if ac_dict: + payload["analyzerConfig"] = ac_dict + + if stopwords is not None: + if isinstance(stopwords, StopwordsConfig): + # Widen from the read-side shape returned by config.get() to the + # write-side shape the server expects. Field parity between the + # two classes is enforced at import time in + # ``weaviate/collections/classes/config.py``, so iterating + # ``StopwordsCreate.model_fields`` copies every field. + stopwords = StopwordsCreate( + **{name: getattr(stopwords, name) for name in StopwordsCreate.model_fields} + ) + sw_dict = stopwords._to_dict() + if sw_dict: + payload["stopwords"] = sw_dict + + if stopword_presets is not None: + # Plain word-list shape matching a collection's + # invertedIndexConfig.stopwordPresets. Reject str (would + # silently split into characters) and pydantic models / + # other non-sequence shapes up-front so callers get a clear + # error instead of a malformed payload. 
+ validated: Dict[str, List[str]] = {} + for name, words in stopword_presets.items(): + if isinstance(words, (str, bytes)): + raise ValueError( + f"stopword_presets[{name!r}] must be a list of strings, " + f"got {type(words).__name__}" + ) + if not isinstance(words, (list, tuple)): + raise ValueError( + f"stopword_presets[{name!r}] must be a list of strings, " + f"got {type(words).__name__}" + ) + if not all(isinstance(w, str) for w in words): + raise ValueError(f"stopword_presets[{name!r}] must contain only strings") + validated[name] = list(words) + payload["stopwordPresets"] = validated + + def resp(response: Response) -> TokenizeResult: + return TokenizeResult.model_validate(response.json()) + + return executor.execute( + response_callback=resp, + method=self._connection.post, + path="/tokenize", + weaviate_object=payload, + error_msg="Tokenization failed", + status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"), + ) + + def for_property( + self, + collection: str, + property_name: str, + text: str, + ) -> executor.Result[TokenizeResult]: + """Tokenize text using a property's configured tokenization settings. + + The server resolves the tokenization and analyzer configuration from + the property's schema, so callers only supply the text. + + Args: + collection: The collection that owns the property. + property_name: The property name whose tokenization config to use. + text: The text to tokenize. + + Returns: + A TokenizeResult with indexed and query token lists. + + Raises: + WeaviateUnsupportedFeatureError: If the server version is below 1.37.0. 
+ """ + self.__check_version() + + path = f"/schema/{_capitalize_first_letter(collection)}/properties/{property_name}/tokenize" + payload: Dict[str, Any] = {"text": text} + + def resp(response: Response) -> TokenizeResult: + return TokenizeResult.model_validate(response.json()) + + return executor.execute( + response_callback=resp, + method=self._connection.post, + path=path, + weaviate_object=payload, + error_msg="Property tokenization failed", + status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"), + ) diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py new file mode 100644 index 000000000..baeac140c --- /dev/null +++ b/weaviate/tokenization/models.py @@ -0,0 +1,17 @@ +"""Return types for tokenization operations.""" + +from typing import List + +from pydantic import BaseModel + + +class TokenizeResult(BaseModel): + """Result of a tokenization operation. + + Attributes: + indexed: Tokens as they would be stored in the inverted index. + query: Tokens as they would be used for querying (after stopword removal). 
+ """ + + indexed: List[str] + query: List[str] diff --git a/weaviate/tokenization/sync.py b/weaviate/tokenization/sync.py new file mode 100644 index 000000000..ab28cc98e --- /dev/null +++ b/weaviate/tokenization/sync.py @@ -0,0 +1,8 @@ +from weaviate.connect import executor +from weaviate.connect.v4 import ConnectionSync +from weaviate.tokenization.executor import _TokenizationExecutor + + +@executor.wrap("sync") +class _Tokenization(_TokenizationExecutor[ConnectionSync]): + pass diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi new file mode 100644 index 000000000..71aaaea5c --- /dev/null +++ b/weaviate/tokenization/sync.pyi @@ -0,0 +1,33 @@ +from typing import Dict, List, Optional, Union, overload + +from weaviate.collections.classes.config import ( + StopwordsConfig, + StopwordsCreate, + TextAnalyzerConfigCreate, + Tokenization, +) +from weaviate.connect.v4 import ConnectionSync +from weaviate.tokenization.models import TokenizeResult + +from .executor import _TokenizationExecutor + +class _Tokenization(_TokenizationExecutor[ConnectionSync]): + @overload + def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ..., + ) -> TokenizeResult: ... + @overload + def text( + self, + text: str, + tokenization: Tokenization, + *, + analyzer_config: Optional[TextAnalyzerConfigCreate] = ..., + stopword_presets: Optional[Dict[str, List[str]]] = ..., + ) -> TokenizeResult: ... + def for_property(self, collection: str, property_name: str, text: str) -> TokenizeResult: ...