Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ env:
WEAVIATE_132: 1.32.27
WEAVIATE_133: 1.33.18
WEAVIATE_134: 1.34.19
WEAVIATE_135: 1.35.17
WEAVIATE_136: 1.36.10
WEAVIATE_137: 1.37.1
WEAVIATE_135: 1.35.18
WEAVIATE_136: 1.36.12
WEAVIATE_137: 1.37.1-4e61e26.amd64

jobs:
lint-and-format:
Expand Down
520 changes: 327 additions & 193 deletions integration/test_tokenize.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions weaviate/classes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
rbac,
replication,
tenants,
tokenization,
) # noqa: F401
from .config import ConsistencyLevel

Expand All @@ -29,6 +30,7 @@
"init",
"query",
"tenants",
"tokenization",
"rbac",
"replication",
]
4 changes: 4 additions & 0 deletions weaviate/classes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
ReferenceProperty,
ReplicationDeletionStrategy,
Rerankers,
StopwordsCreate,
StopwordsPreset,
TextAnalyzerConfig,
TextAnalyzerConfigCreate,
Tokenization,
VectorDistances,
)
Expand All @@ -39,8 +41,10 @@
"PQEncoderType",
"ReferenceProperty",
"Rerankers",
"StopwordsCreate",
"StopwordsPreset",
"TextAnalyzerConfig",
"TextAnalyzerConfigCreate",
"Tokenization",
"Vectorizers",
"VectorDistances",
Expand Down
17 changes: 17 additions & 0 deletions weaviate/classes/tokenization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Public re-exports for tokenization configuration and results.

Exposes the stopwords/tokenization configuration classes and the
``TokenizeResult`` model under ``weaviate.classes.tokenization`` so users can
import them from one stable, user-facing location.
"""

from weaviate.collections.classes.config import (
    StopwordsConfig,
    StopwordsCreate,
    StopwordsPreset,
    TextAnalyzerConfigCreate,
    Tokenization,
)
from weaviate.tokenization.models import TokenizeResult

# Names that form the public API of this module.
__all__ = [
    "StopwordsConfig",
    "StopwordsCreate",
    "StopwordsPreset",
    "TextAnalyzerConfigCreate",
    "Tokenization",
    "TokenizeResult",
]
24 changes: 24 additions & 0 deletions weaviate/collections/classes/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
from dataclasses import dataclass
from dataclasses import fields as _dataclass_fields
from typing import (
Any,
ClassVar,
Expand Down Expand Up @@ -1647,6 +1648,26 @@ class _StopwordsConfig(_ConfigBase):


StopwordsConfig = _StopwordsConfig
StopwordsCreate = _StopwordsCreate


# Invariant: the read-side dataclass (_StopwordsConfig) and the write-side
# pydantic model (_StopwordsCreate) must expose identical field names, so that
# values obtained from ``collection.config.get()`` can be fed back into
# ``tokenization.text()`` without silent data loss. The read→write conversion
# in ``weaviate/tokenization/executor.py::_TokenizationExecutor.text`` relies
# on this parity; importing this module fails loudly if the classes drift.
def _assert_stopwords_field_parity() -> None:
    # Compare the dataclass field names against the pydantic model's fields.
    reads = {field.name for field in _dataclass_fields(_StopwordsConfig)}
    writes = set(_StopwordsCreate.model_fields)
    if reads == writes:
        return
    raise RuntimeError(
        "_StopwordsConfig / _StopwordsCreate field drift detected — "
        f"read-only={reads - writes}, "
        f"write-only={writes - reads}. "
        "Update both classes together, or adapt the read→write conversion in "
        "weaviate/tokenization/executor.py::_TokenizationExecutor.text."
    )


_assert_stopwords_field_parity()
# Avoid leaking a private helper into the module namespace.
del _assert_stopwords_field_parity
Comment thread
dirkkul marked this conversation as resolved.


@dataclass
Expand Down Expand Up @@ -2224,6 +2245,9 @@ def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate":
return self


TextAnalyzerConfigCreate = _TextAnalyzerConfigCreate


class Property(_ConfigCreateModel):
"""This class defines the structure of a data property that a collection can have within Weaviate.

Expand Down
2 changes: 0 additions & 2 deletions weaviate/collections/config/async_.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ from weaviate.collections.classes.config import (
from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate
from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate
from weaviate.connect.v4 import ConnectionAsync
from weaviate.tokenization.models import TokenizeResult

from .executor import _ConfigCollectionExecutor

Expand Down Expand Up @@ -91,4 +90,3 @@ class _ConfigCollectionAsync(_ConfigCollectionExecutor[ConnectionAsync]):
self, *, vector_config: Union[_VectorConfigCreate, List[_VectorConfigCreate]]
) -> None: ...
async def delete_property_index(self, property_name: str, index_name: IndexName) -> bool: ...
async def tokenize_property(self, property_name: str, text: str) -> TokenizeResult: ...
40 changes: 0 additions & 40 deletions weaviate/collections/config/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@
WeaviateInvalidInputError,
WeaviateUnsupportedFeatureError,
)
from weaviate.tokenization.models import TokenizeResult
from weaviate.util import (
_capitalize_first_letter,
_decode_json_response_dict,
Expand Down Expand Up @@ -667,42 +666,3 @@ def resp(res: Response) -> bool:
error_msg="Property may not exist",
status_codes=_ExpectedStatusCodes(ok_in=[200], error="property exists"),
)

def tokenize_property(
    self,
    property_name: str,
    text: str,
) -> executor.Result[TokenizeResult]:
    """Tokenize text using a property's configured tokenization settings.

    Args:
        property_name: The property name whose tokenization config to use.
        text: The text to tokenize.

    Returns:
        A TokenizeResult with indexed and query token lists.

    Raises:
        WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
    """
    server_version = self._connection._weaviate_version
    # The per-property tokenize endpoint only exists from server 1.37.0 on.
    if server_version.is_lower_than(1, 37, 0):
        raise WeaviateUnsupportedFeatureError(
            "Tokenization",
            str(server_version),
            "1.37.0",
        )

    def parse(response: Response) -> TokenizeResult:
        # Deserialize the JSON body straight into the pydantic result model.
        return TokenizeResult.model_validate(response.json())

    body: Dict[str, Any] = {"text": text}
    return executor.execute(
        response_callback=parse,
        method=self._connection.post,
        path=f"/schema/{self._name}/properties/{property_name}/tokenize",
        weaviate_object=body,
        error_msg="Property tokenization failed",
        status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"),
    )
2 changes: 0 additions & 2 deletions weaviate/collections/config/sync.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ from weaviate.collections.classes.config import (
from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate
from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate
from weaviate.connect.v4 import ConnectionSync
from weaviate.tokenization.models import TokenizeResult

from .executor import _ConfigCollectionExecutor

Expand Down Expand Up @@ -89,4 +88,3 @@ class _ConfigCollection(_ConfigCollectionExecutor[ConnectionSync]):
self, *, vector_config: Union[_VectorConfigCreate, List[_VectorConfigCreate]]
) -> None: ...
def delete_property_index(self, property_name: str, index_name: IndexName) -> bool: ...
def tokenize_property(self, property_name: str, text: str) -> TokenizeResult: ...
24 changes: 19 additions & 5 deletions weaviate/tokenization/async_.pyi
Original file line number Diff line number Diff line change
@@ -1,21 +1,35 @@
from typing import Dict, Optional
from typing import Dict, List, Optional, Union, overload

from weaviate.collections.classes.config import (
StopwordsConfig,
StopwordsCreate,
TextAnalyzerConfigCreate,
Tokenization,
_StopwordsCreate,
_TextAnalyzerConfigCreate,
)
from weaviate.connect.v4 import ConnectionAsync
from weaviate.tokenization.models import TokenizeResult

from .executor import _TokenizationExecutor

class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]):
@overload
async def text(
self,
text: str,
tokenization: Tokenization,
*,
analyzer_config: Optional[_TextAnalyzerConfigCreate] = None,
stopword_presets: Optional[Dict[str, _StopwordsCreate]] = None,
analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ...,
) -> TokenizeResult: ...
@overload
async def text(
self,
text: str,
tokenization: Tokenization,
*,
analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
stopword_presets: Optional[Dict[str, List[str]]] = ...,
) -> TokenizeResult: ...
async def for_property(
self, collection: str, property_name: str, text: str
) -> TokenizeResult: ...
Loading
Loading