From 3624e8b571d8c388befc61e232e0cecff832b0f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Mon, 20 Apr 2026 15:52:55 +0100
Subject: [PATCH 01/14] refactor: tokenization executor and models to support
 stopword configurations and improve response handling

---
 integration/test_tokenize.py      | 245 +++++++++++++++++++-----------
 weaviate/tokenization/executor.py |  49 +++++-
 weaviate/tokenization/models.py   |  46 +-----
 3 files changed, 202 insertions(+), 138 deletions(-)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index 97587235b..d692a4808 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -2,10 +2,17 @@
 
 These tests cover the client's responsibilities:
 - Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, _StopwordsCreate)
-- Correct deserialization of responses into typed objects
-- Client-side validation (_TextAnalyzerConfigCreate rejects invalid input)
+- Correct deserialization of responses into the TokenizeResult object
+- Client-side validation (_TextAnalyzerConfigCreate, stopwords/stopword_presets mutex)
 - Version gate (>= 1.37.0)
 - Both sync and async client paths
+
+Server-side behavior this client relies on:
+- Word tokenization defaults to preset "en" when no stopword config is sent.
+- The generic /v1/tokenize response is minimal: only ``indexed`` and ``query``
+  are returned. The property-level endpoint additionally returns ``tokenization``.
+- ``stopwords`` and ``stopword_presets`` are mutually exclusive on the generic
+  endpoint — the server rejects requests that set both.
 """
 
 from typing import AsyncGenerator, Generator
@@ -15,9 +22,7 @@
 
 import weaviate
 from weaviate.collections.classes.config import (
-    StopwordsConfig,
     StopwordsPreset,
-    TextAnalyzerConfig,
     Tokenization,
     _StopwordsCreate,
     _TextAnalyzerConfigCreate,
@@ -62,13 +67,31 @@ class TestSerialization:
     """Verify the client correctly serializes different input forms."""
 
     @pytest.mark.parametrize(
-        "tokenization,text,expected_tokens",
+        "tokenization,text,expected_indexed,expected_query",
         [
-            (Tokenization.WORD, "The quick brown fox", ["the", "quick", "brown", "fox"]),
-            (Tokenization.LOWERCASE, "Hello World Test", ["hello", "world", "test"]),
-            (Tokenization.WHITESPACE, "Hello World Test", ["Hello", "World", "Test"]),
-            (Tokenization.FIELD, "  Hello World  ", ["Hello World"]),
-            (Tokenization.TRIGRAM, "Hello", ["hel", "ell", "llo"]),
+            # "the" is an English stopword — filtered from the query output
+            # by the server's default "en" preset for word tokenization.
+            (
+                Tokenization.WORD,
+                "The quick brown fox",
+                ["the", "quick", "brown", "fox"],
+                ["quick", "brown", "fox"],
+            ),
+            # Non-word tokenizations do not apply the default "en" preset.
+            (
+                Tokenization.LOWERCASE,
+                "Hello World Test",
+                ["hello", "world", "test"],
+                ["hello", "world", "test"],
+            ),
+            (
+                Tokenization.WHITESPACE,
+                "Hello World Test",
+                ["Hello", "World", "Test"],
+                ["Hello", "World", "Test"],
+            ),
+            (Tokenization.FIELD, "  Hello World  ", ["Hello World"], ["Hello World"]),
+            (Tokenization.TRIGRAM, "Hello", ["hel", "ell", "llo"], ["hel", "ell", "llo"]),
         ],
     )
     def test_tokenization_enum(
@@ -76,19 +99,35 @@ def test_tokenization_enum(
         client: weaviate.WeaviateClient,
         tokenization: Tokenization,
         text: str,
-        expected_tokens: list,
+        expected_indexed: list,
+        expected_query: list,
     ) -> None:
         result = client.tokenization.text(text=text, tokenization=tokenization)
         assert isinstance(result, TokenizeResult)
-        assert result.tokenization == tokenization
-        assert result.indexed == expected_tokens
-        assert result.query == expected_tokens
+        assert result.indexed == expected_indexed
+        assert result.query == expected_query
+        # Generic endpoint does not echo tokenization back.
+        assert result.tokenization is None
+
+    def test_default_en_applied_for_word(self, client: weaviate.WeaviateClient) -> None:
+        """Word tokenization defaults to the 'en' preset when no stopword
+        config is supplied."""
+        result = client.tokenization.text(
+            text="The quick brown fox", tokenization=Tokenization.WORD
+        )
+        assert result.indexed == ["the", "quick", "brown", "fox"]
+        # "the" removed by the server's default en preset.
+        assert result.query == ["quick", "brown", "fox"]
 
-    def test_no_analyzer_config(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenization.text(text="hello world", tokenization=Tokenization.WORD)
-        assert result.tokenization == Tokenization.WORD
-        assert result.indexed == ["hello", "world"]
-        assert result.analyzer_config is None
+    def test_opt_out_of_default_en(self, client: weaviate.WeaviateClient) -> None:
+        """analyzerConfig.stopwordPreset='none' disables the default en."""
+        cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.NONE)
+        result = client.tokenization.text(
+            text="The quick brown fox",
+            tokenization=Tokenization.WORD,
+            analyzer_config=cfg,
+        )
+        assert result.query == ["the", "quick", "brown", "fox"]
 
     def test_ascii_fold(self, client: weaviate.WeaviateClient) -> None:
         cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
@@ -140,33 +179,74 @@ def test_ascii_fold_combined_with_stopwords(self, client: weaviate.WeaviateClien
         assert "the" not in result.query
         assert "école" in result.query
 
-    def test_stopword_presets_custom_additions(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(stopword_preset="custom")
+    def test_stopwords_fallback(self, client: weaviate.WeaviateClient) -> None:
+        """Top-level stopwords acts as the fallback detector when no
+        analyzerConfig.stopwordPreset is set."""
+        sw = _StopwordsCreate(
+            preset=StopwordsPreset.EN, additions=["quick"], removals=None
+        )
+        result = client.tokenization.text(
+            text="the quick brown fox",
+            tokenization=Tokenization.WORD,
+            stopwords=sw,
+        )
+        assert result.indexed == ["the", "quick", "brown", "fox"]
+        # "the" (en) and "quick" (addition) filtered.
+        assert result.query == ["brown", "fox"]
+
+    def test_stopwords_additions_default_preset_to_en(
+        self, client: weaviate.WeaviateClient
+    ) -> None:
+        """Caller omits preset, passes only additions. Server defaults preset
+        to 'en' and builds detector from en + additions."""
+        sw = _StopwordsCreate(preset=None, additions=["hello"], removals=None)
+        result = client.tokenization.text(
+            text="the quick hello world",
+            tokenization=Tokenization.WORD,
+            stopwords=sw,
+        )
+        assert result.query == ["quick", "world"]
+
+    def test_stopwords_removals_default_preset_to_en(
+        self, client: weaviate.WeaviateClient
+    ) -> None:
+        """Caller omits preset, passes only removals. 'the' is removed from
+        the en list so it passes through."""
+        sw = _StopwordsCreate(preset=None, additions=None, removals=["the"])
+        result = client.tokenization.text(
+            text="the quick is fast",
+            tokenization=Tokenization.WORD,
+            stopwords=sw,
+        )
+        # "is" still in en, "the" removed.
+        assert result.query == ["the", "quick", "fast"]
+
+    def test_stopword_presets_named_reference(self, client: weaviate.WeaviateClient) -> None:
+        """Define a named preset via stopword_presets, select it via
+        analyzerConfig.stopwordPreset. Word lists use the collection shape."""
         result = client.tokenization.text(
             text="hello world test",
             tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-            stopword_presets={
-                "custom": _StopwordsCreate(preset=None, additions=["test"], removals=None),
-            },
+            analyzer_config=_TextAnalyzerConfigCreate(stopword_preset="custom"),
+            stopword_presets={"custom": ["test"]},
         )
         assert result.indexed == ["hello", "world", "test"]
         assert result.query == ["hello", "world"]
 
-    def test_stopword_presets_with_base_and_removals(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(stopword_preset="en-no-the")
+    def test_stopword_presets_override_builtin_en(
+        self, client: weaviate.WeaviateClient
+    ) -> None:
+        """A user-defined preset sharing a name with a built-in replaces the
+        built-in entirely, including on the default-en path for word
+        tokenization."""
         result = client.tokenization.text(
-            text="the quick",
+            text="the quick hello world",
             tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-            stopword_presets={
-                "en-no-the": _StopwordsCreate(
-                    preset=StopwordsPreset.EN, additions=None, removals=["the"]
-                ),
-            },
+            stopword_presets={"en": ["hello"]},
         )
-        assert result.indexed == ["the", "quick"]
-        assert result.query == ["the", "quick"]
+        assert result.indexed == ["the", "quick", "hello", "world"]
+        # "the" no longer filtered (built-in en replaced), "hello" is.
+        assert result.query == ["the", "quick", "world"]
 
 
 # ---------------------------------------------------------------------------
@@ -176,61 +256,23 @@ def test_stopword_presets_with_base_and_removals(self, client: weaviate.Weaviate
 
 @pytest.mark.usefixtures("require_1_37")
 class TestDeserialization:
-    """Verify the client correctly deserializes response fields into typed objects."""
+    """Verify the client correctly deserializes response fields into
+    TokenizeResult."""
 
-    def test_result_type(self, client: weaviate.WeaviateClient) -> None:
+    def test_generic_result_shape(self, client: weaviate.WeaviateClient) -> None:
+        """Generic endpoint returns only indexed and query; tokenization is
+        not echoed back."""
         result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
         assert isinstance(result, TokenizeResult)
         assert isinstance(result.indexed, list)
         assert isinstance(result.query, list)
+        assert result.tokenization is None
 
-    def test_analyzer_config_deserialized(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(
-            ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN
-        )
-        result = client.tokenization.text(
-            text="L'école",
-            tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-        )
-        assert isinstance(result.analyzer_config, TextAnalyzerConfig)
-        assert result.analyzer_config.ascii_fold is True
-        assert result.analyzer_config.ascii_fold_ignore == ["é"]
-        assert result.analyzer_config.stopword_preset == "en"
-
-    def test_no_analyzer_config_returns_none(self, client: weaviate.WeaviateClient) -> None:
-        result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
-        assert result.analyzer_config is None
-
-    def test_stopword_config_deserialized_on_property(
+    def test_property_result_populates_tokenization(
         self, client: weaviate.WeaviateClient
     ) -> None:
-        client.collections.delete("TestDeserStopword")
-        try:
-            client.collections.create_from_dict(
-                {
-                    "class": "TestDeserStopword",
-                    "vectorizer": "none",
-                    "properties": [
-                        {
-                            "name": "title",
-                            "dataType": ["text"],
-                            "tokenization": "word",
-                            "textAnalyzer": {"stopwordPreset": "en"},
-                        },
-                    ],
-                }
-            )
-            col = client.collections.get("TestDeserStopword")
-            result = col.config.tokenize_property(property_name="title", text="the quick")
-            assert isinstance(result, TokenizeResult)
-            assert result.tokenization == Tokenization.WORD
-            if result.stopword_config is not None:
-                assert isinstance(result.stopword_config, StopwordsConfig)
-        finally:
-            client.collections.delete("TestDeserStopword")
-
-    def test_property_result_types(self, client: weaviate.WeaviateClient) -> None:
+        """Property endpoint returns tokenization — the server resolved it
+        from the property's schema rather than the caller sending it."""
         client.collections.delete("TestDeserPropTypes")
         try:
             client.collections.create_from_dict(
@@ -256,12 +298,13 @@ def test_property_result_types(self, client: weaviate.WeaviateClient) -> None:
 
 
 # ---------------------------------------------------------------------------
-# Client-side validation (_TextAnalyzerConfigCreate)
+# Client-side validation
 # ---------------------------------------------------------------------------
 
 
 class TestClientSideValidation:
-    """Verify that _TextAnalyzerConfigCreate rejects invalid input before hitting the server."""
+    """Verify that client-side validation rejects invalid input before
+    hitting the server."""
 
     def test_ascii_fold_ignore_without_fold_raises(self) -> None:
         with pytest.raises(ValueError, match="asciiFoldIgnore"):
@@ -291,6 +334,23 @@ def test_empty_config_is_valid(self) -> None:
         assert cfg.asciiFoldIgnore is None
         assert cfg.stopwordPreset is None
 
+    def test_stopwords_and_stopword_presets_mutex(
+        self, client: weaviate.WeaviateClient
+    ) -> None:
+        """Client rejects the mutex violation locally with ValueError, before
+        sending the request (which the server would also reject with 422)."""
+        if client._connection._weaviate_version.is_lower_than(1, 37, 0):
+            pytest.skip("Tokenization requires Weaviate >= 1.37.0")
+        with pytest.raises(ValueError, match="mutually exclusive"):
+            client.tokenization.text(
+                text="hello",
+                tokenization=Tokenization.WORD,
+                stopwords=_StopwordsCreate(
+                    preset=StopwordsPreset.EN, additions=None, removals=None
+                ),
+                stopword_presets={"custom": ["hello"]},
+            )
+
 
 # ---------------------------------------------------------------------------
 # Version gate
@@ -331,20 +391,21 @@ async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -
         )
         assert isinstance(result, TokenizeResult)
         assert result.indexed == ["the", "quick", "brown", "fox"]
+        # default "en" applied server-side.
+        assert result.query == ["quick", "brown", "fox"]
 
     @pytest.mark.asyncio
-    async def test_text_with_analyzer_config(
+    async def test_text_with_stopwords_fallback(
         self, async_client: weaviate.WeaviateAsyncClient
     ) -> None:
-        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, stopword_preset=StopwordsPreset.EN)
+        sw = _StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None)
         result = await async_client.tokenization.text(
-            text="L'école est fermée",
+            text="the quick brown fox",
             tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
+            stopwords=sw,
         )
-        assert result.indexed == ["l", "ecole", "est", "fermee"]
-        assert isinstance(result.analyzer_config, TextAnalyzerConfig)
-        assert result.analyzer_config.ascii_fold is True
+        assert result.indexed == ["the", "quick", "brown", "fox"]
+        assert result.query == ["brown", "fox"]
 
     @pytest.mark.asyncio
     async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 5093c14e9..3a79d6ee1 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -1,6 +1,6 @@
 """Tokenize executor."""
 
-from typing import Any, Dict, Generic, Optional
+from typing import Any, Dict, Generic, List, Optional
 
 from httpx import Response
 
@@ -33,26 +33,52 @@ def text(
         tokenization: Tokenization,
         *,
         analyzer_config: Optional[_TextAnalyzerConfigCreate] = None,
-        stopword_presets: Optional[Dict[str, _StopwordsCreate]] = None,
+        stopwords: Optional[_StopwordsCreate] = None,
+        stopword_presets: Optional[Dict[str, List[str]]] = None,
     ) -> executor.Result[TokenizeResult]:
         """Tokenize text using the generic /v1/tokenize endpoint.
 
+        For ``word`` tokenization the server defaults to the built-in ``en``
+        stopword preset when no stopword configuration is supplied. Pass
+        an ``analyzer_config`` whose ``stopword_preset`` is ``"none"`` (or
+        ``StopwordsPreset.NONE``) to opt out.
+
         Args:
             text: The text to tokenize.
             tokenization: The tokenization method to use (e.g. Tokenization.WORD).
-            analyzer_config: Text analyzer settings (ASCII folding, stopword preset).
-            stopword_presets: Custom stopword preset definitions, keyed by name.
-                Each value is a ``_StopwordsCreate`` with optional preset, additions,
-                and removals fields.
+            analyzer_config: Text analyzer settings (ASCII folding, stopword
+                preset name). ``stopword_preset`` may reference a built-in preset
+                (``en`` / ``none``) or a name defined in ``stopword_presets``.
+            stopwords: Fallback stopword config applied when
+                ``analyzer_config.stopword_preset`` is not set. Same shape as a
+                collection's ``invertedIndexConfig.stopwords`` — a base preset
+                optionally tweaked with ``additions`` / ``removals``. An empty
+                ``preset`` defaults to ``en``.
+            stopword_presets: User-defined named stopword presets, each a plain
+                list of words. A name matching a built-in (``en`` / ``none``)
+                replaces the built-in entirely.
+
+        Note:
+            ``stopwords`` and ``stopword_presets`` are mutually exclusive — pass
+            one or the other, not both. The client raises ``ValueError`` before
+            sending; the server would otherwise reject the request with HTTP 422.
 
         Returns:
-            A TokenizeResult with indexed and query token lists.
+            A TokenizeResult with indexed and query token lists. The generic
+            endpoint does not echo request fields (tokenization, analyzer_config,
+            stopwords, stopword_presets) back in the response.
 
         Raises:
             WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
+            ValueError: If both ``stopwords`` and ``stopword_presets`` are passed.
         """
         self._check_version()
 
+        if stopwords is not None and stopword_presets is not None:
+            raise ValueError(
+                "stopwords and stopword_presets are mutually exclusive; pass only one"
+            )
+
         payload: Dict[str, Any] = {
             "text": text,
             "tokenization": tokenization.value,
@@ -63,9 +89,16 @@ def text(
             if ac_dict:
                 payload["analyzerConfig"] = ac_dict
 
+        if stopwords is not None:
+            sw_dict = stopwords._to_dict()
+            if sw_dict:
+                payload["stopwords"] = sw_dict
+
         if stopword_presets is not None:
+            # Plain word-list shape matching a collection's
+            # invertedIndexConfig.stopwordPresets.
             payload["stopwordPresets"] = {
-                name: cfg._to_dict() for name, cfg in stopword_presets.items()
+                name: list(words) for name, words in stopword_presets.items()
             }
 
         def resp(response: Response) -> TokenizeResult:
diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py
index 8bfa508f8..3bf980597 100644
--- a/weaviate/tokenization/models.py
+++ b/weaviate/tokenization/models.py
@@ -1,56 +1,26 @@
 """Return types for tokenization operations."""
 
-from typing import Any, Dict, List, Optional
+from typing import List, Optional
 
-from pydantic import BaseModel, ConfigDict, Field, field_validator
+from pydantic import BaseModel, ConfigDict
 
-from weaviate.collections.classes.config import (
-    StopwordsConfig,
-    StopwordsPreset,
-    TextAnalyzerConfig,
-    Tokenization,
-)
+from weaviate.collections.classes.config import Tokenization
 
 
 class TokenizeResult(BaseModel):
     """Result of a tokenization operation.
 
     Attributes:
-        tokenization: The tokenization method that was applied.
         indexed: Tokens as they would be stored in the inverted index.
         query: Tokens as they would be used for querying (after stopword removal).
-        analyzer_config: The text analyzer configuration that was used, if any.
-        stopword_config: The stopword configuration that was used, if any.
+        tokenization: The tokenization method that was applied. Populated only by
+            the property-level endpoint, where the tokenization is resolved from
+            the property's schema. The generic ``/v1/tokenize`` endpoint does not
+            echo it back (the caller passed it).
     """
 
     model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)
 
-    tokenization: Tokenization
     indexed: List[str]
     query: List[str]
-    analyzer_config: Optional[TextAnalyzerConfig] = Field(default=None, alias="analyzerConfig")
-    stopword_config: Optional[StopwordsConfig] = Field(default=None, alias="stopwordConfig")
-
-    @field_validator("analyzer_config", mode="before")
-    @classmethod
-    def _parse_analyzer_config(cls, v: Optional[Dict[str, Any]]) -> Optional[TextAnalyzerConfig]:
-        if v is None:
-            return None
-        if "asciiFold" not in v and "stopwordPreset" not in v:
-            return None
-        return TextAnalyzerConfig(
-            ascii_fold=v.get("asciiFold", False),
-            ascii_fold_ignore=v.get("asciiFoldIgnore"),
-            stopword_preset=v.get("stopwordPreset"),
-        )
-
-    @field_validator("stopword_config", mode="before")
-    @classmethod
-    def _parse_stopword_config(cls, v: Optional[Dict[str, Any]]) -> Optional[StopwordsConfig]:
-        if v is None:
-            return None
-        return StopwordsConfig(
-            preset=StopwordsPreset(v["preset"]),
-            additions=v.get("additions"),
-            removals=v.get("removals"),
-        )
+    tokenization: Optional[Tokenization] = None

From 5a12f134c36a5f6ed6329abaae101abf42efb7cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Mon, 20 Apr 2026 15:55:45 +0100
Subject: [PATCH 02/14] fix: update Weaviate 1.37.1 version to include specific
 build identifier

---
 .github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 8dd157443..ee9b69537 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -28,7 +28,7 @@ env:
   WEAVIATE_134: 1.34.19
   WEAVIATE_135: 1.35.16-efdedfa
   WEAVIATE_136: 1.36.9-d905e6c
-  WEAVIATE_137: 1.37.1
+  WEAVIATE_137: 1.37.1-5f911bc
 
 jobs:
   lint-and-format:

From 60887f3ab37171d2ea12aa34de57a927fcca8267 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Mon, 20 Apr 2026 18:08:38 +0100
Subject: [PATCH 03/14] fix: update Weaviate 1.37.1 version to include
 architecture suffix

---
 .github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index b2c567cee..94f75b089 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -28,7 +28,7 @@ env:
   WEAVIATE_134: 1.34.19
   WEAVIATE_135: 1.35.18
   WEAVIATE_136: 1.36.12
-  WEAVIATE_137: 1.37.1-5f911bc
+  WEAVIATE_137: 1.37.1-5f911bc.amd64
 
 jobs:
   lint-and-format:

From 9fd83b881c953f2ea09c1fb5c89dc38a96173c29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Tue, 21 Apr 2026 09:23:37 +0100
Subject: [PATCH 04/14] fix: refactor tokenization tests to use parameterized
 cases for improved readability and maintainability

---
 integration/test_tokenize.py | 327 +++++++++++++++++++----------------
 1 file changed, 176 insertions(+), 151 deletions(-)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index 788cefc31..51f154479 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -109,131 +109,146 @@ def test_tokenization_enum(
         # Generic endpoint does not echo tokenization back.
         assert result.tokenization is None
 
-    def test_default_en_applied_for_word(self, client: weaviate.WeaviateClient) -> None:
-        """Word tokenization defaults to the 'en' preset when no stopword config is supplied."""
-        result = client.tokenization.text(
-            text="The quick brown fox", tokenization=Tokenization.WORD
-        )
-        assert result.indexed == ["the", "quick", "brown", "fox"]
-        # "the" removed by the server's default en preset.
-        assert result.query == ["quick", "brown", "fox"]
-
-    def test_opt_out_of_default_en(self, client: weaviate.WeaviateClient) -> None:
-        """analyzerConfig.stopwordPreset='none' disables the default en."""
-        cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.NONE)
-        result = client.tokenization.text(
-            text="The quick brown fox",
-            tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-        )
-        assert result.query == ["the", "quick", "brown", "fox"]
-
-    def test_ascii_fold(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
-        result = client.tokenization.text(
-            text="L'école est fermée",
-            tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-        )
-        assert result.indexed == ["l", "ecole", "est", "fermee"]
-
-    def test_ascii_fold_with_ignore(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é"])
-        result = client.tokenization.text(
-            text="L'école est fermée",
-            tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-        )
-        assert result.indexed == ["l", "école", "est", "fermée"]
-
-    def test_stopword_preset_enum(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.EN)
-        result = client.tokenization.text(
-            text="The quick brown fox",
-            tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-        )
-        assert "the" not in result.query
-        assert "quick" in result.query
-
-    def test_stopword_preset_string(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(stopword_preset="en")
-        result = client.tokenization.text(
-            text="The quick brown fox",
-            tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-        )
-        assert "the" not in result.query
-
-    def test_ascii_fold_combined_with_stopwords(self, client: weaviate.WeaviateClient) -> None:
-        cfg = _TextAnalyzerConfigCreate(
-            ascii_fold=True, ascii_fold_ignore=["é"], stopword_preset=StopwordsPreset.EN
-        )
-        result = client.tokenization.text(
-            text="The école est fermée",
-            tokenization=Tokenization.WORD,
-            analyzer_config=cfg,
-        )
-        assert result.indexed == ["the", "école", "est", "fermée"]
-        assert "the" not in result.query
-        assert "école" in result.query
-
-    def test_stopwords_fallback(self, client: weaviate.WeaviateClient) -> None:
-        """Top-level stopwords acts as the fallback detector when no analyzerConfig.stopwordPreset is set."""
-        sw = _StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None)
-        result = client.tokenization.text(
-            text="the quick brown fox",
-            tokenization=Tokenization.WORD,
-            stopwords=sw,
-        )
-        assert result.indexed == ["the", "quick", "brown", "fox"]
-        # "the" (en) and "quick" (addition) filtered.
-        assert result.query == ["brown", "fox"]
-
-    def test_stopwords_additions_default_preset_to_en(
-        self, client: weaviate.WeaviateClient
+    @pytest.mark.parametrize(
+        "call_kwargs,expected_indexed,expected_query",
+        [
+            (
+                {"text": "The quick brown fox"},
+                ["the", "quick", "brown", "fox"],
+                ["quick", "brown", "fox"],
+            ),
+            (
+                {
+                    "text": "The quick brown fox",
+                    "analyzer_config": _TextAnalyzerConfigCreate(
+                        stopword_preset=StopwordsPreset.NONE
+                    ),
+                },
+                ["the", "quick", "brown", "fox"],
+                ["the", "quick", "brown", "fox"],
+            ),
+            (
+                {
+                    "text": "L'école est fermée",
+                    "analyzer_config": _TextAnalyzerConfigCreate(ascii_fold=True),
+                },
+                ["l", "ecole", "est", "fermee"],
+                ["l", "ecole", "fermee"],
+            ),
+            (
+                {
+                    "text": "L'école est fermée",
+                    "analyzer_config": _TextAnalyzerConfigCreate(
+                        ascii_fold=True, ascii_fold_ignore=["é"]
+                    ),
+                },
+                ["l", "école", "est", "fermée"],
+                ["l", "école", "fermée"],
+            ),
+            (
+                {
+                    "text": "The quick brown fox",
+                    "analyzer_config": _TextAnalyzerConfigCreate(
+                        stopword_preset=StopwordsPreset.EN
+                    ),
+                },
+                ["the", "quick", "brown", "fox"],
+                ["quick", "brown", "fox"],
+            ),
+            (
+                {
+                    "text": "The quick brown fox",
+                    "analyzer_config": _TextAnalyzerConfigCreate(stopword_preset="en"),
+                },
+                ["the", "quick", "brown", "fox"],
+                ["quick", "brown", "fox"],
+            ),
+            (
+                {
+                    "text": "The école est fermée",
+                    "analyzer_config": _TextAnalyzerConfigCreate(
+                        ascii_fold=True,
+                        ascii_fold_ignore=["é"],
+                        stopword_preset=StopwordsPreset.EN,
+                    ),
+                },
+                ["the", "école", "est", "fermée"],
+                ["école", "est", "fermée"],
+            ),
+            (
+                {
+                    "text": "the quick brown fox",
+                    "stopwords": _StopwordsCreate(
+                        preset=StopwordsPreset.EN, additions=["quick"], removals=None
+                    ),
+                },
+                ["the", "quick", "brown", "fox"],
+                ["brown", "fox"],
+            ),
+            (
+                {
+                    "text": "the quick hello world",
+                    "stopwords": _StopwordsCreate(
+                        preset=None, additions=["hello"], removals=None
+                    ),
+                },
+                ["the", "quick", "hello", "world"],
+                ["quick", "world"],
+            ),
+            (
+                {
+                    "text": "the quick is fast",
+                    "stopwords": _StopwordsCreate(
+                        preset=None, additions=None, removals=["the"]
+                    ),
+                },
+                ["the", "quick", "is", "fast"],
+                ["the", "quick", "fast"],
+            ),
+            (
+                {
+                    "text": "hello world test",
+                    "analyzer_config": _TextAnalyzerConfigCreate(stopword_preset="custom"),
+                    "stopword_presets": {"custom": ["test"]},
+                },
+                ["hello", "world", "test"],
+                ["hello", "world"],
+            ),
+            (
+                {
+                    "text": "the quick hello world",
+                    "stopword_presets": {"en": ["hello"]},
+                },
+                ["the", "quick", "hello", "world"],
+                ["the", "quick", "world"],
+            ),
+        ],
+        ids=[
+            "default_en_applied_for_word",
+            "opt_out_of_default_en",
+            "ascii_fold",
+            "ascii_fold_with_ignore",
+            "stopword_preset_enum",
+            "stopword_preset_string",
+            "ascii_fold_combined_with_stopwords",
+            "stopwords_fallback",
+            "stopwords_additions_default_preset_to_en",
+            "stopwords_removals_default_preset_to_en",
+            "stopword_presets_named_reference",
+            "stopword_presets_override_builtin_en",
+        ],
+    )
+    def test_text_tokenize(
+        self,
+        client: weaviate.WeaviateClient,
+        call_kwargs: dict,
+        expected_indexed: list,
+        expected_query: list,
     ) -> None:
-        """Caller omits preset, passes only additions. Server defaults preset to 'en' and builds detector from en + additions."""
-        sw = _StopwordsCreate(preset=None, additions=["hello"], removals=None)
-        result = client.tokenization.text(
-            text="the quick hello world",
-            tokenization=Tokenization.WORD,
-            stopwords=sw,
-        )
-        assert result.query == ["quick", "world"]
-
-    def test_stopwords_removals_default_preset_to_en(self, client: weaviate.WeaviateClient) -> None:
-        """Caller omits preset, passes only removals. 'the' is removed from the en list so it passes through."""
-        sw = _StopwordsCreate(preset=None, additions=None, removals=["the"])
-        result = client.tokenization.text(
-            text="the quick is fast",
-            tokenization=Tokenization.WORD,
-            stopwords=sw,
-        )
-        # "is" still in en, "the" removed.
-        assert result.query == ["the", "quick", "fast"]
-
-    def test_stopword_presets_named_reference(self, client: weaviate.WeaviateClient) -> None:
-        """Define a named preset via stopword_presets, select it via analyzerConfig.stopwordPreset. Word lists use the collection shape."""
-        result = client.tokenization.text(
-            text="hello world test",
-            tokenization=Tokenization.WORD,
-            analyzer_config=_TextAnalyzerConfigCreate(stopword_preset="custom"),
-            stopword_presets={"custom": ["test"]},
-        )
-        assert result.indexed == ["hello", "world", "test"]
-        assert result.query == ["hello", "world"]
-
-    def test_stopword_presets_override_builtin_en(self, client: weaviate.WeaviateClient) -> None:
-        """A user-defined preset sharing a name with a built-in replaces the built-in entirely, including on the default-en path for word tokenization."""
-        result = client.tokenization.text(
-            text="the quick hello world",
-            tokenization=Tokenization.WORD,
-            stopword_presets={"en": ["hello"]},
-        )
-        assert result.indexed == ["the", "quick", "hello", "world"]
-        # "the" no longer filtered (built-in en replaced), "hello" is.
-        assert result.query == ["the", "quick", "world"]
+        result = client.tokenization.text(tokenization=Tokenization.WORD, **call_kwargs)
+        assert isinstance(result, TokenizeResult)
+        assert result.indexed == expected_indexed
+        assert result.query == expected_query
 
 
 # ---------------------------------------------------------------------------
@@ -287,33 +302,44 @@ def test_property_result_populates_tokenization(self, client: weaviate.WeaviateC
 class TestClientSideValidation:
     """Verify that client-side validation rejects invalid input before hitting the server."""
 
-    def test_ascii_fold_ignore_without_fold_raises(self) -> None:
-        with pytest.raises(ValueError, match="asciiFoldIgnore"):
-            _TextAnalyzerConfigCreate(ascii_fold=False, ascii_fold_ignore=["é"])
-
-    def test_ascii_fold_ignore_without_fold_default_raises(self) -> None:
+    @pytest.mark.parametrize(
+        "kwargs",
+        [
+            {"ascii_fold": False, "ascii_fold_ignore": ["é"]},
+            {"ascii_fold_ignore": ["é"]},
+        ],
+        ids=["explicit_false", "default"],
+    )
+    def test_ascii_fold_ignore_without_fold_raises(self, kwargs: dict) -> None:
         with pytest.raises(ValueError, match="asciiFoldIgnore"):
-            _TextAnalyzerConfigCreate(ascii_fold_ignore=["é"])
+            _TextAnalyzerConfigCreate(**kwargs)
 
-    def test_valid_config_does_not_raise(self) -> None:
-        cfg = _TextAnalyzerConfigCreate(ascii_fold=True, ascii_fold_ignore=["é", "ñ"])
-        assert cfg.asciiFold is True
-        assert cfg.asciiFoldIgnore == ["é", "ñ"]
-
-    def test_fold_without_ignore_is_valid(self) -> None:
-        cfg = _TextAnalyzerConfigCreate(ascii_fold=True)
-        assert cfg.asciiFold is True
-        assert cfg.asciiFoldIgnore is None
-
-    def test_stopword_preset_only_is_valid(self) -> None:
-        cfg = _TextAnalyzerConfigCreate(stopword_preset="en")
-        assert cfg.stopwordPreset == "en"
-
-    def test_empty_config_is_valid(self) -> None:
-        cfg = _TextAnalyzerConfigCreate()
-        assert cfg.asciiFold is None
-        assert cfg.asciiFoldIgnore is None
-        assert cfg.stopwordPreset is None
+    @pytest.mark.parametrize(
+        "kwargs,expected",
+        [
+            (
+                {"ascii_fold": True, "ascii_fold_ignore": ["é", "ñ"]},
+                {"asciiFold": True, "asciiFoldIgnore": ["é", "ñ"]},
+            ),
+            (
+                {"ascii_fold": True},
+                {"asciiFold": True, "asciiFoldIgnore": None},
+            ),
+            (
+                {"stopword_preset": "en"},
+                {"stopwordPreset": "en"},
+            ),
+            (
+                {},
+                {"asciiFold": None, "asciiFoldIgnore": None, "stopwordPreset": None},
+            ),
+        ],
+        ids=["fold_with_ignore", "fold_without_ignore", "stopword_preset_only", "empty"],
+    )
+    def test_valid_config(self, kwargs: dict, expected: dict) -> None:
+        cfg = _TextAnalyzerConfigCreate(**kwargs)
+        for attr, value in expected.items():
+            assert getattr(cfg, attr) == value
 
     def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateClient) -> None:
         """Client rejects the mutex violation locally with ValueError, before sending the request (which the server would also reject with 422)."""
@@ -411,7 +437,6 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien
             assert isinstance(result, TokenizeResult)
             assert result.tokenization == Tokenization.WORD
             assert result.indexed == ["the", "quick", "brown", "fox"]
-            assert "the" not in result.query
-            assert "quick" in result.query
+            assert result.query == ["quick", "brown", "fox"]
         finally:
             await async_client.collections.delete("TestAsyncPropTokenize")

From e9d681226e1917b80fed0312b086a5818cac2e9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Tue, 21 Apr 2026 09:52:24 +0100
Subject: [PATCH 05/14] fix: bump Weaviate 1.37.1 CI image build and extend
 tokenization tests with a collection fixture

---
 .github/workflows/main.yaml  |  2 +-
 integration/test_tokenize.py | 77 ++++++++++++++++++++++++++++++------
 2 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 94f75b089..a1ff94f98 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -28,7 +28,7 @@ env:
   WEAVIATE_134: 1.34.19
   WEAVIATE_135: 1.35.18
   WEAVIATE_136: 1.36.12
-  WEAVIATE_137: 1.37.1-5f911bc.amd64
+  WEAVIATE_137: 1.37.1-4e61e26.amd64
 
 jobs:
   lint-and-format:
diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index 51f154479..61d54e095 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -9,8 +9,7 @@
 
 Server-side behavior this client relies on:
 - Word tokenization defaults to preset "en" when no stopword config is sent.
-- The generic /v1/tokenize response is minimal: only ``indexed`` and ``query``
-  are returned. The property-level endpoint additionally returns ``tokenization``.
+- Both endpoints return only ``indexed`` and ``query``.
 - ``stopwords`` and ``stopword_presets`` are mutually exclusive on the generic
   endpoint — the server rejects requests that set both.
 """
@@ -57,6 +56,29 @@ async def async_client() -> AsyncGenerator[weaviate.WeaviateAsyncClient, None]:
     await c.close()
 
 
+@pytest.fixture
+def recipe_collection(client: weaviate.WeaviateClient) -> Generator:
+    """Collection with a `recipe` word-tokenized property and an en + ["quick"] stopwords config."""
+    name = "TestTokenizeRecipe"
+    client.collections.delete(name)
+    client.collections.create_from_dict(
+        {
+            "class": name,
+            "vectorizer": "none",
+            "invertedIndexConfig": {
+                "stopwords": {"preset": "en", "additions": ["quick"]},
+            },
+            "properties": [
+                {"name": "recipe", "dataType": ["text"], "tokenization": "word"},
+            ],
+        }
+    )
+    try:
+        yield client.collections.get(name)
+    finally:
+        client.collections.delete(name)
+
+
 # ---------------------------------------------------------------------------
 # Serialization
 # ---------------------------------------------------------------------------
@@ -106,8 +128,6 @@ def test_tokenization_enum(
         assert isinstance(result, TokenizeResult)
         assert result.indexed == expected_indexed
         assert result.query == expected_query
-        # Generic endpoint does not echo tokenization back.
-        assert result.tokenization is None
 
     @pytest.mark.parametrize(
         "call_kwargs,expected_indexed,expected_query",
@@ -133,7 +153,7 @@ def test_tokenization_enum(
                     "analyzer_config": _TextAnalyzerConfigCreate(ascii_fold=True),
                 },
                 ["l", "ecole", "est", "fermee"],
-                ["l", "ecole", "fermee"],
+                ["l", "ecole", "est", "fermee"],
             ),
             (
                 {
@@ -143,7 +163,7 @@ def test_tokenization_enum(
                     ),
                 },
                 ["l", "école", "est", "fermée"],
-                ["l", "école", "fermée"],
+                ["l", "école", "est", "fermée"],
             ),
             (
                 {
@@ -250,6 +270,42 @@ def test_text_tokenize(
         assert result.indexed == expected_indexed
         assert result.query == expected_query
 
+    def test_text_from_collection_config(
+        self, client: weaviate.WeaviateClient, recipe_collection
+    ) -> None:
+        """Values round-tripped through config.get() feed back into tokenization.text()."""
+        config = recipe_collection.config.get()
+        recipe = next(p for p in config.properties if p.name == "recipe")
+        stopwords = config.inverted_index_config.stopwords
+        result = client.tokenization.text(
+            text="the quick brown fox",
+            tokenization=recipe.tokenization,
+            stopwords=_StopwordsCreate(**stopwords.__dict__),
+        )
+        assert result.indexed == ["the", "quick", "brown", "fox"]
+        assert result.query == ["brown", "fox"]
+
+    def test_property_and_generic_endpoints_agree(
+        self, client: weaviate.WeaviateClient, recipe_collection
+    ) -> None:
+        """Property endpoint (server resolves config from schema) produces the same indexed/query as the generic endpoint fed the same config."""
+        config = recipe_collection.config.get()
+        recipe = next(p for p in config.properties if p.name == "recipe")
+        stopwords = config.inverted_index_config.stopwords
+
+        text = "the quick brown fox"
+        via_property = recipe_collection.config.tokenize_property(
+            property_name="recipe", text=text
+        )
+        via_generic = client.tokenization.text(
+            text=text,
+            tokenization=recipe.tokenization,
+            stopwords=_StopwordsCreate(**stopwords.__dict__),
+        )
+
+        assert via_property.indexed == via_generic.indexed
+        assert via_property.query == via_generic.query
+
 
 # ---------------------------------------------------------------------------
 # Deserialization
@@ -261,15 +317,14 @@ class TestDeserialization:
     """Verify the client correctly deserializes response fields into TokenizeResult."""
 
     def test_generic_result_shape(self, client: weaviate.WeaviateClient) -> None:
-        """Generic endpoint returns only indexed and query; tokenization is not echoed back."""
+        """Generic endpoint response deserializes into TokenizeResult with indexed and query lists."""
         result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
         assert isinstance(result, TokenizeResult)
         assert isinstance(result.indexed, list)
         assert isinstance(result.query, list)
-        assert result.tokenization is None
 
-    def test_property_result_populates_tokenization(self, client: weaviate.WeaviateClient) -> None:
-        """Property endpoint returns tokenization — the server resolved it from the property's schema rather than the caller sending it."""
+    def test_property_result_shape(self, client: weaviate.WeaviateClient) -> None:
+        """Property endpoint response deserializes into TokenizeResult — server resolves tokenization from the property's schema."""
         client.collections.delete("TestDeserPropTypes")
         try:
             client.collections.create_from_dict(
@@ -288,7 +343,6 @@ def test_property_result_populates_tokenization(self, client: weaviate.WeaviateC
             col = client.collections.get("TestDeserPropTypes")
             result = col.config.tokenize_property(property_name="tag", text="  Hello World  ")
             assert isinstance(result, TokenizeResult)
-            assert result.tokenization == Tokenization.FIELD
             assert result.indexed == ["Hello World"]
         finally:
             client.collections.delete("TestDeserPropTypes")
@@ -435,7 +489,6 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien
                 text="The quick brown fox",
             )
             assert isinstance(result, TokenizeResult)
-            assert result.tokenization == Tokenization.WORD
             assert result.indexed == ["the", "quick", "brown", "fox"]
             assert result.query == ["quick", "brown", "fox"]
         finally:

From 959f554c7df129a226ddbc5f412e95a3879891d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Tue, 21 Apr 2026 09:55:35 +0100
Subject: [PATCH 06/14] refactor: ruff format

---
 integration/test_tokenize.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index 61d54e095..d2a8442d8 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -208,9 +208,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "the quick hello world",
-                    "stopwords": _StopwordsCreate(
-                        preset=None, additions=["hello"], removals=None
-                    ),
+                    "stopwords": _StopwordsCreate(preset=None, additions=["hello"], removals=None),
                 },
                 ["the", "quick", "hello", "world"],
                 ["quick", "world"],
@@ -218,9 +216,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "the quick is fast",
-                    "stopwords": _StopwordsCreate(
-                        preset=None, additions=None, removals=["the"]
-                    ),
+                    "stopwords": _StopwordsCreate(preset=None, additions=None, removals=["the"]),
                 },
                 ["the", "quick", "is", "fast"],
                 ["the", "quick", "fast"],
@@ -294,9 +290,7 @@ def test_property_and_generic_endpoints_agree(
         stopwords = config.inverted_index_config.stopwords
 
         text = "the quick brown fox"
-        via_property = recipe_collection.config.tokenize_property(
-            property_name="recipe", text=text
-        )
+        via_property = recipe_collection.config.tokenize_property(property_name="recipe", text=text)
         via_generic = client.tokenization.text(
             text=text,
             tokenization=recipe.tokenization,

From 0f7fe47cac92107f502fb5a22c925896084535ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Tue, 21 Apr 2026 14:24:33 +0100
Subject: [PATCH 07/14] refactor: validate stopword_presets, trim TokenizeResult

---
 integration/test_tokenize.py      | 32 ++++++++++++++++++++++++
 weaviate/tokenization/executor.py | 41 ++++++++++++++++++++++---------
 weaviate/tokenization/models.py   |  5 ----
 3 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index d2a8442d8..c939e8c5b 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -403,6 +403,38 @@ def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateCli
                 stopword_presets={"custom": ["hello"]},
             )
 
+    @pytest.mark.parametrize(
+        "stopword_presets,match",
+        [
+            ({"custom": "hello"}, "must be a list of strings"),
+            (
+                {
+                    "custom": _StopwordsCreate(
+                        preset=StopwordsPreset.EN, additions=None, removals=None
+                    ),
+                },
+                "must be a list of strings",
+            ),
+            ({"custom": ["hello", 123]}, "must contain only strings"),
+        ],
+        ids=["str_value", "pydantic_model_value", "non_string_element"],
+    )
+    def test_stopword_presets_invalid_shape_raises(
+        self,
+        client: weaviate.WeaviateClient,
+        stopword_presets: dict,
+        match: str,
+    ) -> None:
+        """Client rejects malformed stopword_presets values locally before sending — str would silently split into characters; a pydantic model would serialize to field tuples."""
+        if client._connection._weaviate_version.is_lower_than(1, 37, 0):
+            pytest.skip("Tokenization requires Weaviate >= 1.37.0")
+        with pytest.raises(ValueError, match=match):
+            client.tokenization.text(
+                text="hello",
+                tokenization=Tokenization.WORD,
+                stopword_presets=stopword_presets,
+            )
+
 
 # ---------------------------------------------------------------------------
 # Version gate
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 8cedb6e51..825faee05 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -40,14 +40,15 @@ def text(
 
         For ``word`` tokenization the server defaults to the built-in ``en``
         stopword preset when no stopword configuration is supplied. Pass
-        ``analyzer_config=TextAnalyzerConfig(stopword_preset="none")`` or
-        equivalent to opt out.
+        ``analyzer_config=Configure.text_analyzer(stopword_preset=StopwordsPreset.NONE)``
+        (or equivalent) to opt out.
 
         Args:
             text: The text to tokenize.
             tokenization: The tokenization method to use (e.g. Tokenization.WORD).
             analyzer_config: Text analyzer settings (ASCII folding, stopword
-                preset name). ``stopword_preset`` may reference a built-in preset
+                preset name), built via ``Configure.text_analyzer(...)``.
+                ``stopword_preset`` may reference a built-in preset
                 (``en`` / ``none``) or a name defined in ``stopword_presets``.
             stopwords: Fallback stopword config applied when
                 ``analyzer_config.stopword_preset`` is not set. Same shape as a
@@ -64,13 +65,13 @@ def text(
             422 if both are supplied.
 
         Returns:
-            A TokenizeResult with indexed and query token lists. The generic
-            endpoint does not echo request fields (tokenization, analyzer_config,
-            stopwords, stopword_presets) back in the response.
+            A TokenizeResult with indexed and query token lists. The response
+            does not echo request fields back.
 
         Raises:
             WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
-            ValueError: If both ``stopwords`` and ``stopword_presets`` are passed.
+            ValueError: If both ``stopwords`` and ``stopword_presets`` are passed,
+                or if any ``stopword_presets`` value is not a list/tuple of strings.
         """
         self.__check_version()
 
@@ -94,10 +95,28 @@ def text(
 
         if stopword_presets is not None:
             # Plain word-list shape matching a collection's
-            # invertedIndexConfig.stopwordPresets.
-            payload["stopwordPresets"] = {
-                name: list(words) for name, words in stopword_presets.items()
-            }
+            # invertedIndexConfig.stopwordPresets. Reject str (would
+            # silently split into characters) and pydantic models /
+            # other non-sequence shapes up-front so callers get a clear
+            # error instead of a malformed payload.
+            validated: Dict[str, List[str]] = {}
+            for name, words in stopword_presets.items():
+                if isinstance(words, (str, bytes)):
+                    raise ValueError(
+                        f"stopword_presets[{name!r}] must be a list of strings, "
+                        f"got {type(words).__name__}"
+                    )
+                if not isinstance(words, (list, tuple)):
+                    raise ValueError(
+                        f"stopword_presets[{name!r}] must be a list of strings, "
+                        f"got {type(words).__name__}"
+                    )
+                if not all(isinstance(w, str) for w in words):
+                    raise ValueError(
+                        f"stopword_presets[{name!r}] must contain only strings"
+                    )
+                validated[name] = list(words)
+            payload["stopwordPresets"] = validated
 
         def resp(response: Response) -> TokenizeResult:
             return TokenizeResult.model_validate(response.json())
diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py
index 3bf980597..017abe429 100644
--- a/weaviate/tokenization/models.py
+++ b/weaviate/tokenization/models.py
@@ -13,14 +13,9 @@ class TokenizeResult(BaseModel):
     Attributes:
         indexed: Tokens as they would be stored in the inverted index.
         query: Tokens as they would be used for querying (after stopword removal).
-        tokenization: The tokenization method that was applied. Populated only by
-            the property-level endpoint, where the tokenization is resolved from
-            the property's schema. The generic ``/v1/tokenize`` endpoint does not
-            echo it back (the caller passed it).
     """
 
     model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)
 
     indexed: List[str]
     query: List[str]
-    tokenization: Optional[Tokenization] = None

From 52c2c8c8133eb1828be86f10ea824d74b822ca8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Mour=C3=A3o?= <andre.mourao@weaviate.io>
Date: Tue, 21 Apr 2026 14:31:17 +0100
Subject: [PATCH 08/14] refactor: remove unused imports in tokenization models
 and format

---
 weaviate/tokenization/executor.py | 4 +---
 weaviate/tokenization/models.py   | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 825faee05..25b36e1d3 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -112,9 +112,7 @@ def text(
                         f"got {type(words).__name__}"
                     )
                 if not all(isinstance(w, str) for w in words):
-                    raise ValueError(
-                        f"stopword_presets[{name!r}] must contain only strings"
-                    )
+                    raise ValueError(f"stopword_presets[{name!r}] must contain only strings")
                 validated[name] = list(words)
             payload["stopwordPresets"] = validated
 
diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py
index 017abe429..f8fe7cb67 100644
--- a/weaviate/tokenization/models.py
+++ b/weaviate/tokenization/models.py
@@ -1,11 +1,9 @@
 """Return types for tokenization operations."""
 
-from typing import List, Optional
+from typing import List
 
 from pydantic import BaseModel, ConfigDict
 
-from weaviate.collections.classes.config import Tokenization
-
 
 class TokenizeResult(BaseModel):
     """Result of a tokenization operation.

From 3de0955c0520358b5d12f81b094b98ef3d208559 Mon Sep 17 00:00:00 2001
From: Dirk Kulawiak <dirk@semi.technology>
Date: Tue, 21 Apr 2026 15:07:26 +0200
Subject: [PATCH 09/14] Use public classes for .text endpoint

---
 integration/test_tokenize.py           | 48 ++++++++++++--------------
 weaviate/classes/config.py             |  4 +++
 weaviate/classes/tokenization.py       | 15 ++++++++
 weaviate/collections/classes/config.py |  4 +++
 weaviate/tokenization/async_.pyi       |  8 ++---
 weaviate/tokenization/executor.py      |  8 ++---
 weaviate/tokenization/sync.pyi         |  8 ++---
 7 files changed, 57 insertions(+), 38 deletions(-)
 create mode 100644 weaviate/classes/tokenization.py

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index c939e8c5b..a5b16da32 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -1,9 +1,9 @@
 """Integration tests for the tokenization module.
 
 These tests cover the client's responsibilities:
-- Correct serialization of inputs (enums, _TextAnalyzerConfigCreate, _StopwordsCreate)
+- Correct serialization of inputs (enums, TextAnalyzerConfigCreate, StopwordsCreate)
 - Correct deserialization of responses into the TokenizeResult object
-- Client-side validation (_TextAnalyzerConfigCreate, stopwords/stopword_presets mutex)
+- Client-side validation (TextAnalyzerConfigCreate, stopwords/stopword_presets mutex)
 - Version gate (>= 1.37.0)
 - Both sync and async client paths
 
@@ -20,15 +20,15 @@
 import pytest_asyncio
 
 import weaviate
-from weaviate.collections.classes.config import (
+from weaviate.classes.tokenization import (
+    StopwordsCreate,
     StopwordsPreset,
+    TextAnalyzerConfigCreate,
     Tokenization,
-    _StopwordsCreate,
-    _TextAnalyzerConfigCreate,
+    TokenizeResult,
 )
 from weaviate.config import AdditionalConfig
 from weaviate.exceptions import WeaviateUnsupportedFeatureError
-from weaviate.tokenization.models import TokenizeResult
 
 
 @pytest.fixture(scope="module")
@@ -140,7 +140,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "The quick brown fox",
-                    "analyzer_config": _TextAnalyzerConfigCreate(
+                    "analyzer_config": TextAnalyzerConfigCreate(
                         stopword_preset=StopwordsPreset.NONE
                     ),
                 },
@@ -150,7 +150,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "L'école est fermée",
-                    "analyzer_config": _TextAnalyzerConfigCreate(ascii_fold=True),
+                    "analyzer_config": TextAnalyzerConfigCreate(ascii_fold=True),
                 },
                 ["l", "ecole", "est", "fermee"],
                 ["l", "ecole", "est", "fermee"],
@@ -158,7 +158,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "L'école est fermée",
-                    "analyzer_config": _TextAnalyzerConfigCreate(
+                    "analyzer_config": TextAnalyzerConfigCreate(
                         ascii_fold=True, ascii_fold_ignore=["é"]
                     ),
                 },
@@ -168,9 +168,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "The quick brown fox",
-                    "analyzer_config": _TextAnalyzerConfigCreate(
-                        stopword_preset=StopwordsPreset.EN
-                    ),
+                    "analyzer_config": TextAnalyzerConfigCreate(stopword_preset=StopwordsPreset.EN),
                 },
                 ["the", "quick", "brown", "fox"],
                 ["quick", "brown", "fox"],
@@ -178,7 +176,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "The quick brown fox",
-                    "analyzer_config": _TextAnalyzerConfigCreate(stopword_preset="en"),
+                    "analyzer_config": TextAnalyzerConfigCreate(stopword_preset="en"),
                 },
                 ["the", "quick", "brown", "fox"],
                 ["quick", "brown", "fox"],
@@ -186,7 +184,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "The école est fermée",
-                    "analyzer_config": _TextAnalyzerConfigCreate(
+                    "analyzer_config": TextAnalyzerConfigCreate(
                         ascii_fold=True,
                         ascii_fold_ignore=["é"],
                         stopword_preset=StopwordsPreset.EN,
@@ -198,7 +196,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "the quick brown fox",
-                    "stopwords": _StopwordsCreate(
+                    "stopwords": StopwordsCreate(
                         preset=StopwordsPreset.EN, additions=["quick"], removals=None
                     ),
                 },
@@ -208,7 +206,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "the quick hello world",
-                    "stopwords": _StopwordsCreate(preset=None, additions=["hello"], removals=None),
+                    "stopwords": StopwordsCreate(preset=None, additions=["hello"], removals=None),
                 },
                 ["the", "quick", "hello", "world"],
                 ["quick", "world"],
@@ -216,7 +214,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "the quick is fast",
-                    "stopwords": _StopwordsCreate(preset=None, additions=None, removals=["the"]),
+                    "stopwords": StopwordsCreate(preset=None, additions=None, removals=["the"]),
                 },
                 ["the", "quick", "is", "fast"],
                 ["the", "quick", "fast"],
@@ -224,7 +222,7 @@ def test_tokenization_enum(
             (
                 {
                     "text": "hello world test",
-                    "analyzer_config": _TextAnalyzerConfigCreate(stopword_preset="custom"),
+                    "analyzer_config": TextAnalyzerConfigCreate(stopword_preset="custom"),
                     "stopword_presets": {"custom": ["test"]},
                 },
                 ["hello", "world", "test"],
@@ -276,7 +274,7 @@ def test_text_from_collection_config(
         result = client.tokenization.text(
             text="the quick brown fox",
             tokenization=recipe.tokenization,
-            stopwords=_StopwordsCreate(**stopwords.__dict__),
+            stopwords=StopwordsCreate(**stopwords.__dict__),
         )
         assert result.indexed == ["the", "quick", "brown", "fox"]
         assert result.query == ["brown", "fox"]
@@ -294,7 +292,7 @@ def test_property_and_generic_endpoints_agree(
         via_generic = client.tokenization.text(
             text=text,
             tokenization=recipe.tokenization,
-            stopwords=_StopwordsCreate(**stopwords.__dict__),
+            stopwords=StopwordsCreate(**stopwords.__dict__),
         )
 
         assert via_property.indexed == via_generic.indexed
@@ -360,7 +358,7 @@ class TestClientSideValidation:
     )
     def test_ascii_fold_ignore_without_fold_raises(self, kwargs: dict) -> None:
         with pytest.raises(ValueError, match="asciiFoldIgnore"):
-            _TextAnalyzerConfigCreate(**kwargs)
+            TextAnalyzerConfigCreate(**kwargs)
 
     @pytest.mark.parametrize(
         "kwargs,expected",
@@ -385,7 +383,7 @@ def test_ascii_fold_ignore_without_fold_raises(self, kwargs: dict) -> None:
         ids=["fold_with_ignore", "fold_without_ignore", "stopword_preset_only", "empty"],
     )
     def test_valid_config(self, kwargs: dict, expected: dict) -> None:
-        cfg = _TextAnalyzerConfigCreate(**kwargs)
+        cfg = TextAnalyzerConfigCreate(**kwargs)
         for attr, value in expected.items():
             assert getattr(cfg, attr) == value
 
@@ -397,9 +395,7 @@ def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateCli
             client.tokenization.text(
                 text="hello",
                 tokenization=Tokenization.WORD,
-                stopwords=_StopwordsCreate(
-                    preset=StopwordsPreset.EN, additions=None, removals=None
-                ),
+                stopwords=StopwordsCreate(preset=StopwordsPreset.EN, additions=None, removals=None),
                 stopword_presets={"custom": ["hello"]},
             )
 
@@ -482,7 +478,7 @@ async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -
     async def test_text_with_stopwords_fallback(
         self, async_client: weaviate.WeaviateAsyncClient
     ) -> None:
-        sw = _StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None)
+        sw = StopwordsCreate(preset=StopwordsPreset.EN, additions=["quick"], removals=None)
         result = await async_client.tokenization.text(
             text="the quick brown fox",
             tokenization=Tokenization.WORD,
diff --git a/weaviate/classes/config.py b/weaviate/classes/config.py
index 868cd1c79..c154062d3 100644
--- a/weaviate/classes/config.py
+++ b/weaviate/classes/config.py
@@ -11,8 +11,10 @@
     ReferenceProperty,
     ReplicationDeletionStrategy,
     Rerankers,
+    StopwordsCreate,
     StopwordsPreset,
     TextAnalyzerConfig,
+    TextAnalyzerConfigCreate,
     Tokenization,
     VectorDistances,
 )
@@ -39,8 +41,10 @@
     "PQEncoderType",
     "ReferenceProperty",
     "Rerankers",
+    "StopwordsCreate",
     "StopwordsPreset",
     "TextAnalyzerConfig",
+    "TextAnalyzerConfigCreate",
     "Tokenization",
     "Vectorizers",
     "VectorDistances",
diff --git a/weaviate/classes/tokenization.py b/weaviate/classes/tokenization.py
new file mode 100644
index 000000000..ffb050614
--- /dev/null
+++ b/weaviate/classes/tokenization.py
@@ -0,0 +1,15 @@
+from weaviate.collections.classes.config import (
+    StopwordsCreate,
+    StopwordsPreset,
+    TextAnalyzerConfigCreate,
+    Tokenization,
+)
+from weaviate.tokenization.models import TokenizeResult
+
+__all__ = [
+    "StopwordsCreate",
+    "StopwordsPreset",
+    "TextAnalyzerConfigCreate",
+    "Tokenization",
+    "TokenizeResult",
+]
diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py
index 6d60482a3..068399d70 100644
--- a/weaviate/collections/classes/config.py
+++ b/weaviate/collections/classes/config.py
@@ -1647,6 +1647,7 @@ class _StopwordsConfig(_ConfigBase):
 
 
 StopwordsConfig = _StopwordsConfig
+StopwordsCreate = _StopwordsCreate
 
 
 @dataclass
@@ -2224,6 +2225,9 @@ def _validate_ascii_fold_ignore(self) -> "_TextAnalyzerConfigCreate":
         return self
 
 
+TextAnalyzerConfigCreate = _TextAnalyzerConfigCreate
+
+
 class Property(_ConfigCreateModel):
     """This class defines the structure of a data property that a collection can have within Weaviate.
 
diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi
index 11f4a13fc..59e815d87 100644
--- a/weaviate/tokenization/async_.pyi
+++ b/weaviate/tokenization/async_.pyi
@@ -1,9 +1,9 @@
 from typing import Dict, List, Optional
 
 from weaviate.collections.classes.config import (
+    StopwordsCreate,
+    TextAnalyzerConfigCreate,
     Tokenization,
-    _StopwordsCreate,
-    _TextAnalyzerConfigCreate,
 )
 from weaviate.connect.v4 import ConnectionAsync
 from weaviate.tokenization.models import TokenizeResult
@@ -16,7 +16,7 @@ class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]):
         text: str,
         tokenization: Tokenization,
         *,
-        analyzer_config: Optional[_TextAnalyzerConfigCreate] = None,
-        stopwords: Optional[_StopwordsCreate] = None,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = None,
+        stopwords: Optional[StopwordsCreate] = None,
         stopword_presets: Optional[Dict[str, List[str]]] = None,
     ) -> TokenizeResult: ...
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 25b36e1d3..a3beffd44 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -5,9 +5,9 @@
 from httpx import Response
 
 from weaviate.collections.classes.config import (
+    StopwordsCreate,
+    TextAnalyzerConfigCreate,
     Tokenization,
-    _StopwordsCreate,
-    _TextAnalyzerConfigCreate,
 )
 from weaviate.connect import executor
 from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes
@@ -32,8 +32,8 @@ def text(
         text: str,
         tokenization: Tokenization,
         *,
-        analyzer_config: Optional[_TextAnalyzerConfigCreate] = None,
-        stopwords: Optional[_StopwordsCreate] = None,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = None,
+        stopwords: Optional[StopwordsCreate] = None,
         stopword_presets: Optional[Dict[str, List[str]]] = None,
     ) -> executor.Result[TokenizeResult]:
         """Tokenize text using the generic /v1/tokenize endpoint.
diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi
index d931aae51..2c2470f85 100644
--- a/weaviate/tokenization/sync.pyi
+++ b/weaviate/tokenization/sync.pyi
@@ -1,9 +1,9 @@
 from typing import Dict, List, Optional
 
 from weaviate.collections.classes.config import (
+    StopwordsCreate,
+    TextAnalyzerConfigCreate,
     Tokenization,
-    _StopwordsCreate,
-    _TextAnalyzerConfigCreate,
 )
 from weaviate.connect.v4 import ConnectionSync
 from weaviate.tokenization.models import TokenizeResult
@@ -16,7 +16,7 @@ class _Tokenization(_TokenizationExecutor[ConnectionSync]):
         text: str,
         tokenization: Tokenization,
         *,
-        analyzer_config: Optional[_TextAnalyzerConfigCreate] = None,
-        stopwords: Optional[_StopwordsCreate] = None,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = None,
+        stopwords: Optional[StopwordsCreate] = None,
         stopword_presets: Optional[Dict[str, List[str]]] = None,
     ) -> TokenizeResult: ...

From 55b136adfd37f8289b1aa9ffd3816335b25fd599 Mon Sep 17 00:00:00 2001
From: Dirk Kulawiak <dirk@semi.technology>
Date: Tue, 21 Apr 2026 15:40:47 +0200
Subject: [PATCH 10/14] Add overloads for exclusivity of stopwords

---
 integration/test_tokenize.py      | 13 ++---
 weaviate/tokenization/async_.pyi  | 17 +++++--
 weaviate/tokenization/executor.py | 83 ++++++++++++++++++++++++-------
 weaviate/tokenization/sync.pyi    | 17 +++++--
 4 files changed, 93 insertions(+), 37 deletions(-)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index a5b16da32..dc244d2c3 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -274,7 +274,7 @@ def test_text_from_collection_config(
         result = client.tokenization.text(
             text="the quick brown fox",
             tokenization=recipe.tokenization,
-            stopwords=StopwordsCreate(**stopwords.__dict__),
+            stopwords=stopwords,
         )
         assert result.indexed == ["the", "quick", "brown", "fox"]
         assert result.query == ["brown", "fox"]
@@ -292,7 +292,7 @@ def test_property_and_generic_endpoints_agree(
         via_generic = client.tokenization.text(
             text=text,
             tokenization=recipe.tokenization,
-            stopwords=StopwordsCreate(**stopwords.__dict__),
+            stopwords=stopwords,
         )
 
         assert via_property.indexed == via_generic.indexed
@@ -308,13 +308,6 @@ def test_property_and_generic_endpoints_agree(
 class TestDeserialization:
     """Verify the client correctly deserializes response fields into TokenizeResult."""
 
-    def test_generic_result_shape(self, client: weaviate.WeaviateClient) -> None:
-        """Generic endpoint response deserializes into TokenizeResult with indexed and query lists."""
-        result = client.tokenization.text(text="hello", tokenization=Tokenization.WORD)
-        assert isinstance(result, TokenizeResult)
-        assert isinstance(result.indexed, list)
-        assert isinstance(result.query, list)
-
     def test_property_result_shape(self, client: weaviate.WeaviateClient) -> None:
         """Property endpoint response deserializes into TokenizeResult — server resolves tokenization from the property's schema."""
         client.collections.delete("TestDeserPropTypes")
@@ -405,7 +398,7 @@ def test_stopwords_and_stopword_presets_mutex(self, client: weaviate.WeaviateCli
             ({"custom": "hello"}, "must be a list of strings"),
             (
                 {
-                    "custom": _StopwordsCreate(
+                    "custom": StopwordsCreate(
                         preset=StopwordsPreset.EN, additions=None, removals=None
                     ),
                 },
diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi
index 59e815d87..6bd2d9e8a 100644
--- a/weaviate/tokenization/async_.pyi
+++ b/weaviate/tokenization/async_.pyi
@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, overload
 
 from weaviate.collections.classes.config import (
     StopwordsCreate,
@@ -11,12 +11,21 @@ from weaviate.tokenization.models import TokenizeResult
 from .executor import _TokenizationExecutor
 
 class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]):
+    @overload
     async def text(
         self,
         text: str,
         tokenization: Tokenization,
         *,
-        analyzer_config: Optional[TextAnalyzerConfigCreate] = None,
-        stopwords: Optional[StopwordsCreate] = None,
-        stopword_presets: Optional[Dict[str, List[str]]] = None,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
+        stopwords: Optional[StopwordsCreate] = ...,
+    ) -> TokenizeResult: ...
+    @overload
+    async def text(
+        self,
+        text: str,
+        tokenization: Tokenization,
+        *,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
+        stopword_presets: Optional[Dict[str, List[str]]] = ...,
     ) -> TokenizeResult: ...
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index a3beffd44..150cc6dd9 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -1,6 +1,6 @@
 """Tokenize executor."""
 
-from typing import Any, Dict, Generic, List, Optional
+from typing import Any, Dict, Generic, List, Optional, overload
 
 from httpx import Response
 
@@ -27,6 +27,29 @@ def __check_version(self) -> None:
                 "1.37.0",
             )
 
+    # Overloads make ``stopwords`` and ``stopword_presets`` mutually exclusive
+    # at type-check time. Passing both is additionally rejected at runtime with
+    # ``ValueError`` in the implementation below.
+    @overload
+    def text(
+        self,
+        text: str,
+        tokenization: Tokenization,
+        *,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
+        stopwords: Optional[StopwordsCreate] = ...,
+    ) -> executor.Result[TokenizeResult]: ...
+
+    @overload
+    def text(
+        self,
+        text: str,
+        tokenization: Tokenization,
+        *,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
+        stopword_presets: Optional[Dict[str, List[str]]] = ...,
+    ) -> executor.Result[TokenizeResult]: ...
+
     def text(
         self,
         text: str,
@@ -40,33 +63,55 @@ def text(
 
         For ``word`` tokenization the server defaults to the built-in ``en``
         stopword preset when no stopword configuration is supplied. Pass
-        ``analyzer_config=Configure.text_analyzer(stopword_preset=StopwordsPreset.NONE)``
-        (or equivalent) to opt out.
+        ``analyzer_config=TextAnalyzerConfigCreate(stopword_preset="none")``
+        or equivalent to opt out.
+
+        Call patterns for stopword handling (``stopwords`` and
+        ``stopword_presets`` are mutually exclusive — pass at most one):
+
+        1. **No stopword config** — rely on the server default (``en`` for
+           word tokenization, none otherwise)::
+
+               client.tokenization.text(text=..., tokenization=Tokenization.WORD)
+
+        2. **Apply a one-off stopwords block** via ``stopwords`` — the block
+           filters the query tokens directly, same shape as a collection's
+           ``invertedIndexConfig.stopwords``::
+
+               client.tokenization.text(
+                   text=...,
+                   tokenization=Tokenization.WORD,
+                   stopwords=StopwordsCreate(preset=StopwordsPreset.EN, additions=["foo"]),
+               )
+
+        3. **Register a named-preset catalog** via ``stopword_presets`` and
+           reference one by name from ``analyzer_config.stopword_preset``.
+           The catalog can also override built-in presets such as ``en``::
+
+               client.tokenization.text(
+                   text=...,
+                   tokenization=Tokenization.WORD,
+                   analyzer_config=TextAnalyzerConfigCreate(stopword_preset="custom"),
+                   stopword_presets={"custom": ["foo", "bar"]},
+               )
 
         Args:
             text: The text to tokenize.
-            tokenization: The tokenization method to use (e.g. Tokenization.WORD).
+            tokenization: The tokenization method to use (e.g. ``Tokenization.WORD``).
             analyzer_config: Text analyzer settings (ASCII folding, stopword
                 preset name), built via ``Configure.text_analyzer(...)``.
                 ``stopword_preset`` may reference a built-in preset
                 (``en`` / ``none``) or a name defined in ``stopword_presets``.
-            stopwords: Fallback stopword config applied when
-                ``analyzer_config.stopword_preset`` is not set. Same shape as a
-                collection's ``invertedIndexConfig.stopwords`` — a base preset
-                optionally tweaked with ``additions`` / ``removals``. An empty
-                ``preset`` defaults to ``en``.
-            stopword_presets: User-defined named stopword presets, each a plain
-                list of words. A name matching a built-in (``en`` / ``none``)
-                replaces the built-in entirely.
-
-        Note:
-            ``stopwords`` and ``stopword_presets`` are mutually exclusive on the
-            server — pass one or the other, not both. The server returns HTTP
-            422 if both are supplied.
+            stopwords: One-off stopwords block applied directly to this request.
+                Mutually exclusive with ``stopword_presets``.
+            stopword_presets: Named-preset catalog (name → word list). Entries
+                can be referenced from ``analyzer_config.stopword_preset`` or
+                override built-ins like ``en``. Mutually exclusive with
+                ``stopwords``.
 
         Returns:
-            A TokenizeResult with indexed and query token lists. The response
-            does not echo request fields back.
+            A ``TokenizeResult`` with indexed and query token lists. The generic
+            endpoint does not echo request fields back in the response.
 
         Raises:
             WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi
index 2c2470f85..7edf8994a 100644
--- a/weaviate/tokenization/sync.pyi
+++ b/weaviate/tokenization/sync.pyi
@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, overload
 
 from weaviate.collections.classes.config import (
     StopwordsCreate,
@@ -11,12 +11,21 @@ from weaviate.tokenization.models import TokenizeResult
 from .executor import _TokenizationExecutor
 
 class _Tokenization(_TokenizationExecutor[ConnectionSync]):
+    @overload
     def text(
         self,
         text: str,
         tokenization: Tokenization,
         *,
-        analyzer_config: Optional[TextAnalyzerConfigCreate] = None,
-        stopwords: Optional[StopwordsCreate] = None,
-        stopword_presets: Optional[Dict[str, List[str]]] = None,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
+        stopwords: Optional[StopwordsCreate] = ...,
+    ) -> TokenizeResult: ...
+    @overload
+    def text(
+        self,
+        text: str,
+        tokenization: Tokenization,
+        *,
+        analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
+        stopword_presets: Optional[Dict[str, List[str]]] = ...,
     ) -> TokenizeResult: ...

From 7924e457cdd3315325db9a9e7ce2cc51bf7d6d04 Mon Sep 17 00:00:00 2001
From: Dirk Kulawiak <dirk@semi.technology>
Date: Tue, 21 Apr 2026 15:58:08 +0200
Subject: [PATCH 11/14] Accept collection config classes as stopwords

---
 weaviate/classes/tokenization.py       |  2 ++
 weaviate/collections/classes/config.py | 20 ++++++++++++++++++++
 weaviate/tokenization/async_.pyi       |  5 +++--
 weaviate/tokenization/executor.py      | 21 +++++++++++++++++----
 weaviate/tokenization/sync.pyi         |  5 +++--
 5 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/weaviate/classes/tokenization.py b/weaviate/classes/tokenization.py
index ffb050614..0e89fc64b 100644
--- a/weaviate/classes/tokenization.py
+++ b/weaviate/classes/tokenization.py
@@ -1,4 +1,5 @@
 from weaviate.collections.classes.config import (
+    StopwordsConfig,
     StopwordsCreate,
     StopwordsPreset,
     TextAnalyzerConfigCreate,
@@ -7,6 +8,7 @@
 from weaviate.tokenization.models import TokenizeResult
 
 __all__ = [
+    "StopwordsConfig",
     "StopwordsCreate",
     "StopwordsPreset",
     "TextAnalyzerConfigCreate",
diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py
index 068399d70..43d86375d 100644
--- a/weaviate/collections/classes/config.py
+++ b/weaviate/collections/classes/config.py
@@ -1,5 +1,6 @@
 import datetime
 from dataclasses import dataclass
+from dataclasses import fields as _dataclass_fields
 from typing import (
     Any,
     ClassVar,
@@ -1649,6 +1650,25 @@ class _StopwordsConfig(_ConfigBase):
 StopwordsConfig = _StopwordsConfig
 StopwordsCreate = _StopwordsCreate
 
+# Invariant: the read-side dataclass (_StopwordsConfig) and the write-side
+# pydantic model (_StopwordsCreate) must carry the same set of field names so
+# that values round-tripped from ``collection.config.get()`` can flow back into
+# ``tokenization.text()`` without silent data loss. If a field is added to one
+# but not the other, importing this module fails loudly; the read→write
+# conversion in ``weaviate/tokenization/executor.py::_TokenizationExecutor.text``
+# depends on this parity.
+_read_fields = {f.name for f in _dataclass_fields(_StopwordsConfig)}
+_write_fields = set(_StopwordsCreate.model_fields.keys())
+if _read_fields != _write_fields:
+    raise RuntimeError(
+        "_StopwordsConfig / _StopwordsCreate field drift detected — "
+        f"read-only={_read_fields - _write_fields}, "
+        f"write-only={_write_fields - _read_fields}. "
+        "Update both classes together, or adapt the read→write conversion in "
+        "weaviate/tokenization/executor.py::_TokenizationExecutor.text."
+    )
+del _read_fields, _write_fields
+
 
 @dataclass
 class _InvertedIndexConfig(_ConfigBase):
diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi
index 6bd2d9e8a..156e25c90 100644
--- a/weaviate/tokenization/async_.pyi
+++ b/weaviate/tokenization/async_.pyi
@@ -1,6 +1,7 @@
-from typing import Dict, List, Optional, overload
+from typing import Dict, List, Optional, Union, overload
 
 from weaviate.collections.classes.config import (
+    StopwordsConfig,
     StopwordsCreate,
     TextAnalyzerConfigCreate,
     Tokenization,
@@ -18,7 +19,7 @@ class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]):
         tokenization: Tokenization,
         *,
         analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
-        stopwords: Optional[StopwordsCreate] = ...,
+        stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ...,
     ) -> TokenizeResult: ...
     @overload
     async def text(
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 150cc6dd9..ea36e1cda 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -1,10 +1,11 @@
 """Tokenize executor."""
 
-from typing import Any, Dict, Generic, List, Optional, overload
+from typing import Any, Dict, Generic, List, Optional, Union, overload
 
 from httpx import Response
 
 from weaviate.collections.classes.config import (
+    StopwordsConfig,
     StopwordsCreate,
     TextAnalyzerConfigCreate,
     Tokenization,
@@ -29,7 +30,10 @@ def __check_version(self) -> None:
 
     # Overloads make ``stopwords`` and ``stopword_presets`` mutually exclusive
     # at type-check time. Passing both is additionally rejected at runtime with
-    # ``ValueError`` in the implementation below.
+    # ``ValueError`` in the implementation below. ``stopwords`` accepts either a
+    # ``StopwordsCreate`` (the write-side shape) or a ``StopwordsConfig`` (the
+    # read-side shape returned by ``collection.config.get()``), so values round-
+    # tripped through config reads can be passed back in directly.
     @overload
     def text(
         self,
@@ -37,7 +41,7 @@ def text(
         tokenization: Tokenization,
         *,
         analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
-        stopwords: Optional[StopwordsCreate] = ...,
+        stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ...,
     ) -> executor.Result[TokenizeResult]: ...
 
     @overload
@@ -56,7 +60,7 @@ def text(
         tokenization: Tokenization,
         *,
         analyzer_config: Optional[TextAnalyzerConfigCreate] = None,
-        stopwords: Optional[StopwordsCreate] = None,
+        stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = None,
         stopword_presets: Optional[Dict[str, List[str]]] = None,
     ) -> executor.Result[TokenizeResult]:
         """Tokenize text using the generic /v1/tokenize endpoint.
@@ -134,6 +138,15 @@ def text(
                 payload["analyzerConfig"] = ac_dict
 
         if stopwords is not None:
+            if isinstance(stopwords, StopwordsConfig):
+                # Widen from the read-side shape returned by config.get() to the
+                # write-side shape the server expects. Field parity between the
+                # two classes is enforced at import time in
+                # ``weaviate/collections/classes/config.py``, so iterating
+                # ``StopwordsCreate.model_fields`` copies every field.
+                stopwords = StopwordsCreate(
+                    **{name: getattr(stopwords, name) for name in StopwordsCreate.model_fields}
+                )
             sw_dict = stopwords._to_dict()
             if sw_dict:
                 payload["stopwords"] = sw_dict
diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi
index 7edf8994a..389edd485 100644
--- a/weaviate/tokenization/sync.pyi
+++ b/weaviate/tokenization/sync.pyi
@@ -1,6 +1,7 @@
-from typing import Dict, List, Optional, overload
+from typing import Dict, List, Optional, Union, overload
 
 from weaviate.collections.classes.config import (
+    StopwordsConfig,
     StopwordsCreate,
     TextAnalyzerConfigCreate,
     Tokenization,
@@ -18,7 +19,7 @@ class _Tokenization(_TokenizationExecutor[ConnectionSync]):
         tokenization: Tokenization,
         *,
         analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
-        stopwords: Optional[StopwordsCreate] = ...,
+        stopwords: Optional[Union[StopwordsCreate, StopwordsConfig]] = ...,
     ) -> TokenizeResult: ...
     @overload
     def text(

From 64bed62ea2dc6f3a05984d1ae4ce0700600027c7 Mon Sep 17 00:00:00 2001
From: Dirk Kulawiak <dirk@semi.technology>
Date: Tue, 21 Apr 2026 16:24:06 +0200
Subject: [PATCH 12/14] Improve docstring

---
 weaviate/tokenization/executor.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index ea36e1cda..0d287ba0e 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -107,11 +107,15 @@ def text(
                 ``stopword_preset`` may reference a built-in preset
                 (``en`` / ``none``) or a name defined in ``stopword_presets``.
             stopwords: One-off stopwords block applied directly to this request.
+                Mirrors the collection-level ``invertedIndexConfig.stopwords``
+                shape — hence the rich model with preset / additions / removals.
                 Mutually exclusive with ``stopword_presets``.
-            stopword_presets: Named-preset catalog (name → word list). Entries
-                can be referenced from ``analyzer_config.stopword_preset`` or
-                override built-ins like ``en``. Mutually exclusive with
-                ``stopwords``.
+            stopword_presets: Named-preset catalog (name → word list). Mirrors
+                the property-level preset catalog — a plain mapping, since a
+                property only references a preset by name (via
+                ``analyzer_config.stopword_preset``) rather than carrying the
+                full stopwords block. Entries can override built-ins like
+                ``en``. Mutually exclusive with ``stopwords``.
 
         Returns:
             A ``TokenizeResult`` with indexed and query token lists. The generic

From 220e839360848a8c67b7eab322a97b232d12d5c9 Mon Sep 17 00:00:00 2001
From: Dirk Kulawiak <dirk@semi.technology>
Date: Tue, 21 Apr 2026 16:26:17 +0200
Subject: [PATCH 13/14] Hook up tokenization and clean up model

---
 weaviate/classes/__init__.py    | 2 ++
 weaviate/tokenization/models.py | 4 +---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/weaviate/classes/__init__.py b/weaviate/classes/__init__.py
index d495744ac..69af5d920 100644
--- a/weaviate/classes/__init__.py
+++ b/weaviate/classes/__init__.py
@@ -13,6 +13,7 @@
     rbac,
     replication,
     tenants,
+    tokenization,
 )  # noqa: F401
 from .config import ConsistencyLevel
 
@@ -29,6 +30,7 @@
     "init",
     "query",
     "tenants",
+    "tokenization",
     "rbac",
     "replication",
 ]
diff --git a/weaviate/tokenization/models.py b/weaviate/tokenization/models.py
index f8fe7cb67..baeac140c 100644
--- a/weaviate/tokenization/models.py
+++ b/weaviate/tokenization/models.py
@@ -2,7 +2,7 @@
 
 from typing import List
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel
 
 
 class TokenizeResult(BaseModel):
@@ -13,7 +13,5 @@ class TokenizeResult(BaseModel):
         query: Tokens as they would be used for querying (after stopword removal).
     """
 
-    model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True)
-
     indexed: List[str]
     query: List[str]

From 081aaef36f83890eeb839e44363851477c44d1ca Mon Sep 17 00:00:00 2001
From: Dirk Kulawiak <dirk@semi.technology>
Date: Wed, 22 Apr 2026 16:54:36 +0200
Subject: [PATCH 14/14] Move property back to tokenization

---
 integration/test_tokenize.py            | 18 ++++++-----
 weaviate/collections/config/async_.pyi  |  2 --
 weaviate/collections/config/executor.py | 40 -------------------------
 weaviate/collections/config/sync.pyi    |  2 --
 weaviate/tokenization/async_.pyi        |  3 ++
 weaviate/tokenization/executor.py       | 40 +++++++++++++++++++++++++
 weaviate/tokenization/sync.pyi          |  1 +
 7 files changed, 54 insertions(+), 52 deletions(-)

diff --git a/integration/test_tokenize.py b/integration/test_tokenize.py
index dc244d2c3..d2d46916d 100644
--- a/integration/test_tokenize.py
+++ b/integration/test_tokenize.py
@@ -288,7 +288,9 @@ def test_property_and_generic_endpoints_agree(
         stopwords = config.inverted_index_config.stopwords
 
         text = "the quick brown fox"
-        via_property = recipe_collection.config.tokenize_property(property_name="recipe", text=text)
+        via_property = client.tokenization.for_property(
+            collection=recipe_collection.name, property_name="recipe", text=text
+        )
         via_generic = client.tokenization.text(
             text=text,
             tokenization=recipe.tokenization,
@@ -325,8 +327,9 @@ def test_property_result_shape(self, client: weaviate.WeaviateClient) -> None:
                     ],
                 }
             )
-            col = client.collections.get("TestDeserPropTypes")
-            result = col.config.tokenize_property(property_name="tag", text="  Hello World  ")
+            result = client.tokenization.for_property(
+                collection="TestDeserPropTypes", property_name="tag", text="  Hello World  "
+            )
             assert isinstance(result, TokenizeResult)
             assert result.indexed == ["Hello World"]
         finally:
@@ -442,9 +445,8 @@ def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> Non
     def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None:
         if client._connection._weaviate_version.is_at_least(1, 37, 0):
             pytest.skip("Version gate only applies to Weaviate < 1.37.0")
-        col = client.collections.get("Any")
         with pytest.raises(WeaviateUnsupportedFeatureError):
-            col.config.tokenize_property(property_name="title", text="hello")
+            client.tokenization.for_property(collection="Any", property_name="title", text="hello")
 
 
 # ---------------------------------------------------------------------------
@@ -454,7 +456,7 @@ def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateC
 
 @pytest.mark.usefixtures("require_1_37")
 class TestAsyncClient:
-    """Verify text() and tokenize_property() work through the async client."""
+    """Verify tokenization.text() and tokenization.for_property() work through the async client."""
 
     @pytest.mark.asyncio
     async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
@@ -498,8 +500,8 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien
                     ],
                 }
             )
-            col = async_client.collections.get("TestAsyncPropTokenize")
-            result = await col.config.tokenize_property(
+            result = await async_client.tokenization.for_property(
+                collection="TestAsyncPropTokenize",
                 property_name="title",
                 text="The quick brown fox",
             )
diff --git a/weaviate/collections/config/async_.pyi b/weaviate/collections/config/async_.pyi
index a1f740ded..015b70dab 100644
--- a/weaviate/collections/config/async_.pyi
+++ b/weaviate/collections/config/async_.pyi
@@ -27,7 +27,6 @@ from weaviate.collections.classes.config import (
 from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate
 from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate
 from weaviate.connect.v4 import ConnectionAsync
-from weaviate.tokenization.models import TokenizeResult
 
 from .executor import _ConfigCollectionExecutor
 
@@ -91,4 +90,3 @@ class _ConfigCollectionAsync(_ConfigCollectionExecutor[ConnectionAsync]):
         self, *, vector_config: Union[_VectorConfigCreate, List[_VectorConfigCreate]]
     ) -> None: ...
     async def delete_property_index(self, property_name: str, index_name: IndexName) -> bool: ...
-    async def tokenize_property(self, property_name: str, text: str) -> TokenizeResult: ...
diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py
index fe9f5ec0d..103ab70ac 100644
--- a/weaviate/collections/config/executor.py
+++ b/weaviate/collections/config/executor.py
@@ -56,7 +56,6 @@
     WeaviateInvalidInputError,
     WeaviateUnsupportedFeatureError,
 )
-from weaviate.tokenization.models import TokenizeResult
 from weaviate.util import (
     _capitalize_first_letter,
     _decode_json_response_dict,
@@ -667,42 +666,3 @@ def resp(res: Response) -> bool:
             error_msg="Property may not exist",
             status_codes=_ExpectedStatusCodes(ok_in=[200], error="property exists"),
         )
-
-    def tokenize_property(
-        self,
-        property_name: str,
-        text: str,
-    ) -> executor.Result[TokenizeResult]:
-        """Tokenize text using a property's configured tokenization settings.
-
-        Args:
-            property_name: The property name whose tokenization config to use.
-            text: The text to tokenize.
-
-        Returns:
-            A TokenizeResult with indexed and query token lists.
-
-        Raises:
-            WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
-        """
-        if self._connection._weaviate_version.is_lower_than(1, 37, 0):
-            raise WeaviateUnsupportedFeatureError(
-                "Tokenization",
-                str(self._connection._weaviate_version),
-                "1.37.0",
-            )
-
-        path = f"/schema/{self._name}/properties/{property_name}/tokenize"
-        payload: Dict[str, Any] = {"text": text}
-
-        def resp(response: Response) -> TokenizeResult:
-            return TokenizeResult.model_validate(response.json())
-
-        return executor.execute(
-            response_callback=resp,
-            method=self._connection.post,
-            path=path,
-            weaviate_object=payload,
-            error_msg="Property tokenization failed",
-            status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"),
-        )
diff --git a/weaviate/collections/config/sync.pyi b/weaviate/collections/config/sync.pyi
index 3664a0e1b..e54d8c8fc 100644
--- a/weaviate/collections/config/sync.pyi
+++ b/weaviate/collections/config/sync.pyi
@@ -27,7 +27,6 @@ from weaviate.collections.classes.config import (
 from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate
 from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate
 from weaviate.connect.v4 import ConnectionSync
-from weaviate.tokenization.models import TokenizeResult
 
 from .executor import _ConfigCollectionExecutor
 
@@ -89,4 +88,3 @@ class _ConfigCollection(_ConfigCollectionExecutor[ConnectionSync]):
         self, *, vector_config: Union[_VectorConfigCreate, List[_VectorConfigCreate]]
     ) -> None: ...
     def delete_property_index(self, property_name: str, index_name: IndexName) -> bool: ...
-    def tokenize_property(self, property_name: str, text: str) -> TokenizeResult: ...
diff --git a/weaviate/tokenization/async_.pyi b/weaviate/tokenization/async_.pyi
index 156e25c90..ba12abc2a 100644
--- a/weaviate/tokenization/async_.pyi
+++ b/weaviate/tokenization/async_.pyi
@@ -30,3 +30,6 @@ class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]):
         analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
         stopword_presets: Optional[Dict[str, List[str]]] = ...,
     ) -> TokenizeResult: ...
+    async def for_property(
+        self, collection: str, property_name: str, text: str
+    ) -> TokenizeResult: ...
diff --git a/weaviate/tokenization/executor.py b/weaviate/tokenization/executor.py
index 0d287ba0e..33f1c05f9 100644
--- a/weaviate/tokenization/executor.py
+++ b/weaviate/tokenization/executor.py
@@ -14,6 +14,7 @@
 from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes
 from weaviate.exceptions import WeaviateUnsupportedFeatureError
 from weaviate.tokenization.models import TokenizeResult
+from weaviate.util import _capitalize_first_letter
 
 
 class _TokenizationExecutor(Generic[ConnectionType]):
@@ -189,3 +190,42 @@ def resp(response: Response) -> TokenizeResult:
             error_msg="Tokenization failed",
             status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"),
         )
+
+    def for_property(
+        self,
+        collection: str,
+        property_name: str,
+        text: str,
+    ) -> executor.Result[TokenizeResult]:
+        """Tokenize text using a property's configured tokenization settings.
+
+        The server resolves the tokenization and analyzer configuration from
+        the property's schema, so callers only supply the text.
+
+        Args:
+            collection: The collection that owns the property.
+            property_name: The property name whose tokenization config to use.
+            text: The text to tokenize.
+
+        Returns:
+            A TokenizeResult with indexed and query token lists.
+
+        Raises:
+            WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
+        """
+        self.__check_version()
+
+        path = f"/schema/{_capitalize_first_letter(collection)}/properties/{property_name}/tokenize"
+        payload: Dict[str, Any] = {"text": text}
+
+        def resp(response: Response) -> TokenizeResult:
+            return TokenizeResult.model_validate(response.json())
+
+        return executor.execute(
+            response_callback=resp,
+            method=self._connection.post,
+            path=path,
+            weaviate_object=payload,
+            error_msg="Property tokenization failed",
+            status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"),
+        )
diff --git a/weaviate/tokenization/sync.pyi b/weaviate/tokenization/sync.pyi
index 389edd485..71aaaea5c 100644
--- a/weaviate/tokenization/sync.pyi
+++ b/weaviate/tokenization/sync.pyi
@@ -30,3 +30,4 @@ class _Tokenization(_TokenizationExecutor[ConnectionSync]):
         analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
         stopword_presets: Optional[Dict[str, List[str]]] = ...,
     ) -> TokenizeResult: ...
+    def for_property(self, collection: str, property_name: str, text: str) -> TokenizeResult: ...