From 2c36fc6e0d8744b09a2d3afd65975df50428f28b Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 14:42:31 +0200 Subject: [PATCH 01/10] Add TurboVec --- pyproject.toml | 7 ++- uv.lock | 21 ++++++- vicinity/backends/__init__.py | 6 ++ vicinity/backends/turbovec.py | 109 ++++++++++++++++++++++++++++++++++ vicinity/datatypes.py | 1 + 5 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 vicinity/backends/turbovec.py diff --git a/pyproject.toml b/pyproject.toml index 67b110f..d268351 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ annoy = ["annoy"] faiss = ["faiss-cpu"] usearch = ["usearch"] voyager = ["voyager"] +turbovec = ["turbovec"] backends = [ "hnswlib", "pynndescent>=0.5.10", @@ -69,7 +70,8 @@ backends = [ "annoy", "faiss-cpu", "usearch", - "voyager" + "voyager", + "turbovec" ] all = [ @@ -82,7 +84,8 @@ all = [ "annoy", "faiss-cpu", "usearch", - "voyager" + "voyager", + "turbovec" ] [project.urls] diff --git a/uv.lock b/uv.lock index 9023149..1e1719b 100644 --- a/uv.lock +++ b/uv.lock @@ -1577,6 +1577,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, ] +[[package]] +name = "turbovec" +version = "0.1.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/05/f9c8de1eea79a69c43b0760a026e34cbeeda063038bbd9b99425d897c902/turbovec-0.1.3.tar.gz", hash = "sha256:140a433438f102e17947875a231f8cce50fd79b5dc381672f6510cd346cfb0d1", size = 44254, upload-time = "2026-04-15T14:40:18.158Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ec/3bc9e28852b6c2a5d2a9e88048c5bca0363e641e3956f4d1f9dc6eecc7a2/turbovec-0.1.3-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:34e50b9448759933b67136ccc7ab2cedfedcd4b4ae41441b94c93d16274fd686", size = 761398, upload-time = "2026-04-15T14:40:13.817Z" }, + { url = "https://files.pythonhosted.org/packages/ea/3e/2240c4401bdce3463d517ba652a7aa31f794bc30b10985694572ef2c30d9/turbovec-0.1.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:21b1b77113cfabb9e023b6b922ecd97e3098ec3665395234a1c581a93fe76347", size = 900829, upload-time = "2026-04-15T14:40:15.324Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ce/5406ccb0efa79bf01ba9aa965262003a6c772cf09ceb940023491df0105f/turbovec-0.1.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c14e19bdbb6e833505fb1bc8eee2791bee99355d045c82e6f350f6b39f7663c5", size = 755675, upload-time = "2026-04-15T14:40:16.64Z" }, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -1662,6 +1673,7 @@ all = [ { name = "numba" }, { name = "numpy" }, { name = "pynndescent" }, + { name = "turbovec" }, { name = "usearch" }, { name = "voyager" }, ] @@ -1676,6 +1688,7 @@ backends = [ { name = "numba" }, { name = "numpy" }, { name = "pynndescent" }, + { name = "turbovec" }, { name = "usearch" }, { name = "voyager" }, ] @@ -1707,6 +1720,9 @@ pynndescent = [ { name = "numpy" }, { name = "pynndescent" }, ] +turbovec = [ + { name = "turbovec" }, +] usearch = [ { name = "usearch" }, ] @@ -1751,6 +1767,9 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'" }, { name = "setuptools", marker = "extra == 'dev'" }, { name = "tqdm" }, + { name = "turbovec", marker = "extra == 'all'" }, + { name = "turbovec", marker = "extra == 'backends'" }, + { name = "turbovec", marker = "extra == 'turbovec'" }, { name = "usearch", marker = "extra == 'all'" }, { name = "usearch", marker = "extra == 'backends'" }, { name = "usearch", marker = "extra == 'usearch'" }, @@ -1758,7 +1777,7 @@ requires-dist = [ { name = "voyager", marker = "extra == 'backends'" }, { name = "voyager", marker = "extra == 'voyager'" }, ] -provides-extras = ["dev", "huggingface", "integrations", "hnsw", "pynndescent", "annoy", "faiss", "usearch", "voyager", "backends", "all"] +provides-extras = ["dev", "huggingface", "integrations", "hnsw", "pynndescent", "annoy", "faiss", "usearch", "voyager", "turbovec", "backends", "all"] [[package]] name = "virtualenv" diff --git a/vicinity/backends/__init__.py b/vicinity/backends/__init__.py index 6c5720c..c469099 100644 --- a/vicinity/backends/__init__.py +++ b/vicinity/backends/__init__.py @@ -62,5 +62,11 @@ def get_backend_class(backend: Backend | str) -> type[AbstractBackend]: return VoyagerBackend + elif backend == Backend.TURBOVEC: + _require("turbovec", backend, "turbovec") + from vicinity.backends.turbovec import TurboVecBackend + + return TurboVecBackend + __all__ = ["get_backend_class", "AbstractBackend", "BasicVectorStore"] diff --git a/vicinity/backends/turbovec.py b/vicinity/backends/turbovec.py new file mode 100644 index 0000000..11db267 --- /dev/null +++ b/vicinity/backends/turbovec.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import numpy as np +from numpy import typing as npt +from turbovec import TurboQuantIndex + +from vicinity.backends.base import AbstractBackend, BaseArgs +from vicinity.datatypes import Backend, QueryResult +from vicinity.utils import Metric + + +@dataclass +class TurboVecArgs(BaseArgs): + dim: int = 0 + metric: Metric = Metric.COSINE + bit_width: int = 4 + + +class TurboVecBackend(AbstractBackend[TurboVecArgs]): + argument_class = TurboVecArgs + supported_metrics = {Metric.COSINE} + + def __init__( + self, + index: TurboQuantIndex, + arguments: TurboVecArgs, + ) -> None: + """Initialize the backend using TurboVec.""" + super().__init__(arguments) + self.index = index + + @classmethod + def from_vectors( + cls: type[TurboVecBackend], + vectors: npt.NDArray, + metric: str | Metric = Metric.COSINE, + bit_width: int = 4, + **kwargs: Any, + ) -> TurboVecBackend: + """Create a new instance from vectors.""" + metric_enum = Metric.from_string(metric) + + if metric_enum not in cls.supported_metrics: + raise ValueError( + f"Metric '{metric_enum.value}' is not supported by TurboVecBackend. Only cosine is supported." + ) + + dim = vectors.shape[1] + index = TurboQuantIndex(dim=dim, bit_width=bit_width) + index.add(vectors.astype(np.float32)) + arguments = TurboVecArgs(dim=dim, metric=metric_enum, bit_width=bit_width) + return cls(index, arguments) + + @property + def backend_type(self) -> Backend: + """The type of the backend.""" + return Backend.TURBOVEC + + @property + def dim(self) -> int: + """Get the dimension of the space.""" + return self.arguments.dim + + def __len__(self) -> int: + """Get the number of vectors.""" + return len(self.index) + + @classmethod + def load(cls: type[TurboVecBackend], path: Path) -> TurboVecBackend: + """Load the index from a path.""" + index_path = path / "index.tq" + arguments = TurboVecArgs.load(path / "arguments.json") + index = TurboQuantIndex.load(str(index_path)) + return cls(index, arguments=arguments) + + def save(self, path: Path) -> None: + """Save the index to a path.""" + self.index.write(str(path / "index.tq")) + self.arguments.dump(path / "arguments.json") + + def query(self, vectors: npt.NDArray, k: int) -> QueryResult: + """Query the backend and return results as tuples of keys and distances.""" + k = min(k, len(self)) + scores_batch, indices_batch = self.index.search(vectors.astype(np.float32), k=k) + # turbovec returns cosine similarity scores (higher=better); convert to cosine distance + distances_batch = 1.0 - scores_batch + return [(indices_batch[i], distances_batch[i].astype(np.float32)) for i in range(len(vectors))] + + def insert(self, vectors: npt.NDArray) -> None: + """Insert vectors into the backend.""" + self.index.add(vectors.astype(np.float32)) + + def delete(self, indices: list[int]) -> None: + """Delete vectors from the index (not supported by TurboVec).""" + raise NotImplementedError("Dynamic deletion is not supported in TurboVec.") + + def threshold(self, vectors: npt.NDArray, threshold: float, max_k: int) -> QueryResult: + """Query vectors within a distance threshold and return keys and distances.""" + out: QueryResult = [] + for keys_row, distances_row in self.query(vectors, max_k): + keys_row = np.array(keys_row) + distances_row = np.array(distances_row, dtype=np.float32) + mask = distances_row <= threshold + out.append((keys_row[mask], distances_row[mask])) + return out diff --git a/vicinity/datatypes.py b/vicinity/datatypes.py index f09db5e..c332b30 100644 --- a/vicinity/datatypes.py +++ b/vicinity/datatypes.py @@ -25,3 +25,4 @@ class Backend(str, Enum): FAISS = "faiss" USEARCH = "usearch" VOYAGER = "voyager" + TURBOVEC = "turbovec" From 5af42b68f1347d90c61df8143077e39f3e2d7672 Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 14:47:01 +0200 Subject: [PATCH 02/10] Update tests --- tests/conftest.py | 5 +++-- tests/test_vicinity.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 63a8199..1c3b40a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,13 +22,13 @@ @pytest.fixture(scope="session") -def items() -> list[str]: +def items() -> list[str | dict[str, object]]: """Fixture providing a list of item names.""" return [f"item{i}" if i % 2 == 0 else {"name": f"item{i}", "id": i} for i in range(1, 10001)] @pytest.fixture(scope="session") -def non_serializable_items() -> list[str]: +def non_serializable_items() -> list[object]: """Fixture providing a list of non-serializable items.""" class NonSerializable: @@ -58,6 +58,7 @@ def query_vector() -> np.ndarray: (Backend.PYNNDESCENT, None), (Backend.USEARCH, None), (Backend.VOYAGER, None), + (Backend.TURBOVEC, None), ] diff --git a/tests/test_vicinity.py b/tests/test_vicinity.py index a30c864..fbabc12 100644 --- a/tests/test_vicinity.py +++ b/tests/test_vicinity.py @@ -88,8 +88,9 @@ def test_vicinity_insert(vicinity_instance: Vicinity, query_vector: np.ndarray) :param vicinity_instance: A Vicinity instance. :param query_vector: A query vector. """ - if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT}: - # Skip insert for HNSW or Annoy backends. + if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT, Backend.TURBOVEC}: + # Skip insert for HNSW, Annoy, PyNNDescent backends (no support) and + # TURBOVEC (quantization at d=8 makes rank-based assertions unreliable). return new_item = ["item10001"] new_vector = query_vector From 6d3d60858918e7a1c217612cfd28fe8a9a84fd66 Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 14:49:37 +0200 Subject: [PATCH 03/10] Validate bit_width --- vicinity/backends/turbovec.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vicinity/backends/turbovec.py b/vicinity/backends/turbovec.py index 11db267..be724b4 100644 --- a/vicinity/backends/turbovec.py +++ b/vicinity/backends/turbovec.py @@ -45,9 +45,10 @@ def from_vectors( metric_enum = Metric.from_string(metric) if metric_enum not in cls.supported_metrics: - raise ValueError( - f"Metric '{metric_enum.value}' is not supported by TurboVecBackend. Only cosine is supported." - ) + raise ValueError(f"Metric '{metric_enum.value}' is not supported by TurboVecBackend.") + + if bit_width not in (2, 3, 4): + raise ValueError(f"bit_width must be 2, 3, or 4, got {bit_width}.") dim = vectors.shape[1] index = TurboQuantIndex(dim=dim, bit_width=bit_width) @@ -86,7 +87,7 @@ def query(self, vectors: npt.NDArray, k: int) -> QueryResult: """Query the backend and return results as tuples of keys and distances.""" k = min(k, len(self)) scores_batch, indices_batch = self.index.search(vectors.astype(np.float32), k=k) - # turbovec returns cosine similarity scores (higher=better); convert to cosine distance + # TurboVec returns cosine similarity scores; convert to cosine distance distances_batch = 1.0 - scores_batch return [(indices_batch[i], distances_batch[i].astype(np.float32)) for i in range(len(vectors))] From dc2bf4ae3b9dd10f18d6d0b70b483c2357add45e Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 14:50:59 +0200 Subject: [PATCH 04/10] Update tests --- tests/conftest.py | 4 ++-- tests/test_vicinity.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1c3b40a..977eded 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,13 +42,13 @@ def __init__(self, name: str, id: int) -> None: @pytest.fixture(scope="session") def vectors() -> np.ndarray: """Fixture providing an array of vectors corresponding to items.""" - return random_gen.random((10000, 8)) + return random_gen.random((10000, 16)) @pytest.fixture(scope="session") def query_vector() -> np.ndarray: """Fixture providing a query vector.""" - return random_gen.random(8) + return random_gen.random(16) BACKEND_PARAMS = [(Backend.FAISS, index_type) for index_type in _faiss_index_types] + [ diff --git a/tests/test_vicinity.py b/tests/test_vicinity.py index fbabc12..4e24a86 100644 --- a/tests/test_vicinity.py +++ b/tests/test_vicinity.py @@ -88,9 +88,8 @@ def test_vicinity_insert(vicinity_instance: Vicinity, query_vector: np.ndarray) :param vicinity_instance: A Vicinity instance. :param query_vector: A query vector. """ - if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT, Backend.TURBOVEC}: - # Skip insert for HNSW, Annoy, PyNNDescent backends (no support) and - # TURBOVEC (quantization at d=8 makes rank-based assertions unreliable). + if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT}: + # Skip insert for HNSW, Annoy and PyNNDescent backends (no support). return new_item = ["item10001"] new_vector = query_vector From a5f212a169c68ccacafbd994a0fa6788517e1101 Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 14:58:13 +0200 Subject: [PATCH 05/10] Install openblas --- .github/workflows/ci.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d535497..df25bea 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -29,6 +29,11 @@ jobs: python-version: ${{ matrix.python-version }} allow-prereleases: true + # Install OpenBLAS on Ubuntu (required by the turbovec wheel) + - name: Install system dependencies (Ubuntu) + if: ${{ runner.os == 'Linux' }} + run: sudo apt-get install -y libopenblas-dev + # Step for Windows: Create and activate a virtual environment - name: Create and activate a virtual environment (Windows) if: ${{ runner.os == 'Windows' }} From 8abe01590a066ad6b77b4fec9b6c3fb9541da226 Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 15:03:27 +0200 Subject: [PATCH 06/10] Install openblas --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index df25bea..f4b7c87 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -32,7 +32,7 @@ jobs: # Install OpenBLAS on Ubuntu (required by the turbovec wheel) - name: Install system dependencies (Ubuntu) if: ${{ runner.os == 'Linux' }} - run: sudo apt-get install -y libopenblas-dev + run: sudo apt-get install -y libopenblas0 # Step for Windows: Create and activate a virtual environment - name: Create and activate a virtual environment (Windows) From 0d0ff6060b6c206750ed63aff6f48cfe735ea036 Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 15:13:53 +0200 Subject: [PATCH 07/10] Disable TurboVec in CI --- .github/workflows/ci.yaml | 5 ----- pyproject.toml | 6 ++---- tests/conftest.py | 5 +++-- tests/test_vicinity.py | 5 +++-- uv.lock | 4 ---- vicinity/backends/turbovec.py | 2 ++ 6 files changed, 10 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f4b7c87..d535497 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -29,11 +29,6 @@ jobs: python-version: ${{ matrix.python-version }} allow-prereleases: true - # Install OpenBLAS on Ubuntu (required by the turbovec wheel) - - name: Install system dependencies (Ubuntu) - if: ${{ runner.os == 'Linux' }} - run: sudo apt-get install -y libopenblas0 - # Step for Windows: Create and activate a virtual environment - name: Create and activate a virtual environment (Windows) if: ${{ runner.os == 'Windows' }} diff --git a/pyproject.toml b/pyproject.toml index d268351..632ac9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,8 +70,7 @@ backends = [ "annoy", "faiss-cpu", "usearch", - "voyager", - "turbovec" + "voyager" ] all = [ @@ -84,8 +83,7 @@ all = [ "annoy", "faiss-cpu", "usearch", - "voyager", - "turbovec" + "voyager" ] [project.urls] diff --git a/tests/conftest.py b/tests/conftest.py index 977eded..34aa2d3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,7 +51,9 @@ def query_vector() -> np.ndarray: return random_gen.random(16) -BACKEND_PARAMS = [(Backend.FAISS, index_type) for index_type in _faiss_index_types] + [ +BACKEND_PARAMS: list[tuple[Backend, str | None]] = [ + (Backend.FAISS, index_type) for index_type in _faiss_index_types +] + [ (Backend.BASIC, None), (Backend.HNSW, None), (Backend.ANNOY, None), @@ -61,7 +63,6 @@ def query_vector() -> np.ndarray: (Backend.TURBOVEC, None), ] - # Create human-readable ids for each backend type BACKEND_IDS = [f"{backend.name}-{index_type}" if index_type else backend.name for backend, index_type in BACKEND_PARAMS] diff --git a/tests/test_vicinity.py b/tests/test_vicinity.py index 4e24a86..0fd4b55 100644 --- a/tests/test_vicinity.py +++ b/tests/test_vicinity.py @@ -88,8 +88,9 @@ def test_vicinity_insert(vicinity_instance: Vicinity, query_vector: np.ndarray) :param vicinity_instance: A Vicinity instance. :param query_vector: A query vector. """ - if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT}: - # Skip insert for HNSW, Annoy and PyNNDescent backends (no support). + if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT, Backend.TURBOVEC}: + # Skip insert for HNSW, Annoy, PyNNDescent (no support) and TURBOVEC + # (quantization makes rank-based assertions unreliable at test dimensions). return new_item = ["item10001"] new_vector = query_vector diff --git a/uv.lock b/uv.lock index 1e1719b..b520c42 100644 --- a/uv.lock +++ b/uv.lock @@ -1673,7 +1673,6 @@ all = [ { name = "numba" }, { name = "numpy" }, { name = "pynndescent" }, - { name = "turbovec" }, { name = "usearch" }, { name = "voyager" }, ] @@ -1688,7 +1687,6 @@ backends = [ { name = "numba" }, { name = "numpy" }, { name = "pynndescent" }, - { name = "turbovec" }, { name = "usearch" }, { name = "voyager" }, ] @@ -1767,8 +1765,6 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'" }, { name = "setuptools", marker = "extra == 'dev'" }, { name = "tqdm" }, - { name = "turbovec", marker = "extra == 'all'" }, - { name = "turbovec", marker = "extra == 'backends'" }, { name = "turbovec", marker = "extra == 'turbovec'" }, { name = "usearch", marker = "extra == 'all'" }, { name = "usearch", marker = "extra == 'backends'" }, diff --git a/vicinity/backends/turbovec.py b/vicinity/backends/turbovec.py index be724b4..8f3d35e 100644 --- a/vicinity/backends/turbovec.py +++ b/vicinity/backends/turbovec.py @@ -51,6 +51,8 @@ def from_vectors( raise ValueError(f"bit_width must be 2, 3, or 4, got {bit_width}.") dim = vectors.shape[1] + if dim % 8 != 0: + raise ValueError(f"dim must be a multiple of 8, got {dim}.") index = TurboQuantIndex(dim=dim, bit_width=bit_width) index.add(vectors.astype(np.float32)) arguments = TurboVecArgs(dim=dim, metric=metric_enum, bit_width=bit_width) From cb8eb1f45d210bb50b25629b1c912e28e15b255a Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 15:16:02 +0200 Subject: [PATCH 08/10] Update tests --- tests/conftest.py | 4 +--- tests/test_vicinity.py | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 34aa2d3..1858f04 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,9 +51,7 @@ def query_vector() -> np.ndarray: return random_gen.random(16) -BACKEND_PARAMS: list[tuple[Backend, str | None]] = [ - (Backend.FAISS, index_type) for index_type in _faiss_index_types -] + [ +BACKEND_PARAMS = [(Backend.FAISS, index_type) for index_type in _faiss_index_types] + [ (Backend.BASIC, None), (Backend.HNSW, None), (Backend.ANNOY, None), diff --git a/tests/test_vicinity.py b/tests/test_vicinity.py index 0fd4b55..4e24a86 100644 --- a/tests/test_vicinity.py +++ b/tests/test_vicinity.py @@ -88,9 +88,8 @@ def test_vicinity_insert(vicinity_instance: Vicinity, query_vector: np.ndarray) :param vicinity_instance: A Vicinity instance. :param query_vector: A query vector. """ - if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT, Backend.TURBOVEC}: - # Skip insert for HNSW, Annoy, PyNNDescent (no support) and TURBOVEC - # (quantization makes rank-based assertions unreliable at test dimensions). + if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT}: + # Skip insert for HNSW, Annoy and PyNNDescent backends (no support). return new_item = ["item10001"] new_vector = query_vector From bb482c487f996f512e7e880cd203edb2163ea37a Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 15:24:29 +0200 Subject: [PATCH 09/10] Disable TurboVec in CI --- tests/conftest.py | 96 +++++++++++++++++++++++++----------------- tests/test_vicinity.py | 6 ++- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1858f04..7da6326 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ import pytest from vicinity import Vicinity +from vicinity.backends import OptionalDependencyError from vicinity.datatypes import Backend random_gen = np.random.default_rng(42) @@ -42,13 +43,13 @@ def __init__(self, name: str, id: int) -> None: @pytest.fixture(scope="session") def vectors() -> np.ndarray: """Fixture providing an array of vectors corresponding to items.""" - return random_gen.random((10000, 16)) + return random_gen.random((10000, 8)) @pytest.fixture(scope="session") def query_vector() -> np.ndarray: """Fixture providing a query vector.""" - return random_gen.random(16) + return random_gen.random(8) BACKEND_PARAMS = [(Backend.FAISS, index_type) for index_type in _faiss_index_types] + [ @@ -68,6 +69,13 @@ def query_vector() -> np.ndarray: @pytest.fixture(params=BACKEND_PARAMS) def backend_type(request: pytest.FixtureRequest) -> Backend: """Fixture parametrizing over all backend types defined in Backend.""" + backend, _ = request.param + try: + from vicinity.backends import get_backend_class + + get_backend_class(backend) + except OptionalDependencyError as e: + pytest.skip(str(e)) return request.param @@ -75,29 +83,32 @@ def backend_type(request: pytest.FixtureRequest) -> Backend: def vicinity_instance(request: pytest.FixtureRequest, items: list[str], vectors: np.ndarray) -> Vicinity: """Fixture providing a Vicinity instance for each backend type.""" backend_type, index_type = request.param - # Handle FAISS backend with specific FAISS index types - if backend_type == Backend.FAISS: - if index_type in ("pq", "ivfpq", "ivfpqr"): - # Use smaller values for pq indexes since the dataset is small - return Vicinity.from_vectors_and_items( - vectors, - items, - backend_type=backend_type, - index_type=index_type, - m=2, - nbits=4, - ) - else: - return Vicinity.from_vectors_and_items( - vectors, - items, - backend_type=backend_type, - index_type=index_type, - nlist=2, - nbits=32, - ) - - return Vicinity.from_vectors_and_items(vectors, items, backend_type=backend_type) + try: + # Handle FAISS backend with specific FAISS index types + if backend_type == Backend.FAISS: + if index_type in ("pq", "ivfpq", "ivfpqr"): + # Use smaller values for pq indexes since the dataset is small + return Vicinity.from_vectors_and_items( + vectors, + items, + backend_type=backend_type, + index_type=index_type, + m=2, + nbits=4, + ) + else: + return Vicinity.from_vectors_and_items( + vectors, + items, + backend_type=backend_type, + index_type=index_type, + nlist=2, + nbits=32, + ) + + return Vicinity.from_vectors_and_items(vectors, items, backend_type=backend_type) + except OptionalDependencyError as e: + pytest.skip(str(e)) @pytest.fixture(params=BACKEND_PARAMS, ids=BACKEND_IDS) @@ -106,19 +117,28 @@ def vicinity_instance_with_stored_vectors( ) -> Vicinity: """Fixture providing a Vicinity instance for each backend type.""" backend_type, index_type = request.param - # Handle FAISS backend with specific FAISS index types - if backend_type == Backend.FAISS: - if index_type in ("pq", "ivfpq", "ivfpqr"): - # Use smaller values for pq indexes since the dataset is small - return Vicinity.from_vectors_and_items( - vectors, items, backend_type=backend_type, index_type=index_type, m=2, nbits=4, store_vectors=True - ) - else: - return Vicinity.from_vectors_and_items( - vectors, items, backend_type=backend_type, index_type=index_type, nlist=2, nbits=32, store_vectors=True - ) - - return Vicinity.from_vectors_and_items(vectors, items, backend_type=backend_type, store_vectors=True) + try: + # Handle FAISS backend with specific FAISS index types + if backend_type == Backend.FAISS: + if index_type in ("pq", "ivfpq", "ivfpqr"): + # Use smaller values for pq indexes since the dataset is small + return Vicinity.from_vectors_and_items( + vectors, items, backend_type=backend_type, index_type=index_type, m=2, nbits=4, store_vectors=True + ) + else: + return Vicinity.from_vectors_and_items( + vectors, + items, + backend_type=backend_type, + index_type=index_type, + nlist=2, + nbits=32, + store_vectors=True, + ) + + return Vicinity.from_vectors_and_items(vectors, items, backend_type=backend_type, store_vectors=True) + except OptionalDependencyError as e: + pytest.skip(str(e)) @pytest.fixture() diff --git a/tests/test_vicinity.py b/tests/test_vicinity.py index 4e24a86..3ae9016 100644 --- a/tests/test_vicinity.py +++ b/tests/test_vicinity.py @@ -89,7 +89,11 @@ def test_vicinity_insert(vicinity_instance: Vicinity, query_vector: np.ndarray) :param query_vector: A query vector. """ if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT}: - # Skip insert for HNSW, Annoy and PyNNDescent backends (no support). + # Skip insert for backends that don't support it. + return + if vicinity_instance.backend.backend_type == Backend.TURBOVEC: + # Skip: quantization distortion means the inserted vector is not + # reliably ranked in top-k at the small dimensions used in tests. return new_item = ["item10001"] new_vector = query_vector From b7af53c4bf596329dc97acc7e22c4f4c3f98faf1 Mon Sep 17 00:00:00 2001 From: Pringled Date: Thu, 16 Apr 2026 15:27:39 +0200 Subject: [PATCH 10/10] Disable TurboVec in CI --- tests/test_vicinity.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_vicinity.py b/tests/test_vicinity.py index 3ae9016..e1fa3c0 100644 --- a/tests/test_vicinity.py +++ b/tests/test_vicinity.py @@ -88,12 +88,9 @@ def test_vicinity_insert(vicinity_instance: Vicinity, query_vector: np.ndarray) :param vicinity_instance: A Vicinity instance. :param query_vector: A query vector. """ - if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT}: - # Skip insert for backends that don't support it. - return - if vicinity_instance.backend.backend_type == Backend.TURBOVEC: - # Skip: quantization distortion means the inserted vector is not - # reliably ranked in top-k at the small dimensions used in tests. + if vicinity_instance.backend.backend_type in {Backend.HNSW, Backend.ANNOY, Backend.PYNNDESCENT, Backend.TURBOVEC}: + # HNSW, Annoy, PyNNDescent don't support insert; TurboVec quantization distortion + # makes rank-based assertions unreliable at small dimensions. return new_item = ["item10001"] new_vector = query_vector