Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ Key dataclasses live in `libs/envector/config.py`:
- Client-side filtering requires the JSON envelope to include an object under `metadata`.

## Limitations
- Item-level delete/update is unsupported (drop the index to reset).
- Manual item IDs are not accepted; returned IDs from `add_texts` are ephemeral.
- Item-level updates are unsupported (re-insert to update; drop the index to reset).
- Manual item IDs are not accepted; use the `item_id` values returned by `add_texts` / `add_documents` for subsequent `delete` calls.
- Fetch-by-ID (`get_by_ids`) is unsupported.
- Filtering happens client-side; ensure metadata is JSON for structured filters.

## Examples
Expand All @@ -55,8 +56,8 @@ Key dataclasses live in `libs/envector/config.py`:
key=KeyConfig(
key_path=ENVECTOR_KEY_PATH,
key_id=ENVECTOR_KEY_ID,
preset="ip",
eval_mode="rmp"
preset="ip2",
eval_mode="mm32"
),
index=IndexSettings(
index_name=INDEX_NAME,
Expand Down Expand Up @@ -113,7 +114,6 @@ for doc, score in results:
print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")
```


#### Similarity Search with Vector

```python
Expand Down
6 changes: 3 additions & 3 deletions libs/envector/examples/basic_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@

from __future__ import annotations

from libs.envector.config import (
from langchain_envector.config import (
ConnectionConfig,
EnvectorConfig,
IndexSettings,
KeyConfig,
)
from libs.envector.vectorstore import Envector
from langchain_envector.vectorstore import Envector


def main():
# Replace with your actual settings
cfg = EnvectorConfig(
connection=ConnectionConfig(address="localhost:50050"),
key=KeyConfig(
key_path="./keys", key_id="example_key", preset="ip", eval_mode="rmp"
key_path="./keys", key_id="example_key", preset="ip2", eval_mode="mm32"
),
index=IndexSettings(index_name="demo", dim=384, query_encryption="plain"),
create_if_missing=True,
Expand Down
49 changes: 0 additions & 49 deletions libs/envector/examples/cipher_query.py

This file was deleted.

6 changes: 3 additions & 3 deletions libs/envector/examples/ingest_synthetic_1k.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
from pathlib import Path
from typing import List

from libs.envector.config import (
from langchain_envector.config import (
ConnectionConfig,
EnvectorConfig,
IndexSettings,
KeyConfig,
)
from libs.envector.vectorstore import Envector
from langchain_envector.vectorstore import Envector


def batched(seq, n):
Expand Down Expand Up @@ -63,7 +63,7 @@ def main():
cfg = EnvectorConfig(
connection=ConnectionConfig(address=args.address),
key=KeyConfig(
key_path=args.key_path, key_id=args.key_id, preset="ip", eval_mode="rmp"
key_path=args.key_path, key_id=args.key_id, preset="ip2", eval_mode="mm32"
),
index=IndexSettings(
index_name=args.index_name,
Expand Down
17 changes: 9 additions & 8 deletions libs/envector/langchain_envector/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,16 @@ def init(self):

# Connection
if c.address:
ev_client.init_connect(address=c.address, access_token=c.access_token)
ev_client.init_connect(
address=c.address, access_token=c.access_token, secure=c.secure
)
else:
if not (c.host and c.port):
raise ValueError("Either address or host+port must be provided.")
ev_client.init_connect(
host=c.host, port=c.port, access_token=c.access_token
host=c.host, port=c.port, access_token=c.access_token, secure=c.secure
)

# Key path baseline for Index
from pyenvector.index import Index as _Index

_Index.init_key_path(k.key_path)

# Index config + key setup
ev_client.init_index_config(
index_name=i.index_name,
Expand All @@ -55,13 +52,17 @@ def init(self):
index_encryption="cipher", # server vectors are always encrypted
index_type=i.index_type,
auto_key_setup=True,
description=i.description,
metadata_encryption=i.metadata_encryption,
)

# Create index if missing
if self.config.create_if_missing:
idx_list = ev_client.get_index_list()
if i.index_name not in idx_list:
ev_client.create_index(index_name=i.index_name, dim=i.dim)
ev_client.create_index(
index_name=i.index_name, dim=i.dim, index_params=i.index_params
)

# Bind index instance
self._index = ev.Index(i.index_name)
Expand Down
8 changes: 6 additions & 2 deletions libs/envector/langchain_envector/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class ConnectionConfig:
host: Optional[str] = None
port: Optional[int] = None
access_token: Optional[str] = None
secure: bool = True # default to secure connection if not specified


@dataclass
Expand All @@ -26,11 +27,14 @@ class KeyConfig:
class IndexSettings:
index_name: str
dim: int
query_encryption: str = "plain" # plain | cipher
index_encryption: str = "cipher" # fixed to cipher
query_encryption: str = "plain"
index_encryption: str = "cipher"
index_type: str = "flat"
output_fields: List[str] = field(default_factory=lambda: ["metadata"])
fetch_k: Optional[int] = None # over-fetch to support client-side filters
description: Optional[str] = None
metadata_encryption: Optional[str] = None
index_params: Optional[dict] = None # catch-all for any additional index parameters


@dataclass
Expand Down
33 changes: 33 additions & 0 deletions libs/envector/langchain_envector/vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,39 @@ def add_texts(
# but they are NOT persisted/addressable.
return result_ids

def delete(
    self,
    ids: Optional[List[Any]] = None,
    *,
    await_completion: bool = False,
    timeout_s: float = 600.0,
    poll_interval_s: float = 1.0,
    **kwargs: Any,
) -> Optional[bool]:
    """Delete items from the encrypted index by item ID.

    Accepts the ``item_id`` values returned from ``add_texts`` /
    ``add_documents``. ``int`` IDs, numeric ``str`` IDs and integral
    ``float`` IDs (e.g. ``2.0``) are accepted and coerced to ``int`` before
    being passed to the SDK.

    Args:
        ids: Item IDs to delete. ``None`` or an empty list is a no-op.
        await_completion: Forwarded unchanged to the SDK ``delete`` call.
        timeout_s: Forwarded unchanged to the SDK ``delete`` call.
        poll_interval_s: Forwarded unchanged to the SDK ``delete`` call.
        **kwargs: Accepted for signature compatibility; currently ignored.

    Returns:
        ``False`` when no IDs were supplied (nothing is sent to the SDK),
        ``True`` once the SDK ``delete`` call has returned. The SDK's own
        return value is discarded.

    Raises:
        ValueError: If ``ids`` is a bare ``str``/``bytes`` instead of a
            list, or if any ID is a ``bool``, a non-integral ``float``, or
            otherwise not convertible to ``int``.
    """
    if not ids:
        return False

    error_message = (
        "Envector.delete expects integer item IDs (or numeric strings) "
        "as returned by add_texts/add_documents."
    )

    # A bare string is itself iterable: "123" would otherwise be read as the
    # three separate IDs 1, 2 and 3 and delete unrelated items.
    if isinstance(ids, (str, bytes)):
        raise ValueError(error_message)

    item_ids: List[int] = []
    for raw_id in ids:
        # bool is an int subclass and int() truncates floats; either would
        # silently target a different item than the caller intended.
        if isinstance(raw_id, bool):
            raise ValueError(error_message)
        if isinstance(raw_id, float) and not raw_id.is_integer():
            raise ValueError(error_message)
        try:
            item_ids.append(int(raw_id))
        except (TypeError, ValueError) as e:
            raise ValueError(error_message) from e

    self.client.index.delete(
        item_ids=item_ids,
        await_completion=await_completion,
        timeout_s=timeout_s,
        poll_interval_s=poll_interval_s,
    )
    return True

def _similarity_search_with_scores(
self,
*,
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "langchain-envector"
version = "0.1.3"
version = "0.2.0"
description = "LangChain VectorStore integration for Envector"
readme = "README.md"
license = {text = "MIT"}
Expand All @@ -16,7 +16,7 @@ authors = [
{ name = "Envector Contributors" }
]
dependencies = [
"pyenvector",
"pyenvector>=1.4.0",
"langchain>=0.2.0",
"langchain-core>=1.2.5,<2.0",
]
Expand Down
18 changes: 18 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,30 @@ def embed_query(self, text: str) -> List[float]:
@dataclass
class FakeIndex:
inserted: List[Dict[str, Any]] = field(default_factory=list)
deleted: List[Dict[str, Any]] = field(default_factory=list)
search_payload: Optional[List[List[Dict[str, Any]]]] = None

def insert(self, data: List[List[float]], metadata: List[str]):
    """Record the inserted batch and return one fake item ID per metadata entry.

    IDs are consecutive integers starting at ``len(self.inserted) + 1``,
    measured after this batch has been recorded.
    """
    batch = dict(data=data, metadata=metadata)
    self.inserted.append(batch)
    first_id = len(self.inserted) + 1
    return list(range(first_id, first_id + len(metadata)))

def delete(
    self,
    item_ids: List[int],
    await_completion: bool = True,
    timeout_s: float = 600.0,
    poll_interval_s: float = 1.0,
) -> str:
    """Record the arguments of a delete call and return a synthetic request ID.

    Each call is appended to ``self.deleted`` (with a copy of ``item_ids``);
    the returned string is ``req-del-<n>`` where ``n`` is the number of
    calls recorded so far.
    """
    call_record: Dict[str, Any] = dict(
        item_ids=[*item_ids],
        await_completion=await_completion,
        timeout_s=timeout_s,
        poll_interval_s=poll_interval_s,
    )
    self.deleted.append(call_record)
    request_number = len(self.deleted)
    return f"req-del-{request_number}"

def search(self, query: List[float], top_k: int, output_fields: List[str]):
if self.search_payload is not None:
return self.search_payload
Expand Down
4 changes: 2 additions & 2 deletions tests/integration_tests/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_e2e_vectorstore_plain_and_cipher():
# Plain query mode
cfg_plain = EnvectorConfig(
connection=ConnectionConfig(address=address),
key=KeyConfig(key_path=key_path, key_id=key_id, preset="ip", eval_mode="rmp"),
key=KeyConfig(key_path=key_path, key_id=key_id, preset="ip2", eval_mode="mm32"),
index=IndexSettings(
index_name=f"{base_index_name}_plain", dim=dim, query_encryption="plain"
),
Expand Down Expand Up @@ -164,7 +164,7 @@ def test_e2e_vectorstore_plain_and_cipher():
# Cipher query mode
cfg_cc = EnvectorConfig(
connection=ConnectionConfig(address=address),
key=KeyConfig(key_path=key_path, key_id=key_id, preset="ip", eval_mode="rmp"),
key=KeyConfig(key_path=key_path, key_id=key_id, preset="ip2", eval_mode="mm32"),
index=IndexSettings(
index_name=f"{base_index_name}_cipher", dim=dim, query_encryption="cipher"
),
Expand Down
53 changes: 53 additions & 0 deletions tests/test_vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,59 @@ def test_add_documents_requires_vectors_when_no_embeddings():
assert "embeddings is None and vectors not provided" in str(e)


def test_delete_passes_item_ids_to_sdk():
    """Selected IDs returned by add_texts reach the client's index.delete call."""
    fake_client = FakeClient()
    vector_store = Envector(
        config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=fake_client
    )
    inserted_ids = vector_store.add_texts(["t1", "t2", "t3"])
    assert inserted_ids == [2, 3, 4]

    # Delete only the first and last inserted items.
    targets = [inserted_ids[0], inserted_ids[2]]
    assert vector_store.delete(ids=targets) is True

    assert len(fake_client.index.deleted) == 1
    recorded = fake_client.index.deleted[0]
    assert recorded["item_ids"] == [2, 4]
    assert recorded["await_completion"] is False


def test_delete_accepts_string_ids():
    """A numeric string ID is coerced to the matching int before deletion."""
    fake_client = FakeClient()
    vector_store = Envector(
        config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=fake_client
    )
    inserted_ids = vector_store.add_texts(["a", "b"])

    first_as_text = str(inserted_ids[0])
    assert vector_store.delete(ids=[first_as_text]) is True

    recorded = fake_client.index.deleted[0]
    assert recorded["item_ids"] == [inserted_ids[0]]


def test_delete_empty_or_none_returns_false():
    """With no IDs, delete returns False and records no index.delete call."""
    fake_client = FakeClient()
    vector_store = Envector(
        config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=fake_client
    )

    outcome_for_none = vector_store.delete(ids=None)
    assert outcome_for_none is False

    outcome_for_empty = vector_store.delete(ids=[])
    assert outcome_for_empty is False

    assert fake_client.index.deleted == []


def test_delete_rejects_non_numeric_ids():
    """A non-numeric string ID raises ValueError mentioning integer item IDs."""
    fake_client = FakeClient()
    vector_store = Envector(
        config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=fake_client
    )
    try:
        vector_store.delete(ids=["abc"])
    except ValueError as err:
        assert "integer item IDs" in str(err)
    else:
        # Reaching here means delete() returned instead of raising.
        assert False, "Expected ValueError for non-numeric ids"


def test_delete_forwards_await_kwargs():
    """Completion-related keyword arguments are forwarded to index.delete.

    Every value passed here differs from the defaults in the
    ``Envector.delete`` signature (``await_completion=False``,
    ``timeout_s=600.0``, ``poll_interval_s=1.0``), so the assertions can only
    pass if the arguments were actually forwarded.
    """
    client = FakeClient()
    store = Envector(config=_cfg(), embeddings=FakeEmbeddings(dim=4), client=client)
    ids = store.add_texts(["x"])

    # await_completion=True on purpose: passing the default (False) would
    # succeed even if the argument were silently dropped.
    store.delete(ids=ids, await_completion=True, timeout_s=12.0, poll_interval_s=0.5)
    call = client.index.deleted[0]
    assert call["await_completion"] is True
    assert call["timeout_s"] == 12.0
    assert call["poll_interval_s"] == 0.5


def test_add_documents_with_explicit_vectors():
client = FakeClient()
store = Envector(config=_cfg(), embeddings=None, client=client)
Expand Down
Loading