diff --git a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py index 59424272d6..b1f5db8056 100644 --- a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py +++ b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py @@ -133,6 +133,13 @@ async def _flush_embedding_buffer( ) ) + if documents: + dims = {len(d.vector) for d in documents} + if len(dims) > 1: + msg = f"Inconsistent embedding dimensions: {dims}. Check that your embedding model returns consistent vector sizes." + callbacks.error(msg) + raise ValueError(msg) + vector_store.load_documents(documents) if skipped > 0: diff --git a/packages/graphrag/pyproject.toml b/packages/graphrag/pyproject.toml index f9fa8b5373..7d1bc0cb58 100644 --- a/packages/graphrag/pyproject.toml +++ b/packages/graphrag/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "graspologic-native~=1.2", "json-repair~=0.30", "networkx~=3.4", - "nltk~=3.9", + "nltk>=3.9.3", "numpy~=2.1", "pandas~=2.3", "pyarrow~=22.0",