From 7d36c0815f0b3f052dbb73c16177f9823d9fa2ae Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Thu, 21 May 2026 22:55:04 +0530 Subject: [PATCH 1/9] refactor: simplify bigrag platform monorepo --- api/alembic/versions/0001_initial_schema.py | 71 --- api/bigrag/app_factory/lifespan.py | 13 +- api/bigrag/app_factory/routers.py | 2 - api/bigrag/config.py | 6 +- api/bigrag/db/models/__init__.py | 2 - api/bigrag/db/models/collection.py | 10 - api/bigrag/db/models/vector_migration.py | 51 -- api/bigrag/logging.py | 1 - api/bigrag/main.py | 5 +- api/bigrag/middleware/maintenance.py | 2 +- api/bigrag/models/collection.py | 12 +- api/bigrag/models/vector_migration.py | 38 -- api/bigrag/routers/admin_realtime.py | 33 -- api/bigrag/routers/admin_vector_migrations.py | 177 ------ api/bigrag/routers/admin_vector_storage.py | 4 +- api/bigrag/routers/collections.py | 10 +- api/bigrag/routers/collections_embedding.py | 4 +- api/bigrag/routers/documents.py | 3 - api/bigrag/routers/documents_batch.py | 1 - api/bigrag/routers/documents_global.py | 1 - api/bigrag/routers/evaluation.py | 1 - api/bigrag/routers/query.py | 5 - api/bigrag/routers/vectors.py | 4 +- api/bigrag/services/backup/exporters.py | 2 - api/bigrag/services/backup/jobs.py | 15 +- api/bigrag/services/chat/questions/api.py | 1 - api/bigrag/services/chat/turn/prepare.py | 1 - api/bigrag/services/collection_cache.py | 2 - api/bigrag/services/collection_provision.py | 36 +- api/bigrag/services/collections.py | 6 +- api/bigrag/services/connectors/documents.py | 2 - api/bigrag/services/connectors/manifest.py | 1 - api/bigrag/services/health.py | 5 - api/bigrag/services/ingestion_job.py | 3 - api/bigrag/services/jobs/actors.py | 16 - api/bigrag/services/jobs/runtime.py | 9 +- api/bigrag/services/queue_embedding/insert.py | 4 - .../runtime_setting_specs/vector_store.py | 44 +- api/bigrag/services/runtime_settings_apply.py | 13 +- .../services/vector_migration/__init__.py | 17 - api/bigrag/services/vector_migration/jobs.py | 522 ------------------ api/bigrag/services/vector_store/_util.py | 2 +- api/bigrag/services/vector_store/base.py | 3 +- api/bigrag/services/vector_store/facade.py | 229 ++------ api/bigrag/services/vector_store/qdrant.py | 499 ----------------- .../services/vector_store/qdrant_filter.py | 60 -- .../services/vector_store/turbopuffer.py | 32 +- api/pyproject.toml | 1 - api/uv.lock | 128 +---- app/src/components/navigation/sidebar.tsx | 2 - .../collections/collection-form-state.ts | 4 - .../collections/create-collection-modal.tsx | 17 - app/src/features/overview/overview-page.tsx | 7 +- .../settings/instance-settings-helpers.ts | 1 - app/src/features/settings/settings-layout.ts | 14 +- app/src/features/settings/tabs/server-tab.tsx | 6 +- .../vector-storage/vector-migration-panel.tsx | 370 ------------- .../vector-storage/vector-storage-page.tsx | 84 --- .../vector-storage/vector-storage-route.tsx | 21 - app/src/hooks/use-collections.ts | 1 - app/src/hooks/use-vector-migrations.ts | 109 ---- app/src/lib/query-keys.ts | 7 - app/src/routeTree.gen.ts | 21 - .../_dashboard.collections.$name.index.tsx | 4 - .../_dashboard.collections.$name.settings.tsx | 29 +- .../routes/_dashboard.collections.$name.tsx | 4 - .../routes/_dashboard.collections.index.tsx | 17 - app/src/routes/_dashboard.vector-storage.tsx | 16 - app/src/types/bigrag-api/admin.ts | 2 - app/src/types/bigrag-api/settings.ts | 28 - dev.sh | 14 +- docker-compose.yml | 24 +- .../src/bigrag/resources/admin/__init__.py | 4 - .../src/bigrag/resources/admin/realtime.py | 12 - .../resources/admin/vector_migrations.py | 56 -- sdks/python/src/bigrag/types/__init__.py | 12 +- sdks/python/src/bigrag/types/admin.py | 37 -- sdks/python/src/bigrag/types/chat.py | 4 +- sdks/python/src/bigrag/types/collections.py | 12 +- sdks/python/src/bigrag/types/common.py | 2 - sdks/python/src/bigrag/types/evaluations.py | 4 +- sdks/python/src/bigrag/types/query.py | 10 +- sdks/typescript/src/resources/admin/index.ts | 4 - .../src/resources/admin/realtime.ts | 11 - .../src/resources/admin/vector_migrations.ts | 53 -- sdks/typescript/src/types/admin.ts | 34 -- sdks/typescript/src/types/collections.ts | 4 - sdks/typescript/src/types/common.ts | 2 - 88 files changed, 163 insertions(+), 3009 deletions(-) delete mode 100644 api/bigrag/db/models/vector_migration.py delete mode 100644 api/bigrag/models/vector_migration.py delete mode 100644 api/bigrag/routers/admin_vector_migrations.py delete mode 100644 api/bigrag/services/vector_migration/__init__.py delete mode 100644 api/bigrag/services/vector_migration/jobs.py delete mode 100644 api/bigrag/services/vector_store/qdrant.py delete mode 100644 api/bigrag/services/vector_store/qdrant_filter.py delete mode 100644 app/src/features/vector-storage/vector-migration-panel.tsx delete mode 100644 app/src/features/vector-storage/vector-storage-page.tsx delete mode 100644 app/src/features/vector-storage/vector-storage-route.tsx delete mode 100644 app/src/hooks/use-vector-migrations.ts delete mode 100644 app/src/routes/_dashboard.vector-storage.tsx delete mode 100644 sdks/python/src/bigrag/resources/admin/vector_migrations.py delete mode 100644 sdks/typescript/src/resources/admin/vector_migrations.ts diff --git a/api/alembic/versions/0001_initial_schema.py b/api/alembic/versions/0001_initial_schema.py index 3658d651..fff94d0d 100644 --- a/api/alembic/versions/0001_initial_schema.py +++ b/api/alembic/versions/0001_initial_schema.py @@ -260,7 +260,6 @@ def upgrade() -> None: sa.Column("embedding_api_key", bigrag.services.crypto.EncryptedString(), nullable=True), sa.Column("embedding_base_url", sa.Text(), nullable=True), sa.Column("embedding_preset_id", sa.Uuid(), nullable=True), - sa.Column("vector_store_provider", sa.Text(), server_default="qdrant", nullable=False), sa.Column("dimension", sa.Integer(), server_default=sa.text("1536"), nullable=False), sa.Column("chunk_size", sa.Integer(), server_default=sa.text("512"), nullable=False), sa.Column("chunk_overlap", sa.Integer(), server_default=sa.text("50"), nullable=False), @@ -283,7 +282,6 @@ def upgrade() -> None: server_default=sa.text("false"), nullable=False, ), - sa.Column("index_type", sa.Text(), server_default="HNSW", nullable=False), sa.Column("tenant_field", sa.Text(), nullable=True), sa.Column("metadata_schema", postgresql.JSONB(astext_type=sa.Text()), nullable=True), sa.Column( @@ -317,66 +315,6 @@ def upgrade() -> None: unique=False, ) op.create_index("idx_collections_name", "collections", ["name"], unique=False) - op.create_table( - "vector_migration_jobs", - sa.Column("id", sa.Uuid(), nullable=False), - sa.Column("collection_id", sa.Uuid(), nullable=True), - sa.Column("collection_name", sa.Text(), nullable=False), - sa.Column("source_provider", sa.Text(), nullable=False), - sa.Column("target_provider", sa.Text(), nullable=False), - sa.Column("status", sa.Text(), server_default="pending", nullable=False), - sa.Column("phase", sa.Text(), server_default="queued", nullable=False), - sa.Column("progress", sa.Double(), server_default=sa.text("0"), nullable=False), - sa.Column("copied_points", sa.Integer(), server_default=sa.text("0"), nullable=False), - sa.Column("total_points", sa.Integer(), nullable=True), - sa.Column( - "details", - postgresql.JSONB(astext_type=sa.Text()), - server_default=sa.text("'{}'::jsonb"), - nullable=False, - ), - sa.Column("error_message", sa.Text(), nullable=True), - sa.Column("created_by", sa.Uuid(), nullable=True), - sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), - sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), - sa.Column( - "created_at", - sa.DateTime(timezone=True), - server_default=sa.text("now()"), - nullable=False, - ), - sa.Column( - "updated_at", - sa.DateTime(timezone=True), - server_default=sa.text("now()"), - nullable=False, - ), - sa.CheckConstraint( - "status IN ('pending', 'running', 'canceling', 'succeeded', 'failed')", - name="vector_migration_jobs_status_check", - ), - sa.ForeignKeyConstraint(["collection_id"], ["collections.id"], ondelete="SET NULL"), - sa.ForeignKeyConstraint(["created_by"], ["users.id"], ondelete="SET NULL"), - sa.PrimaryKeyConstraint("id"), - ) - op.create_index( - "idx_vector_migration_jobs_collection", - "vector_migration_jobs", - ["collection_name"], - unique=False, - ) - op.create_index( - "idx_vector_migration_jobs_created_at_id", - "vector_migration_jobs", - [sa.literal_column("created_at DESC"), sa.literal_column("id DESC")], - unique=False, - ) - op.create_index( - "idx_vector_migration_jobs_status", - "vector_migration_jobs", - ["status"], - unique=False, - ) op.create_table( "connector_accounts", sa.Column("id", sa.Uuid(), nullable=False), @@ -1170,11 +1108,6 @@ def upgrade() -> None: "webhook_deliveries", "status IN ('pending', 'delivered', 'failed')", ) - op.create_check_constraint( - "collections_vector_store_provider_check", - "collections", - "vector_store_provider IN ('qdrant', 'turbopuffer')", - ) op.create_check_constraint( "embedding_presets_provider_check", "embedding_presets", @@ -1266,10 +1199,6 @@ def downgrade() -> None: op.drop_table("connector_accounts") op.drop_index("idx_collections_name", table_name="collections") op.drop_index("idx_collections_created_at_id", table_name="collections") - op.drop_index("idx_vector_migration_jobs_status", table_name="vector_migration_jobs") - op.drop_index("idx_vector_migration_jobs_created_at_id", table_name="vector_migration_jobs") - op.drop_index("idx_vector_migration_jobs_collection", table_name="vector_migration_jobs") - op.drop_table("vector_migration_jobs") op.drop_table("collections") op.drop_index("idx_backup_jobs_status", table_name="backup_jobs") op.drop_index("idx_backup_jobs_created_at_id", table_name="backup_jobs") diff --git a/api/bigrag/app_factory/lifespan.py b/api/bigrag/app_factory/lifespan.py index 12034bf1..0c31aa69 100644 --- a/api/bigrag/app_factory/lifespan.py +++ b/api/bigrag/app_factory/lifespan.py @@ -52,21 +52,16 @@ async def lifespan(app: FastAPI): runtime = await runtime_settings.get_values( [ "ingestion_workers", - "qdrant_connect_timeout_seconds", - "qdrant_required", - "qdrant_search_ef", - "qdrant_url", "turbopuffer_api_key", + "turbopuffer_base_url", "turbopuffer_namespace_prefix", "turbopuffer_region", ] ) vector_store.configure( - qdrant_url=runtime["qdrant_url"], - connect_timeout_seconds=runtime["qdrant_connect_timeout_seconds"], - search_ef=runtime["qdrant_search_ef"], turbopuffer_api_key=runtime["turbopuffer_api_key"], + turbopuffer_base_url=runtime["turbopuffer_base_url"], turbopuffer_region=runtime["turbopuffer_region"], turbopuffer_namespace_prefix=runtime["turbopuffer_namespace_prefix"], ) @@ -76,12 +71,10 @@ async def lifespan(app: FastAPI): except Exception as exc: logger.warning( "Vector store startup connection failed; API will start degraded", - provider=vector_store.provider, + provider="turbopuffer", error_type=exc.__class__.__name__, error=str(exc), ) - if runtime["qdrant_required"]: - raise app.state.vector_store = vector_store storage = await init_storage_from_runtime(upload_dir=s.upload_dir) diff --git a/api/bigrag/app_factory/routers.py b/api/bigrag/app_factory/routers.py index d1dba1aa..dc2033fc 100644 --- a/api/bigrag/app_factory/routers.py +++ b/api/bigrag/app_factory/routers.py @@ -15,7 +15,6 @@ def include_all_routers(app: FastAPI) -> None: from bigrag.routers.admin_realtime import router as admin_realtime_router from bigrag.routers.admin_settings import router as admin_settings_router from bigrag.routers.admin_users import router as admin_users_router - from bigrag.routers.admin_vector_migrations import router as admin_vector_migrations_router from bigrag.routers.admin_vector_storage import router as admin_vector_storage_router from bigrag.routers.analytics import router as analytics_router from bigrag.routers.auth import router as auth_router @@ -51,7 +50,6 @@ def include_all_routers(app: FastAPI) -> None: app.include_router(admin_settings_router) app.include_router(admin_access_router) app.include_router(admin_vector_storage_router) - app.include_router(admin_vector_migrations_router) app.include_router(admin_realtime_router) app.include_router(mcp_servers_router) app.include_router(admin_audit_router) diff --git a/api/bigrag/config.py b/api/bigrag/config.py index 9eacab94..5731b974 100644 --- a/api/bigrag/config.py +++ b/api/bigrag/config.py @@ -27,12 +27,8 @@ class Settings(BaseSettings): db_pool_max: int = 50 migration_timeout_seconds: int = 60 - qdrant_url: str = "http://localhost:6333" - qdrant_connect_timeout_seconds: int = 10 - qdrant_required: bool = False - qdrant_prefer_grpc: bool = False - qdrant_grpc_port: int = 6334 turbopuffer_api_key: str | None = None + turbopuffer_base_url: str | None = None turbopuffer_region: str = "aws-us-east-1" turbopuffer_namespace_prefix: str = "bigrag_" diff --git a/api/bigrag/db/models/__init__.py b/api/bigrag/db/models/__init__.py index b9a89dce..1b41b899 100644 --- a/api/bigrag/db/models/__init__.py +++ b/api/bigrag/db/models/__init__.py @@ -19,7 +19,6 @@ from bigrag.db.models.instance import InstanceSetting, MaintenanceLock from bigrag.db.models.observability import AccessLog, AuditLog, BackupJob, QueryLog from bigrag.db.models.preference import UserPreference -from bigrag.db.models.vector_migration import VectorMigrationJob from bigrag.db.models.webhook import Webhook, WebhookDelivery __all__ = [ @@ -46,7 +45,6 @@ "User", "UserSession", "UserPreference", - "VectorMigrationJob", "Webhook", "WebhookDelivery", ] diff --git a/api/bigrag/db/models/collection.py b/api/bigrag/db/models/collection.py index 4fda5d81..b6bac6a6 100644 --- a/api/bigrag/db/models/collection.py +++ b/api/bigrag/db/models/collection.py @@ -14,10 +14,6 @@ class Collection(Base): __tablename__ = "collections" __table_args__ = ( - sa.CheckConstraint( - "vector_store_provider IN ('qdrant', 'turbopuffer')", - name="collections_vector_store_provider_check", - ), sa.Index("idx_collections_name", "name"), sa.Index("idx_collections_created_at_id", sa.desc("created_at"), sa.desc("id")), ) @@ -37,11 +33,6 @@ class Collection(Base): sa.ForeignKey("embedding_presets.id", ondelete="RESTRICT"), nullable=True, ) - vector_store_provider: Mapped[str] = mapped_column( - sa.Text, - nullable=False, - server_default="qdrant", - ) dimension: Mapped[int] = mapped_column( sa.Integer, nullable=False, server_default=sa.text("1536") ) @@ -75,7 +66,6 @@ class Collection(Base): multimodal_enrichment_enabled: Mapped[bool] = mapped_column( sa.Boolean, nullable=False, server_default=sa.false() ) - index_type: Mapped[str] = mapped_column(sa.Text, nullable=False, server_default="HNSW") tenant_field: Mapped[str | None] = mapped_column(sa.Text) metadata_schema: Mapped[dict | None] = mapped_column(JSONB) meta: Mapped[dict] = mapped_column( diff --git a/api/bigrag/db/models/vector_migration.py b/api/bigrag/db/models/vector_migration.py deleted file mode 100644 index d9f402c0..00000000 --- a/api/bigrag/db/models/vector_migration.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import annotations - -from datetime import datetime -from uuid import UUID - -import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.orm import Mapped, mapped_column - -from bigrag.db.base import TS, Base, TSupd, UUIDpk - - -class VectorMigrationJob(Base): - __tablename__ = "vector_migration_jobs" - __table_args__ = ( - sa.CheckConstraint( - "status IN ('pending', 'running', 'canceling', 'succeeded', 'failed')", - name="vector_migration_jobs_status_check", - ), - sa.Index("idx_vector_migration_jobs_collection", "collection_name"), - sa.Index("idx_vector_migration_jobs_status", "status"), - sa.Index( - "idx_vector_migration_jobs_created_at_id", - sa.desc("created_at"), - sa.desc("id"), - ), - ) - - id: Mapped[UUIDpk] - collection_id: Mapped[UUID | None] = mapped_column( - sa.ForeignKey("collections.id", ondelete="SET NULL") - ) - collection_name: Mapped[str] = mapped_column(sa.Text, nullable=False) - source_provider: Mapped[str] = mapped_column(sa.Text, nullable=False) - target_provider: Mapped[str] = mapped_column(sa.Text, nullable=False) - status: Mapped[str] = mapped_column(sa.Text, nullable=False, server_default="pending") - phase: Mapped[str] = mapped_column(sa.Text, nullable=False, server_default="queued") - progress: Mapped[float] = mapped_column(sa.Double, nullable=False, server_default=sa.text("0")) - copied_points: Mapped[int] = mapped_column( - sa.Integer, nullable=False, server_default=sa.text("0") - ) - total_points: Mapped[int | None] = mapped_column(sa.Integer) - details: Mapped[dict] = mapped_column( - JSONB, nullable=False, server_default=sa.text("'{}'::jsonb") - ) - error_message: Mapped[str | None] = mapped_column(sa.Text) - created_by: Mapped[UUID | None] = mapped_column(sa.ForeignKey("users.id", ondelete="SET NULL")) - started_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True)) - completed_at: Mapped[datetime | None] = mapped_column(sa.DateTime(timezone=True)) - created_at: Mapped[TS] - updated_at: Mapped[TSupd] diff --git a/api/bigrag/logging.py b/api/bigrag/logging.py index a0a0dad4..03e7d6a1 100644 --- a/api/bigrag/logging.py +++ b/api/bigrag/logging.py @@ -255,7 +255,6 @@ def configure_logging(log_level: str = "debug", log_format: str = "text") -> Non "hpack", "httpcore", "httpx", - "qdrant_client", "uvicorn.access", ): logging.getLogger(name).setLevel(logging.WARNING) diff --git a/api/bigrag/main.py b/api/bigrag/main.py index e4d31f52..b8adf430 100644 --- a/api/bigrag/main.py +++ b/api/bigrag/main.py @@ -47,7 +47,7 @@ def create_app(settings_override: Settings | None = None) -> FastAPI: app = FastAPI( title="bigRAG", - description="Self-hostable RAG platform with Docling + Qdrant", + description="Self-hostable RAG platform with Docling + turbopuffer", version=__version__, lifespan=lifespan, default_response_class=ORJSONResponse, @@ -79,7 +79,6 @@ def cli() -> None: parser.add_argument("--host", help="Server host") parser.add_argument("--port", type=int, help="Server port") parser.add_argument("--database-url", help="Postgres connection URL") - parser.add_argument("--qdrant-url", help="Qdrant connection URL") parser.add_argument("--redis-url", help="Redis connection URL") args = parser.parse_args() @@ -92,8 +91,6 @@ def cli() -> None: overrides["port"] = args.port if args.database_url is not None: overrides["database_url"] = args.database_url - if args.qdrant_url is not None: - overrides["qdrant_url"] = args.qdrant_url if args.redis_url is not None: overrides["redis_url"] = args.redis_url for key, value in overrides.items(): diff --git a/api/bigrag/middleware/maintenance.py b/api/bigrag/middleware/maintenance.py index 4a5e134b..9310c02a 100644 --- a/api/bigrag/middleware/maintenance.py +++ b/api/bigrag/middleware/maintenance.py @@ -7,7 +7,7 @@ from bigrag.services.maintenance import active_lock SAFE_METHODS = {"GET", "HEAD", "OPTIONS"} -CONTROL_PATH_PREFIXES = ("/v1/admin/backups", "/v1/admin/vector-storage/migrations") +CONTROL_PATH_PREFIXES = ("/v1/admin/backups",) def _is_control_path(path: str) -> bool: diff --git a/api/bigrag/models/collection.py b/api/bigrag/models/collection.py index 4754965d..cee4226f 100644 --- a/api/bigrag/models/collection.py +++ b/api/bigrag/models/collection.py @@ -4,14 +4,11 @@ from pydantic import BaseModel, Field, model_validator -from bigrag.services.vector_store.base import VectorStoreProvider - class CreateCollectionRequest(BaseModel): name: str = Field(min_length=1, max_length=128, pattern=r"^[a-zA-Z][a-zA-Z0-9_]*$") description: str = "" embedding_preset_id: str | None = None - vector_store_provider: VectorStoreProvider = "qdrant" embedding_provider: str | None = None embedding_model: str | None = None embedding_api_key: str | None = None @@ -24,16 +21,11 @@ class CreateCollectionRequest(BaseModel): pattern=r"^(paragraph|recursive)$", description="Chunking algorithm: paragraph (default) or recursive.", ) - index_type: str = Field( - default="HNSW", - pattern=r"^HNSW$", - description="Vector index preference. Qdrant uses HNSW for dense-vector search.", - ) tenant_field: str | None = Field( default=None, max_length=64, description=( - "Optional metadata field name to index for tenant-aware Qdrant " + "Optional metadata field name to index for tenant-aware " "payload filtering in multi-tenant deployments." ), ) @@ -92,12 +84,10 @@ class CollectionResponse(BaseModel): description: str embedding_provider: str embedding_model: str - vector_store_provider: VectorStoreProvider dimension: int chunk_size: int chunk_overlap: int chunk_strategy: str = "paragraph" - index_type: str = "HNSW" tenant_field: str | None = None has_metadata_schema: bool = False document_count: int diff --git a/api/bigrag/models/vector_migration.py b/api/bigrag/models/vector_migration.py deleted file mode 100644 index 71954ee9..00000000 --- a/api/bigrag/models/vector_migration.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import annotations - -from datetime import datetime - -from pydantic import BaseModel, Field - -from bigrag.services.vector_store.base import VectorStoreProvider - - -class VectorMigrationCreateRequest(BaseModel): - collection: str = Field(min_length=1, max_length=128) - target_provider: VectorStoreProvider - - -class VectorMigrationJobResponse(BaseModel): - id: str - collection_id: str | None - collection_name: str - source_provider: VectorStoreProvider - target_provider: VectorStoreProvider - status: str - phase: str - progress: float - copied_points: int - total_points: int | None - details: dict - error_message: str | None = None - created_by: str | None = None - started_at: datetime | None = None - completed_at: datetime | None = None - created_at: datetime - updated_at: datetime - - -class VectorMigrationJobListResponse(BaseModel): - jobs: list[VectorMigrationJobResponse] - total: int | None = None - next_cursor: str | None = None diff --git a/api/bigrag/routers/admin_realtime.py b/api/bigrag/routers/admin_realtime.py index 61e4184d..1b7aa76d 100644 --- a/api/bigrag/routers/admin_realtime.py +++ b/api/bigrag/routers/admin_realtime.py @@ -11,7 +11,6 @@ from bigrag.routers.admin_access import access_overview, list_access_logs from bigrag.routers.admin_audit import list_audit_log from bigrag.routers.admin_backups import list_backup_jobs -from bigrag.routers.admin_vector_migrations import list_vector_migration_jobs from bigrag.routers.collections import get_collection_stats from bigrag.routers.connectors import connector_sources, connector_sync_jobs from bigrag.routers.documents import get_document, list_documents @@ -37,7 +36,6 @@ ACTIVE_SYNC_JOB_STATUSES = {"pending", "running"} ACTIVE_BACKUP_JOB_STATUSES = {"pending", "running"} -ACTIVE_VECTOR_MIGRATION_STATUSES = {"pending", "running", "canceling"} def _parse_document_ids(document_ids: list[str]) -> list[str]: @@ -84,12 +82,6 @@ def _backup_jobs_interval(payload: Any | None) -> float: return 2.0 if active else 15.0 -def _vector_migration_jobs_interval(payload: Any | None) -> float: - jobs = getattr(payload, "jobs", []) if payload is not None else [] - active = any(getattr(job, "status", None) in ACTIVE_VECTOR_MIGRATION_STATUSES for job in jobs) - return 2.0 if active else 15.0 - - @router.get("/collections/{collection_name}/documents", response_class=StreamingResponse) async def collection_documents_stream( collection_name: str, @@ -283,31 +275,6 @@ async def load(): return _interval_response(topic, load, _backup_jobs_interval) -@router.get("/vector-migrations", response_class=StreamingResponse) -async def vector_migration_jobs_stream( - collection: str | None = Query(default=None, max_length=128), - limit: int = Query(default=20, ge=1, le=100), - offset: int = Query(default=0, ge=0), - user: dict = Depends(require_admin_session), -): - topic = f"vector-migrations:{collection or 'all'}:{limit}:{offset}" - - async def load(): - return await _with_session( - lambda session: list_vector_migration_jobs( - collection=collection, - limit=limit, - offset=offset, - cursor=None, - include_total=False, - _=user, - session=session, - ) - ) - - return _interval_response(topic, load, _vector_migration_jobs_interval) - - @router.get("/access/overview", response_class=StreamingResponse) async def access_overview_stream( window_days: int = Query(default=7, ge=1, le=90), diff --git a/api/bigrag/routers/admin_vector_migrations.py b/api/bigrag/routers/admin_vector_migrations.py deleted file mode 100644 index aa623fec..00000000 --- a/api/bigrag/routers/admin_vector_migrations.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import annotations - -from uuid import UUID - -import sqlalchemy as sa -from fastapi import APIRouter, Depends, HTTPException, Query, Request -from sqlalchemy.ext.asyncio import AsyncSession - -from bigrag.db.models import VectorMigrationJob -from bigrag.db.session import get_session -from bigrag.middleware.auth import require_admin_session -from bigrag.models import StatusResponse -from bigrag.models.vector_migration import ( - VectorMigrationCreateRequest, - VectorMigrationJobListResponse, - VectorMigrationJobResponse, -) -from bigrag.routers import uuid_or_404 -from bigrag.services import audit -from bigrag.services.error_sanitize import sanitize_message_text -from bigrag.services.jobs.actors import enqueue_vector_migration_job -from bigrag.services.pagination import apply_cursor, build_response_cursor, decode_cursor_or_400 -from bigrag.services.vector_migration import ( - VectorMigrationConflictError, - VectorMigrationError, - create_vector_migration_job, - delete_vector_migration_job, -) - -router = APIRouter( - prefix="/v1/admin/vector-storage/migrations", - tags=["admin:vector-storage"], -) - - -def vector_migration_job_response(job: VectorMigrationJob) -> VectorMigrationJobResponse: - return VectorMigrationJobResponse( - id=str(job.id), - collection_id=str(job.collection_id) if job.collection_id else None, - collection_name=job.collection_name, - source_provider=job.source_provider, - target_provider=job.target_provider, - status=job.status, - phase=job.phase, - progress=job.progress, - copied_points=job.copied_points, - total_points=job.total_points, - details=job.details or {}, - error_message=job.error_message, - created_by=str(job.created_by) if job.created_by else None, - started_at=job.started_at, - completed_at=job.completed_at, - created_at=job.created_at, - updated_at=job.updated_at, - ) - - -@router.get("", response_model=VectorMigrationJobListResponse) -async def list_vector_migration_jobs( - collection: str | None = Query(default=None, max_length=128), - limit: int = Query(default=20, ge=1, le=100), - offset: int = Query(default=0, ge=0), - cursor: str | None = Query(default=None), - include_total: bool = Query(default=False), - _: dict = Depends(require_admin_session), - session: AsyncSession = Depends(get_session), -) -> VectorMigrationJobListResponse: - cursor_tuple = decode_cursor_or_400(cursor) - stmt = sa.select(VectorMigrationJob).order_by( - VectorMigrationJob.created_at.desc(), - VectorMigrationJob.id.desc(), - ) - count_stmt = sa.select(sa.func.count()).select_from(VectorMigrationJob) - if collection: - stmt = stmt.where(VectorMigrationJob.collection_name == collection) - count_stmt = count_stmt.where(VectorMigrationJob.collection_name == collection) - - if cursor_tuple is not None: - stmt = apply_cursor( - stmt, - VectorMigrationJob.created_at, - VectorMigrationJob.id, - cursor_tuple, - ).limit(limit + 1) - else: - stmt = stmt.limit(limit + 1).offset(offset) - - rows = (await session.scalars(stmt)).all() - page, next_cursor = build_response_cursor(list(rows), "created_at", "id", limit) - - total: int | None = None - if include_total: - total = (await session.scalar(count_stmt)) or 0 - - return VectorMigrationJobListResponse( - jobs=[vector_migration_job_response(job) for job in page], - total=total, - next_cursor=next_cursor, - ) - - -@router.get("/{migration_id}", response_model=VectorMigrationJobResponse) -async def get_vector_migration_job( - migration_id: str, - _: dict = Depends(require_admin_session), - session: AsyncSession = Depends(get_session), -) -> VectorMigrationJobResponse: - try: - target_id = UUID(migration_id) - except ValueError as exc: - raise HTTPException(status_code=400, detail="Invalid migration_id") from exc - job = await session.get(VectorMigrationJob, target_id) - if job is None: - raise HTTPException(status_code=404, detail="Vector migration job not found") - return vector_migration_job_response(job) - - -@router.post("", response_model=VectorMigrationJobResponse, status_code=201) -async def start_vector_migration_job( - body: VectorMigrationCreateRequest, - request: Request, - admin: dict = Depends(require_admin_session), -) -> VectorMigrationJobResponse: - user_id = UUID(admin["id"]) if admin.get("id") else None - try: - job = await create_vector_migration_job( - collection_name=body.collection, - target_provider=body.target_provider, - created_by=user_id, - ) - except VectorMigrationConflictError as exc: - raise HTTPException( - status_code=409, - detail=sanitize_message_text(str(exc)) or "Vector migration cannot be started.", - ) from exc - except VectorMigrationError as exc: - raise HTTPException( - status_code=400, - detail=sanitize_message_text(str(exc)) or "Vector migration cannot be started.", - ) from exc - audit.record( - request, - user=admin, - action="vector_migration.requested", - resource_type="vector_migration_job", - resource_id=str(job.id), - metadata={ - "collection": job.collection_name, - "source_provider": job.source_provider, - "target_provider": job.target_provider, - }, - ) - enqueue_vector_migration_job(str(job.id)) - return vector_migration_job_response(job) - - -@router.delete("/{migration_id}", response_model=StatusResponse) -async def delete_vector_migration_job_route( - migration_id: str, - request: Request, - admin: dict = Depends(require_admin_session), -) -> StatusResponse: - target_id = uuid_or_404(migration_id, "Vector migration job") - result = await delete_vector_migration_job(target_id) - if result is None: - raise HTTPException(status_code=404, detail="Vector migration job not found") - audit.record( - request, - user=admin, - action="vector_migration.delete", - resource_type="vector_migration_job", - resource_id=migration_id, - metadata={"result": result}, - ) - if result == "stop_requested": - return StatusResponse(status="ok", message="Vector migration stop requested") - return StatusResponse(status="ok", message="Vector migration deleted") diff --git a/api/bigrag/routers/admin_vector_storage.py b/api/bigrag/routers/admin_vector_storage.py index 0fffd917..70d70d12 100644 --- a/api/bigrag/routers/admin_vector_storage.py +++ b/api/bigrag/routers/admin_vector_storage.py @@ -22,7 +22,6 @@ async def vector_storage_overview( await session.execute( sa.select( Collection.name, - Collection.vector_store_provider, sa.func.count(Document.id).label("documents"), sa.func.coalesce(sa.func.sum(Document.chunk_count), 0).label("chunks"), sa.func.coalesce(sa.func.sum(Document.file_size), 0).label("bytes"), @@ -35,12 +34,11 @@ async def vector_storage_overview( collections = [ { "name": name, - "provider": provider, "documents": int(documents or 0), "chunks": int(chunks or 0), "bytes": int(bytes_ or 0), } - for name, provider, documents, chunks, bytes_ in rows + for name, documents, chunks, bytes_ in rows ] totals = { "collections": len(collections), diff --git a/api/bigrag/routers/collections.py b/api/bigrag/routers/collections.py index 087ea0d4..381a0663 100644 --- a/api/bigrag/routers/collections.py +++ b/api/bigrag/routers/collections.py @@ -49,12 +49,10 @@ def _collection_response(c: Collection) -> CollectionResponse: description=c.description, embedding_provider=c.embedding_provider, embedding_model=c.embedding_model, - vector_store_provider=c.vector_store_provider, dimension=c.dimension, chunk_size=c.chunk_size, chunk_overlap=c.chunk_overlap, chunk_strategy=c.chunk_strategy, - index_type=c.index_type, tenant_field=c.tenant_field, has_metadata_schema=bool(c.metadata_schema), document_count=c.document_count, @@ -125,7 +123,6 @@ async def create_collection( logger.info( "create collection", name=body.name, - vector_store_provider=body.vector_store_provider, provider=body.embedding_provider, model=body.embedding_model, ) @@ -232,14 +229,12 @@ async def create_collection( collection = Collection( name=body.name, description=body.description, - vector_store_provider=body.vector_store_provider, embedding_provider=provider, embedding_model=model, dimension=dimension, chunk_size=body.chunk_size, chunk_overlap=body.chunk_overlap, chunk_strategy=body.chunk_strategy, - index_type=body.index_type, tenant_field=body.tenant_field, meta=body.metadata, metadata_schema=body.metadata_schema, @@ -260,11 +255,11 @@ async def create_collection( await session.commit() except IntegrityError as e: await session.rollback() - await vector_store.delete_collection(body.name, provider=body.vector_store_provider) + await vector_store.delete_collection(body.name) raise HTTPException(status_code=409, detail="Collection already exists") from e except Exception: await session.rollback() - await vector_store.delete_collection(body.name, provider=body.vector_store_provider) + await vector_store.delete_collection(body.name) raise await session.refresh(collection) await collection_cache.invalidate(body.name) @@ -285,7 +280,6 @@ async def create_collection( metadata={ "name": body.name, "provider": provider, - "vector_store_provider": body.vector_store_provider, "model": model, "dimension": dimension, }, diff --git a/api/bigrag/routers/collections_embedding.py b/api/bigrag/routers/collections_embedding.py index 396a5451..76bcc6de 100644 --- a/api/bigrag/routers/collections_embedding.py +++ b/api/bigrag/routers/collections_embedding.py @@ -14,6 +14,7 @@ from bigrag.services.ingestion_job import create_ingestion_job from bigrag.services.queue import ingestion_queue from bigrag.services.retrieval import invalidate_collection_query_cache +from bigrag.services.vector_store import vector_store logger = get_logger("bigrag.routers.collections_embedding") @@ -47,7 +48,6 @@ async def reembed_collection( "chunk_size": collection.chunk_size, "chunk_overlap": collection.chunk_overlap, "chunk_strategy": collection.chunk_strategy or "paragraph", - "vector_store_provider": collection.vector_store_provider, "tenant_field": collection.tenant_field, } jobs = [ @@ -61,6 +61,8 @@ async def reembed_collection( ] doc_ids = [doc_id for doc_id, _ in docs] + for doc_id in doc_ids: + await vector_store.delete_by_document(name, str(doc_id)) await session.execute( sa.update(Document) .where(Document.id.in_(doc_ids)) diff --git a/api/bigrag/routers/documents.py b/api/bigrag/routers/documents.py index 3c14e52f..19ac88c3 100644 --- a/api/bigrag/routers/documents.py +++ b/api/bigrag/routers/documents.py @@ -286,7 +286,6 @@ async def delete_document( await vector_store.delete_by_document( collection_name, document_id, - provider=collection.get("vector_store_provider"), ) storage = get_storage() await storage.delete(file_path) @@ -333,7 +332,6 @@ async def reprocess_document( await vector_store.delete_by_document( collection_name, document_id, - provider=collection.get("vector_store_provider"), ) doc.status = "pending" @@ -451,7 +449,6 @@ async def get_document_chunks( document_id, limit=limit, offset=offset, - provider=collection.get("vector_store_provider"), ) return {"chunks": chunks, "total": total} diff --git a/api/bigrag/routers/documents_batch.py b/api/bigrag/routers/documents_batch.py index 5c52d16d..8a18bac9 100644 --- a/api/bigrag/routers/documents_batch.py +++ b/api/bigrag/routers/documents_batch.py @@ -241,7 +241,6 @@ async def _delete_one(doc_id: str, doc: Document) -> bool: await vector_store.delete_by_document( collection_name, doc_id, - provider=collection.get("vector_store_provider"), ) storage = get_storage() await storage.delete(doc.file_path) diff --git a/api/bigrag/routers/documents_global.py b/api/bigrag/routers/documents_global.py index 881f8c5d..8c72c566 100644 --- a/api/bigrag/routers/documents_global.py +++ b/api/bigrag/routers/documents_global.py @@ -60,6 +60,5 @@ async def get_document_chunks_global( document_id, limit=limit, offset=offset, - provider=collection.get("vector_store_provider"), ) return {"chunks": chunks, "total": total} diff --git a/api/bigrag/routers/evaluation.py b/api/bigrag/routers/evaluation.py index 9eba0e5e..38b3384e 100644 --- a/api/bigrag/routers/evaluation.py +++ b/api/bigrag/routers/evaluation.py @@ -122,7 +122,6 @@ async def run_evaluation( search_mode=body.search_mode, filters=case_filters, reranking_config=get_reranking_config(collection), - vector_store_provider=collection.get("vector_store_provider"), ) hit_ids = [r.get("document_id") or r.get("id") for r in outcome.results] expected = set(case.relevant_ids) diff --git a/api/bigrag/routers/query.py b/api/bigrag/routers/query.py index 9ab04c50..811073e7 100644 --- a/api/bigrag/routers/query.py +++ b/api/bigrag/routers/query.py @@ -95,7 +95,6 @@ async def query_collection( search_mode=search_mode, reranking_config=get_reranking_config(collection), rerank_override=body.rerank, - vector_store_provider=collection.get("vector_store_provider"), ) logger.info( @@ -214,7 +213,6 @@ async def multi_collection_query( embedding_models = {} reranking_configs = {} - vector_store_providers = {} resolved_collections = await asyncio.gather( *[get_collection_or_404(col_name) for col_name in body.collections] ) @@ -225,7 +223,6 @@ async def multi_collection_query( except (ImportError, ValueError) as e: raise HTTPException(status_code=400, detail=f"Collection '{col_name}': {e}") from e reranking_configs[col_name] = get_reranking_config(collection) - vector_store_providers[col_name] = collection.get("vector_store_provider") or "qdrant" include_multimodal_by_collection = { col_name: bool(body.multimodal and collection.get("multimodal_enabled")) for col_name, collection in zip(body.collections, resolved_collections, strict=True) @@ -241,7 +238,6 @@ async def multi_collection_query( search_mode=body.search_mode, reranking_configs=reranking_configs, rerank_override=body.rerank, - vector_store_providers=vector_store_providers, ) logger.info("multi-query complete", collections=body.collections, results=len(results)) @@ -314,7 +310,6 @@ async def run_one(item: BatchQueryItem) -> tuple[BatchQueryItem, list[dict], int search_mode=item.search_mode, reranking_config=get_reranking_config(collection), rerank_override=item.rerank, - vector_store_provider=collection.get("vector_store_provider"), ) include_multimodal = bool(item.multimodal and collection.get("multimodal_enabled")) diff --git a/api/bigrag/routers/vectors.py b/api/bigrag/routers/vectors.py index b2c27e2c..f673299e 100644 --- a/api/bigrag/routers/vectors.py +++ b/api/bigrag/routers/vectors.py @@ -90,7 +90,6 @@ async def upsert_vectors( embeddings=embeddings, texts=texts, metadata=metadata, - provider=collection.get("vector_store_provider"), ) await invalidate_collection_query_cache(collection_name) logger.info("vector upsert complete", collection=collection_name, upserted=count) @@ -123,12 +122,11 @@ async def delete_vectors( status_code=413, detail=f"Too many vector IDs. Max: {limits['max_vector_delete_count']}", ) - collection = await get_collection_or_404(collection_name) + await get_collection_or_404(collection_name) logger.info("vector delete", collection=collection_name, ids=len(body.ids)) await vector_store.delete_by_ids( collection_name, body.ids, - provider=collection.get("vector_store_provider"), ) await invalidate_collection_query_cache(collection_name) access_log.set_context(request, metadata={"deleted": len(body.ids)}) diff --git a/api/bigrag/services/backup/exporters.py b/api/bigrag/services/backup/exporters.py index 4ff39bb2..bcbdbbf1 100644 --- a/api/bigrag/services/backup/exporters.py +++ b/api/bigrag/services/backup/exporters.py @@ -77,7 +77,6 @@ async def _export_vector_store(temp_dir: Path) -> dict[str, int]: async for point in vector_store.iter_collection_points( collection.name, with_vectors=False, - provider=collection.vector_store_provider, ): f.write(orjson.dumps(_point_payload(point)) + b"\n") count += 1 @@ -88,7 +87,6 @@ async def _export_vector_store(temp_dir: Path) -> dict[str, int]: collections_meta.append( { "collection": collection.name, - "provider": collection.vector_store_provider, "vector_store_collection": collection.name, "exists": exists, "points": count, diff --git a/api/bigrag/services/backup/jobs.py b/api/bigrag/services/backup/jobs.py index e2332b03..085630ff 100644 --- a/api/bigrag/services/backup/jobs.py +++ b/api/bigrag/services/backup/jobs.py @@ -12,7 +12,7 @@ import sqlalchemy as sa from bigrag.db.engine import session_factory -from bigrag.db.models import AuditLog, BackupJob, ConnectorSyncJob, VectorMigrationJob +from bigrag.db.models import AuditLog, BackupJob, ConnectorSyncJob from bigrag.logging import get_logger from bigrag.services.maintenance import acquire_backup_lock, active_lock, release_backup_lock from bigrag.services.queue import ingestion_queue @@ -25,8 +25,6 @@ logger = get_logger("bigrag.backup") -ACTIVE_VECTOR_MIGRATION_STATUSES = ("pending", "running", "canceling") - async def create_backup_job(*, label: str, created_by: uuid.UUID | None) -> BackupJob: lock = await active_lock() @@ -43,17 +41,6 @@ async def create_backup_job(*, label: str, created_by: uuid.UUID | None) -> Back ) if active is not None: raise BackupConfigError("A backup is already pending or running") - active_migration = await session.scalar( - sa.select(VectorMigrationJob) - .where(VectorMigrationJob.status.in_(ACTIVE_VECTOR_MIGRATION_STATUSES)) - .order_by(VectorMigrationJob.created_at.desc()) - .limit(1) - .with_for_update() - ) - if active_migration is not None: - raise BackupConfigError( - "A vector migration is already pending, running, or canceling" - ) job = BackupJob(label=label.strip(), created_by=created_by) session.add(job) await session.refresh(job) diff --git a/api/bigrag/services/chat/questions/api.py b/api/bigrag/services/chat/questions/api.py index 03a4bb9a..1fe2cbbf 100644 --- a/api/bigrag/services/chat/questions/api.py +++ b/api/bigrag/services/chat/questions/api.py @@ -192,7 +192,6 @@ async def _sample_chunks( str(document.id), limit=CHUNK_LIMIT, offset=offset, - provider=collection.get("vector_store_provider"), ) for chunk in chunks: item = dict(chunk) diff --git a/api/bigrag/services/chat/turn/prepare.py b/api/bigrag/services/chat/turn/prepare.py index f8484d2b..617a17fe 100644 --- a/api/bigrag/services/chat/turn/prepare.py +++ b/api/bigrag/services/chat/turn/prepare.py @@ -90,7 +90,6 @@ async def _prepare_chat_turn( search_mode=search_mode, reranking_config=get_reranking_config(collection), rerank_override=rerank, - vector_store_provider=collection.get("vector_store_provider"), ) sources = await _sources_from_results(session, outcome.results) timings = ChatTimings( diff --git a/api/bigrag/services/collection_cache.py b/api/bigrag/services/collection_cache.py index de44635b..6d13f7d7 100644 --- a/api/bigrag/services/collection_cache.py +++ b/api/bigrag/services/collection_cache.py @@ -34,7 +34,6 @@ def _serialize(c: Collection, preset: EmbeddingPreset | None = None) -> dict: "embedding_preset_id": str(c.embedding_preset_id) if c.embedding_preset_id else None, "embedding_preset_api_key": (preset.api_key if preset else None) if crypto_ready else None, "embedding_preset_base_url": preset.base_url if preset else None, - "vector_store_provider": c.vector_store_provider, "dimension": c.dimension, "chunk_size": c.chunk_size, "chunk_overlap": c.chunk_overlap, @@ -48,7 +47,6 @@ def _serialize(c: Collection, preset: EmbeddingPreset | None = None) -> dict: "reranking_api_key": c.reranking_api_key if crypto_ready else None, "multimodal_enabled": c.multimodal_enabled, "multimodal_enrichment_enabled": c.multimodal_enrichment_enabled, - "index_type": c.index_type, "tenant_field": c.tenant_field, "metadata_schema": c.metadata_schema, "metadata": c.meta or {}, diff --git a/api/bigrag/services/collection_provision.py b/api/bigrag/services/collection_provision.py index 4dca1db4..8afd4683 100644 --- a/api/bigrag/services/collection_provision.py +++ b/api/bigrag/services/collection_provision.py @@ -36,21 +36,19 @@ async def verify_embedding_credentials( ) from exc -def vector_store_unavailable_detail(provider: str) -> str: - if provider == "turbopuffer": - return ( - "turbopuffer is not configured. Save a turbopuffer API key in Vector Storage " - "before creating a turbopuffer collection." - ) - return f"{provider} vector store is not configured." +def vector_store_unavailable_detail() -> str: + return ( + "Turbopuffer is not configured. Save a turbopuffer API key in Vector Storage " + "before creating a collection." + ) -def ensure_vector_store_provider_available(provider: str) -> None: - if provider in vector_store.configured_providers: +def ensure_vector_store_available() -> None: + if vector_store.provider in vector_store.configured_providers: return raise HTTPException( status_code=400, - detail=vector_store_unavailable_detail(provider), + detail=vector_store_unavailable_detail(), ) @@ -58,48 +56,38 @@ async def create_vector_store_collection( body: CreateCollectionRequest, dimension: int, ) -> None: - ensure_vector_store_provider_available(body.vector_store_provider) + ensure_vector_store_available() try: await vector_store.create_collection( body.name, dimension, - index_type=body.index_type, tenant_field=body.tenant_field, - provider=body.vector_store_provider, ) except RuntimeError as e: message = str(e) if "API key is not configured" in message or "client is not connected" in message: raise HTTPException( status_code=400, - detail=vector_store_unavailable_detail(body.vector_store_provider), + detail=vector_store_unavailable_detail(), ) from e logger.warning( "vector collection create failed", collection=body.name, - vector_store_provider=body.vector_store_provider, error_type=e.__class__.__name__, error=message, ) raise HTTPException( status_code=502, - detail=( - f"Unable to create {body.vector_store_provider} vector collection. " - "Check Vector Storage settings." - ), + detail="Unable to create vector collection. Check Vector Storage settings.", ) from e except httpx.HTTPError as e: logger.warning( "vector collection create failed", collection=body.name, - vector_store_provider=body.vector_store_provider, error_type=e.__class__.__name__, error=str(e), ) raise HTTPException( status_code=502, - detail=( - f"Unable to create {body.vector_store_provider} vector collection. " - "Check Vector Storage settings." - ), + detail="Unable to create vector collection. Check Vector Storage settings.", ) from e diff --git a/api/bigrag/services/collections.py b/api/bigrag/services/collections.py index cf7d72d5..a6a7533a 100644 --- a/api/bigrag/services/collections.py +++ b/api/bigrag/services/collections.py @@ -25,14 +25,13 @@ async def delete_collection(session: AsyncSession, name: str) -> str: logger.info("delete collection jobs cancelled", collection=name, flushed=flushed) deleted_id = str(collection.id) - vector_store_provider = collection.vector_store_provider await session.delete(collection) await session.commit() await collection_cache.invalidate(name) await invalidate_collection_query_cache(name) logger.info("delete collection database records removed", collection=name) - await vector_store.delete_collection(name, provider=vector_store_provider) + await vector_store.delete_collection(name) logger.info("delete collection vectors dropped", collection=name) deleted = await get_storage().delete_prefix(f"{name}/") @@ -51,7 +50,6 @@ async def truncate_collection(session: AsyncSession, name: str) -> str: logger.info("truncate collection jobs cancelled", collection=name, flushed=flushed) collection_id = str(collection.id) - vector_store_provider = collection.vector_store_provider await session.execute(sa.delete(Document).where(Document.collection_id == collection.id)) await session.execute( sa.update(Collection).where(Collection.id == collection.id).values(document_count=0) @@ -61,7 +59,7 @@ async def truncate_collection(session: AsyncSession, name: str) -> str: await invalidate_collection_query_cache(name) logger.info("truncate collection documents removed", collection=name) - await vector_store.delete_collection(name, provider=vector_store_provider) + await vector_store.delete_collection(name) logger.info("truncate collection vectors cleared", collection=name) deleted = await get_storage().delete_prefix(f"{name}/") diff --git a/api/bigrag/services/connectors/documents.py b/api/bigrag/services/connectors/documents.py index 19844259..72c5e9f0 100644 --- a/api/bigrag/services/connectors/documents.py +++ b/api/bigrag/services/connectors/documents.py @@ -104,7 +104,6 @@ async def _put_downloaded(storage_key: str) -> None: await vector_store.delete_by_document( source.collection_name, str(doc.id), - provider=collection.vector_store_provider, ) old_path = doc.file_path storage_key = f"{source.collection_name}/{doc.id}{downloaded.file_ext}" @@ -156,7 +155,6 @@ async def delete_synced_document( await vector_store.delete_by_document( source.collection_name, str(doc.id), - provider=collection.vector_store_provider, ) storage = get_storage() await storage.delete(doc.file_path) diff --git a/api/bigrag/services/connectors/manifest.py b/api/bigrag/services/connectors/manifest.py index e4358686..6b869b59 100644 --- a/api/bigrag/services/connectors/manifest.py +++ b/api/bigrag/services/connectors/manifest.py @@ -26,7 +26,6 @@ def collection_dict_for_sync(collection: Collection) -> dict[str, Any]: "chunk_size": collection.chunk_size, "chunk_overlap": collection.chunk_overlap, "chunk_strategy": collection.chunk_strategy or "paragraph", - "vector_store_provider": collection.vector_store_provider, "tenant_field": collection.tenant_field, "metadata_schema": collection.metadata_schema, } diff --git a/api/bigrag/services/health.py b/api/bigrag/services/health.py index cacd6952..89aa6232 100644 --- a/api/bigrag/services/health.py +++ b/api/bigrag/services/health.py @@ -211,11 +211,6 @@ async def _check_redis(): healthy = False else: checks[name] = True - checks["vector_store_provider"] = "per_collection" - checks["qdrant"] = ( - checks["vector_store"] if "qdrant" in getattr(vs, "configured_providers", ()) else None - ) - embedding_result = await check_embedding_provider() checks.update(embedding_result) if not embedding_result.get("embedding"): diff --git a/api/bigrag/services/ingestion_job.py b/api/bigrag/services/ingestion_job.py index 3bad2281..00e928d3 100644 --- a/api/bigrag/services/ingestion_job.py +++ b/api/bigrag/services/ingestion_job.py @@ -17,7 +17,6 @@ class IngestionJob: chunk_size: int chunk_overlap: int chunk_strategy: str = "paragraph" - vector_store_provider: str = "qdrant" tenant_field: str | None = None embedding_base_url: str | None = None multimodal_enabled: bool = False @@ -45,7 +44,6 @@ def serialize(self) -> bytes: "chunk_size": self.chunk_size, "chunk_overlap": self.chunk_overlap, "chunk_strategy": self.chunk_strategy, - "vector_store_provider": self.vector_store_provider, "tenant_field": self.tenant_field, "attempt": self.attempt, "max_attempts": self.max_attempts, @@ -79,7 +77,6 @@ def create_ingestion_job( chunk_size=collection["chunk_size"], chunk_overlap=collection["chunk_overlap"], chunk_strategy=collection.get("chunk_strategy") or "paragraph", - vector_store_provider=collection.get("vector_store_provider") or "qdrant", tenant_field=collection.get("tenant_field"), multimodal_enabled=bool(collection.get("multimodal_enabled")), multimodal_enrichment_enabled=bool(collection.get("multimodal_enrichment_enabled")), diff --git a/api/bigrag/services/jobs/actors.py b/api/bigrag/services/jobs/actors.py index c4dfd69b..f24204ad 100644 --- a/api/bigrag/services/jobs/actors.py +++ b/api/bigrag/services/jobs/actors.py @@ -71,10 +71,6 @@ def enqueue_backup_job(job_id: str) -> None: run_backup.send(job_id) -def enqueue_vector_migration_job(job_id: str) -> None: - run_vector_migration.send(job_id) - - def seed_periodic_jobs(enabled_queues: set[str] | None = None) -> None: if enabled_queues is None or MAINTENANCE_QUEUE in enabled_queues: _schedule_sync( @@ -227,18 +223,6 @@ async def _run_backup(job_id: str) -> None: await run_backup_job(job_id) -@dramatiq.actor(queue_name=MAINTENANCE_QUEUE, max_retries=0, broker=broker) -def run_vector_migration(job_id: str) -> None: - _run(_run_vector_migration, job_id) - - -async def _run_vector_migration(job_id: str) -> None: - await ensure_worker_runtime() - from bigrag.services.vector_migration import run_vector_migration_job - - await run_vector_migration_job(job_id) - - @dramatiq.actor(queue_name=MAINTENANCE_QUEUE, max_retries=0, broker=broker) def run_cleanup() -> None: try: diff --git a/api/bigrag/services/jobs/runtime.py b/api/bigrag/services/jobs/runtime.py index 6fab5c0a..4214654d 100644 --- a/api/bigrag/services/jobs/runtime.py +++ b/api/bigrag/services/jobs/runtime.py @@ -64,21 +64,16 @@ async def ensure_worker_runtime() -> None: runtime = await runtime_settings.get_values( [ "ingestion_workers", - "qdrant_connect_timeout_seconds", - "qdrant_required", - "qdrant_search_ef", - "qdrant_url", "turbopuffer_api_key", + "turbopuffer_base_url", "turbopuffer_namespace_prefix", "turbopuffer_region", ] ) logger.info("worker runtime settings loaded") vector_store.configure( - qdrant_url=runtime["qdrant_url"], - connect_timeout_seconds=runtime["qdrant_connect_timeout_seconds"], - search_ef=runtime["qdrant_search_ef"], turbopuffer_api_key=runtime["turbopuffer_api_key"], + turbopuffer_base_url=runtime["turbopuffer_base_url"], turbopuffer_region=runtime["turbopuffer_region"], turbopuffer_namespace_prefix=runtime["turbopuffer_namespace_prefix"], ) diff --git a/api/bigrag/services/queue_embedding/insert.py b/api/bigrag/services/queue_embedding/insert.py index 82242a67..8479caab 100644 --- a/api/bigrag/services/queue_embedding/insert.py +++ b/api/bigrag/services/queue_embedding/insert.py @@ -107,7 +107,6 @@ async def chunk_and_embed( job.collection_name, job.embedding_dimension, tenant_field=getattr(job, "tenant_field", None), - provider=job.vector_store_provider, ) await ensure_job_current(job) @@ -246,7 +245,6 @@ async def _embed_one_bounded(bn, bs, be, bc): texts=batch_texts, embeddings=embeddings, metadata=metadata, - provider=job.vector_store_provider, ) try: await ensure_job_current(job) @@ -254,7 +252,6 @@ async def _embed_one_bounded(bn, bs, be, bc): await vector_store.delete_by_document( job.collection_name, doc, - provider=job.vector_store_provider, ) raise insert_elapsed = time.monotonic() - t1 @@ -266,7 +263,6 @@ async def _embed_one_bounded(bn, bs, be, bc): await vector_store.delete_by_ids( job.collection_name, [f"{doc}_{i}" for i in range(batch_start, batch_end)], - provider=job.vector_store_provider, ) except Exception as cleanup_exc: logger.warning( diff --git a/api/bigrag/services/runtime_setting_specs/vector_store.py b/api/bigrag/services/runtime_setting_specs/vector_store.py index 1080ec7f..c7f51aa4 100644 --- a/api/bigrag/services/runtime_setting_specs/vector_store.py +++ b/api/bigrag/services/runtime_setting_specs/vector_store.py @@ -3,32 +3,6 @@ from bigrag.services.runtime_setting_specs._spec import SettingSpec VECTOR_STORE_SPECS: tuple[SettingSpec, ...] = ( - SettingSpec( - key="qdrant_url", - group="vector_store", - label="Qdrant URL", - kind="string", - default="http://localhost:6333", - description="Qdrant connection URL.", - ), - SettingSpec( - key="qdrant_connect_timeout_seconds", - group="vector_store", - label="Qdrant connect timeout", - kind="int", - default=10, - description="Qdrant startup connection timeout in seconds.", - min=0, - max=300, - ), - SettingSpec( - key="qdrant_required", - group="vector_store", - label="Require vector store", - kind="bool", - default=False, - description="Fail startup if configured vector-store clients cannot be reached.", - ), SettingSpec( key="turbopuffer_api_key", group="vector_store", @@ -38,6 +12,14 @@ description="turbopuffer API key.", secret=True, ), + SettingSpec( + key="turbopuffer_base_url", + group="vector_store", + label="turbopuffer base URL", + kind="string", + default=None, + description="Optional turbopuffer API base URL.", + ), SettingSpec( key="turbopuffer_region", group="vector_store", @@ -54,14 +36,4 @@ default="bigrag_", description="Prefix prepended to turbopuffer namespace names.", ), - SettingSpec( - key="qdrant_search_ef", - group="vector_store", - label="Qdrant search ef", - kind="int", - default=None, - description="Optional Qdrant HNSW search ef override.", - min=1, - max=10000, - ), ) diff --git a/api/bigrag/services/runtime_settings_apply.py b/api/bigrag/services/runtime_settings_apply.py index 711d5549..f9784f4c 100644 --- a/api/bigrag/services/runtime_settings_apply.py +++ b/api/bigrag/services/runtime_settings_apply.py @@ -42,10 +42,8 @@ } VECTOR_CONFIG_KEYS = { - "qdrant_connect_timeout_seconds", - "qdrant_search_ef", - "qdrant_url", "turbopuffer_api_key", + "turbopuffer_base_url", "turbopuffer_namespace_prefix", "turbopuffer_region", } @@ -155,8 +153,9 @@ async def _prepare_vector_backend(values: dict[str, Any]) -> VectorStore: store = VectorStore() _configure_vector_store(store, values) try: - store.connect() - await store.health_check() + if values.get("turbopuffer_api_key"): + store.connect() + await store.health_check() return store except Exception: await store.close() @@ -165,10 +164,8 @@ async def _prepare_vector_backend(values: dict[str, Any]) -> VectorStore: def _configure_vector_store(store: VectorStore, values: dict[str, Any]) -> None: store.configure( - qdrant_url=values["qdrant_url"], - connect_timeout_seconds=values["qdrant_connect_timeout_seconds"], - search_ef=values["qdrant_search_ef"], turbopuffer_api_key=values["turbopuffer_api_key"], + turbopuffer_base_url=values["turbopuffer_base_url"], turbopuffer_region=values["turbopuffer_region"], turbopuffer_namespace_prefix=values["turbopuffer_namespace_prefix"], ) diff --git a/api/bigrag/services/vector_migration/__init__.py b/api/bigrag/services/vector_migration/__init__.py deleted file mode 100644 index 28c5b5a8..00000000 --- a/api/bigrag/services/vector_migration/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -from bigrag.services.vector_migration.jobs import ( - VectorMigrationConflictError, - VectorMigrationError, - create_vector_migration_job, - delete_vector_migration_job, - run_vector_migration_job, -) - -__all__ = [ - "VectorMigrationConflictError", - "VectorMigrationError", - "create_vector_migration_job", - "delete_vector_migration_job", - "run_vector_migration_job", -] diff --git a/api/bigrag/services/vector_migration/jobs.py b/api/bigrag/services/vector_migration/jobs.py deleted file mode 100644 index b4cf092c..00000000 --- a/api/bigrag/services/vector_migration/jobs.py +++ /dev/null @@ -1,522 +0,0 @@ -from __future__ import annotations - -import asyncio -import uuid -from datetime import UTC, datetime -from typing import Any - -import sqlalchemy as sa - -from bigrag.db.engine import session_factory -from bigrag.db.models import AuditLog, BackupJob, Collection, ConnectorSyncJob, VectorMigrationJob -from bigrag.logging import get_logger -from bigrag.services import collection_cache -from bigrag.services.error_sanitize import sanitize_message_text -from bigrag.services.maintenance import ( - acquire_maintenance_lock, - active_lock, - release_maintenance_lock, -) -from bigrag.services.queue import ingestion_queue -from bigrag.services.retrieval import invalidate_collection_query_cache -from bigrag.services.runtime_settings import get_value -from bigrag.services.vector_store import vector_store -from bigrag.services.vector_store._util import validate_provider -from bigrag.services.vector_store.base import _FIXED_PAYLOAD_FIELDS - -logger = get_logger("bigrag.vector_migration") - -ACTIVE_STATUSES = ("pending", "running", "canceling") -ACTIVE_BACKUP_STATUSES = ("pending", "running") - - -class VectorMigrationError(RuntimeError): - pass - - -class VectorMigrationConflictError(VectorMigrationError): - pass - - -class VectorMigrationCanceledError(VectorMigrationError): - pass - - -async def create_vector_migration_job( - *, - collection_name: str, - target_provider: str, - created_by: uuid.UUID | None, -) -> VectorMigrationJob: - target = validate_provider(target_provider) - lock = await active_lock() - if lock is not None: - raise VectorMigrationConflictError(f"Instance maintenance active: {lock.reason}") - if target not in vector_store.configured_providers: - raise VectorMigrationError(f"{target} vector store is not configured") - async with session_factory()() as session: - async with session.begin(): - active_backup = await session.scalar( - sa.select(BackupJob) - .where(BackupJob.status.in_(ACTIVE_BACKUP_STATUSES)) - .order_by(BackupJob.created_at.desc()) - .limit(1) - .with_for_update() - ) - if active_backup is not None: - raise VectorMigrationConflictError("A backup is already pending or running") - active = await session.scalar( - sa.select(VectorMigrationJob) - .where(VectorMigrationJob.status.in_(ACTIVE_STATUSES)) - .order_by(VectorMigrationJob.created_at.desc()) - .limit(1) - .with_for_update() - ) - if active is not None: - raise VectorMigrationConflictError( - "A vector migration is already pending, running, or canceling" - ) - collection = await session.scalar( - sa.select(Collection).where(Collection.name == collection_name).with_for_update() - ) - if collection is None: - raise VectorMigrationError("Collection not found") - source = validate_provider(collection.vector_store_provider) - if source == target: - raise VectorMigrationConflictError("Collection already uses that vector provider") - job = VectorMigrationJob( - collection_id=collection.id, - collection_name=collection.name, - source_provider=source, - target_provider=target, - created_by=created_by, - ) - session.add(job) - await session.refresh(job) - return job - - -async def delete_vector_migration_job(job_id: uuid.UUID) -> str | None: - async with session_factory()() as session: - async with session.begin(): - job = await session.scalar( - sa.select(VectorMigrationJob) - .where(VectorMigrationJob.id == job_id) - .with_for_update() - ) - if job is None: - return None - if job.status == "pending" or job.status in {"succeeded", "failed"}: - await session.delete(job) - return "deleted" - details = dict(job.details or {}) - details["delete_requested"] = True - job.details = details - job.status = "canceling" - job.phase = "canceling" - job.updated_at = datetime.now(UTC) - return "stop_requested" - - -async def run_vector_migration_job(job_id: str) -> None: - owner_id = uuid.UUID(job_id) - locked = False - try: - job = await _get_job(owner_id) - if job is None: - return - if job.status != "pending": - return - locked = await acquire_maintenance_lock( - owner_id, - reason=f"vector migration for {job.collection_name}", - metadata={ - "collection": job.collection_name, - "source_provider": job.source_provider, - "target_provider": job.target_provider, - }, - ) - if not locked: - await _fail_job(owner_id, "Another maintenance lock is active") - return - if not await _mark_running(owner_id): - return - await _raise_if_delete_requested(owner_id) - await _wait_for_connector_sync_drain(owner_id) - await _wait_for_ingestion_drain(owner_id) - await _run_locked_migration(owner_id) - except VectorMigrationCanceledError: - await _delete_job(owner_id, "vector_migration.deleted", {"reason": "canceled"}) - except Exception as exc: - logger.exception("vector migration failed", job_id=job_id, error=str(exc)) - await _fail_job(owner_id, sanitize_message_text(str(exc)) or "Vector migration failed") - finally: - if locked: - await release_maintenance_lock(owner_id) - - -async def _run_locked_migration(job_id: uuid.UUID) -> None: - job, collection = await _load_job_and_collection(job_id) - if job is None: - return - if collection is None: - await _fail_job(job_id, "Collection not found") - return - source = validate_provider(job.source_provider) - target = validate_provider(job.target_provider) - cutover_done = False - copied = 0 - try: - if collection.vector_store_provider != source: - raise VectorMigrationError( - "Collection vector provider changed before migration started" - ) - await _raise_if_delete_requested(job_id) - await _update_job(job_id, phase="provisioning", progress=0.2) - await vector_store.delete_collection(collection.name, provider=target) - await _raise_if_delete_requested(job_id) - await vector_store.create_collection( - collection.name, - collection.dimension, - index_type=collection.index_type, - tenant_field=collection.tenant_field, - provider=target, - ) - await _raise_if_delete_requested(job_id) - copied = await _copy_points(job_id, collection.name, source, target) - await _raise_if_delete_requested(job_id) - await _update_job( - job_id, - phase="verifying", - progress=0.86, - copied_points=copied, - total_points=copied, - ) - target_count = 0 if copied == 0 else await _count_points(collection.name, target) - if target_count != copied: - raise VectorMigrationError( - f"Target point count mismatch: copied {copied}, target has {target_count}" - ) - await _raise_if_delete_requested(job_id) - await _cutover_collection(job_id, collection.id, collection.name, source, target) - cutover_done = True - await _update_job( - job_id, - phase="cleanup", - progress=0.94, - copied_points=copied, - total_points=copied, - ) - await vector_store.delete_collection(collection.name, provider=source) - await _complete_job(job_id, copied) - except VectorMigrationCanceledError: - if not cutover_done: - try: - await vector_store.delete_collection(collection.name, provider=target) - except Exception as cleanup_exc: - logger.warning( - "canceled vector migration cleanup failed", - collection=collection.name, - target_provider=target, - error=str(cleanup_exc), - ) - await _delete_job(job_id, "vector_migration.deleted", {"reason": "canceled"}) - except Exception as exc: - message = sanitize_message_text(str(exc)) or "Vector migration failed" - if not cutover_done: - try: - await vector_store.delete_collection(collection.name, provider=target) - except Exception as cleanup_exc: - logger.warning( - "partial vector migration cleanup failed", - collection=collection.name, - target_provider=target, - error=str(cleanup_exc), - ) - await _fail_job( - job_id, - message, - phase="cleanup_failed" if cutover_done else "failed", - copied_points=copied, - total_points=copied or None, - ) - - -async def _copy_points( - job_id: uuid.UUID, - collection: str, - source: str, - target: str, -) -> int: - batch_size = max(1, min(int(await get_value("ingestion_batch_size")), 1000)) - copied = 0 - batch: list[dict[str, Any]] = [] - await _raise_if_delete_requested(job_id) - await _update_job(job_id, phase="copying", progress=0.32) - async for point in vector_store.iter_collection_points( - collection, - with_vectors=True, - provider=source, - ): - await _raise_if_delete_requested(job_id) - batch.append(_normalise_point(point)) - if len(batch) >= batch_size: - copied += await _insert_batch(collection, target, batch) - batch.clear() - await _update_job( - job_id, - copied_points=copied, - progress=min(0.84, 0.34 + copied / (copied + batch_size) * 0.48), - ) - if batch: - await _raise_if_delete_requested(job_id) - copied += await _insert_batch(collection, target, batch) - await _update_job(job_id, copied_points=copied, progress=0.84) - return copied - - -def _normalise_point(point: dict[str, Any]) -> dict[str, Any]: - payload = dict(point.get("payload") or {}) - vector = point.get("vector") - if vector is None: - raise VectorMigrationError("Source point is missing its vector") - public_id = str(payload.get("id") or point.get("id") or "") - if not public_id: - raise VectorMigrationError("Source point is missing its id") - return { - "id": public_id, - "document_id": str(payload.get("document_id") or ""), - "chunk_index": int(payload.get("chunk_index") or 0), - "text": str(payload.get("text") or ""), - "vector": vector, - "metadata": { - k: v for k, v in payload.items() if k not in _FIXED_PAYLOAD_FIELDS and v is not None - }, - } - - -async def _insert_batch( - collection: str, - target: str, - batch: list[dict[str, Any]], -) -> int: - return await vector_store.insert( - collection=collection, - ids=[item["id"] for item in batch], - document_ids=[item["document_id"] for item in batch], - chunk_indices=[item["chunk_index"] for item in batch], - texts=[item["text"] for item in batch], - embeddings=[item["vector"] for item in batch], - metadata=[item["metadata"] for item in batch], - provider=target, - ) - - -async def _count_points(collection: str, provider: str) -> int: - count = 0 - async for _point in vector_store.iter_collection_points( - collection, - with_vectors=False, - provider=provider, - ): - count += 1 - return count - - -async def _cutover_collection( - job_id: uuid.UUID, - collection_id: uuid.UUID, - collection_name: str, - source: str, - target: str, -) -> None: - await _update_job(job_id, phase="cutover", progress=0.9) - async with session_factory()() as session: - async with session.begin(): - collection = await session.scalar( - sa.select(Collection).where(Collection.id == collection_id).with_for_update() - ) - if collection is None: - raise VectorMigrationError("Collection not found during cutover") - if collection.vector_store_provider != source: - raise VectorMigrationError("Collection vector provider changed during migration") - collection.vector_store_provider = target - await collection_cache.invalidate(collection_name) - await invalidate_collection_query_cache(collection_name) - - -async def _wait_for_ingestion_drain(job_id: uuid.UUID, max_wait_seconds: int = 1800) -> None: - deadline = asyncio.get_event_loop().time() + max_wait_seconds - while True: - await _raise_if_delete_requested(job_id) - stats = await ingestion_queue.stats - processing = int(stats.get("processing") or 0) - if processing <= 0: - return - if asyncio.get_event_loop().time() >= deadline: - raise VectorMigrationError( - f"Timed out waiting for ingestion drain after {max_wait_seconds}s" - ) - await _update_job(job_id, phase="draining", progress=0.12) - await asyncio.sleep(1) - - -async def _wait_for_connector_sync_drain(job_id: uuid.UUID, max_wait_seconds: int = 1800) -> None: - deadline = asyncio.get_event_loop().time() + max_wait_seconds - while True: - await _raise_if_delete_requested(job_id) - async with session_factory()() as session: - running = await session.scalar( - sa.select(sa.func.count()) - .select_from(ConnectorSyncJob) - .where(ConnectorSyncJob.status == "running") - ) - if int(running or 0) <= 0: - return - if asyncio.get_event_loop().time() >= deadline: - raise VectorMigrationError( - f"Timed out waiting for connector sync drain after {max_wait_seconds}s" - ) - await _update_job(job_id, phase="draining", progress=0.08) - await asyncio.sleep(1) - - -async def _load_job_and_collection( - job_id: uuid.UUID, -) -> tuple[VectorMigrationJob | None, Collection | None]: - async with session_factory()() as session: - job = await session.get(VectorMigrationJob, job_id) - if job is None: - return None, None - collection = await session.scalar( - sa.select(Collection).where(Collection.name == job.collection_name) - ) - return job, collection - - -async def _get_job(job_id: uuid.UUID) -> VectorMigrationJob | None: - async with session_factory()() as session: - return await session.get(VectorMigrationJob, job_id) - - -async def _mark_running(job_id: uuid.UUID) -> bool: - updated = await _update_job( - job_id, - status="running", - phase="draining", - progress=0.04, - started_at=datetime.now(UTC), - ) - if updated <= 0: - return False - await _insert_audit(job_id, "vector_migration.start", {}) - return True - - -async def _complete_job(job_id: uuid.UUID, copied_points: int) -> None: - await _update_job( - job_id, - status="succeeded", - phase="complete", - progress=1.0, - copied_points=copied_points, - total_points=copied_points, - completed_at=datetime.now(UTC), - ) - await _insert_audit(job_id, "vector_migration.succeeded", {"copied_points": copied_points}) - if await _delete_requested(job_id): - await _delete_job( - job_id, - "vector_migration.deleted", - {"reason": "completed_after_delete_request"}, - ) - - -async def _fail_job( - job_id: uuid.UUID, - message: str, - *, - phase: str = "failed", - copied_points: int | None = None, - total_points: int | None = None, -) -> None: - values: dict[str, Any] = { - "status": "failed", - "phase": phase, - "error_message": sanitize_message_text(message), - "completed_at": datetime.now(UTC), - } - if copied_points is not None: - values["copied_points"] = copied_points - if total_points is not None: - values["total_points"] = total_points - await _update_job(job_id, **values) - await _insert_audit(job_id, "vector_migration.failed", {"error": values["error_message"]}) - if await _delete_requested(job_id): - await _delete_job( - job_id, - "vector_migration.deleted", - {"reason": "failed_after_delete_request"}, - ) - - -async def _update_job(job_id: uuid.UUID, **values: Any) -> int: - async with session_factory()() as session: - values["updated_at"] = sa.func.now() - result = await session.execute( - sa.update(VectorMigrationJob).where(VectorMigrationJob.id == job_id).values(**values) - ) - await session.commit() - return result.rowcount or 0 - - -async def _insert_audit(job_id: uuid.UUID, action: str, metadata: dict[str, Any]) -> None: - async with session_factory()() as session: - job = await session.get(VectorMigrationJob, job_id) - session.add( - AuditLog( - actor_id=job.created_by if job else None, - actor_email=None, - api_key_id=None, - action=action, - resource_type="vector_migration_job", - resource_id=str(job_id), - meta=metadata, - ip=None, - user_agent=None, - ) - ) - await session.commit() - - -async def _delete_requested(job_id: uuid.UUID) -> bool: - job = await _get_job(job_id) - if job is None: - return False - return job.status == "canceling" or bool((job.details or {}).get("delete_requested")) - - -async def _raise_if_delete_requested(job_id: uuid.UUID) -> None: - if await _delete_requested(job_id): - raise VectorMigrationCanceledError("Vector migration deletion requested") - - -async def _delete_job(job_id: uuid.UUID, action: str, metadata: dict[str, Any]) -> None: - async with session_factory()() as session: - job = await session.get(VectorMigrationJob, job_id) - session.add( - AuditLog( - actor_id=job.created_by if job else None, - actor_email=None, - api_key_id=None, - action=action, - resource_type="vector_migration_job", - resource_id=str(job_id), - meta=metadata, - ip=None, - user_agent=None, - ) - ) - if job is not None: - await session.delete(job) - await session.commit() diff --git a/api/bigrag/services/vector_store/_util.py b/api/bigrag/services/vector_store/_util.py index 4b847419..20c39cf4 100644 --- a/api/bigrag/services/vector_store/_util.py +++ b/api/bigrag/services/vector_store/_util.py @@ -7,7 +7,7 @@ logger = get_logger("bigrag.vector_store") -PROVIDERS: tuple[VectorStoreProvider, ...] = ("qdrant", "turbopuffer") +PROVIDERS: tuple[VectorStoreProvider, ...] = ("turbopuffer",) def validate_provider(value: str) -> VectorStoreProvider: diff --git a/api/bigrag/services/vector_store/base.py b/api/bigrag/services/vector_store/base.py index 6992ef0f..d59f98de 100644 --- a/api/bigrag/services/vector_store/base.py +++ b/api/bigrag/services/vector_store/base.py @@ -6,7 +6,7 @@ from bigrag.services._retrieval_filters import FilterExpression -VectorStoreProvider = Literal["qdrant", "turbopuffer"] +VectorStoreProvider = Literal["turbopuffer"] _POINT_NAMESPACE = uuid.UUID("1b04f7ca-0c3b-5d76-a5bb-6e4b4a40f61d") _FIXED_PAYLOAD_FIELDS = {"id", "text", "document_id", "chunk_index", "embedding"} @@ -29,7 +29,6 @@ class VectorStoreFeatureError(RuntimeError): class VectorStoreBackend(Protocol): provider: VectorStoreProvider - supports_text_search: bool def connect(self) -> None: ... diff --git a/api/bigrag/services/vector_store/facade.py b/api/bigrag/services/vector_store/facade.py index 956a9139..b02daed4 100644 --- a/api/bigrag/services/vector_store/facade.py +++ b/api/bigrag/services/vector_store/facade.py @@ -6,24 +6,20 @@ from typing import Any from bigrag.config import settings as _app_settings -from bigrag.logging import get_logger from bigrag.services._retrieval_filters import FilterExpression -from bigrag.services.error_sanitize import sanitize_message_text -from bigrag.services.vector_store._util import PROVIDERS, close_backends, validate_provider -from bigrag.services.vector_store.base import VectorStoreBackend, VectorStoreProvider -from bigrag.services.vector_store.qdrant import QdrantVectorStore +from bigrag.services.vector_store._util import close_backends +from bigrag.services.vector_store.base import VectorStoreBackend from bigrag.services.vector_store.turbopuffer import TurbopufferVectorStore -logger = get_logger("bigrag.vector_store") - class VectorStore: def __init__(self) -> None: - self.provider: str = "collection" - self.backends: dict[VectorStoreProvider, VectorStoreBackend] = {} - self._configured_providers: set[VectorStoreProvider] = {"qdrant"} - self._fallback_provider: VectorStoreProvider = "qdrant" - self.backend = QdrantVectorStore() + self._backend_instance: VectorStoreBackend = TurbopufferVectorStore( + api_key=_app_settings.turbopuffer_api_key, + region=_app_settings.turbopuffer_region, + namespace_prefix=_app_settings.turbopuffer_namespace_prefix, + base_url=_app_settings.turbopuffer_base_url, + ) self.client: Any | None = None self._condition = asyncio.Condition() self._active = 0 @@ -31,73 +27,32 @@ def __init__(self) -> None: @property def backend(self) -> VectorStoreBackend: - return self.backends[self._fallback_provider] + return self._backend_instance @backend.setter def backend(self, value: VectorStoreBackend) -> None: - provider = validate_provider(getattr(value, "provider", self._fallback_provider)) - self.backends[provider] = value - self._fallback_provider = provider - self._configured_providers.add(provider) + self._backend_instance = value self._sync_client() def configure( self, url: str | None = None, *, - provider: VectorStoreProvider | None = None, - connect_timeout_seconds: int | float | None = 10, - search_ef: int | None = None, - qdrant_url: str | None = None, - qdrant_prefer_grpc: bool | None = None, - qdrant_grpc_port: int | None = None, turbopuffer_api_key: str | None = None, turbopuffer_region: str = "aws-us-east-1", turbopuffer_namespace_prefix: str = "bigrag_", + turbopuffer_base_url: str | None = None, + **_: Any, ) -> None: - if provider is not None: - validate_provider(provider) - prefer_grpc = ( - qdrant_prefer_grpc - if qdrant_prefer_grpc is not None - else _app_settings.qdrant_prefer_grpc + self.backend = TurbopufferVectorStore( + api_key=turbopuffer_api_key, + region=turbopuffer_region or "aws-us-east-1", + namespace_prefix=turbopuffer_namespace_prefix, + base_url=turbopuffer_base_url, ) - grpc_port = ( - qdrant_grpc_port if qdrant_grpc_port is not None else _app_settings.qdrant_grpc_port - ) - self.backends = { - "qdrant": QdrantVectorStore( - qdrant_url or url or "http://localhost:6333", - connect_timeout_seconds=connect_timeout_seconds, - search_ef=search_ef, - prefer_grpc=prefer_grpc, - grpc_port=grpc_port, - ), - "turbopuffer": TurbopufferVectorStore( - api_key=turbopuffer_api_key, - region=turbopuffer_region, - namespace_prefix=turbopuffer_namespace_prefix, - ), - } - self._configured_providers = {"qdrant"} - if turbopuffer_api_key or provider == "turbopuffer": - self._configured_providers.add("turbopuffer") - self._fallback_provider = provider or "qdrant" - self.provider = provider or "collection" - self._sync_client() - - def supports_text_search_for(self, provider: VectorStoreProvider | None = None) -> bool: - return self.backends[ - validate_provider(provider or self._fallback_provider) - ].supports_text_search - - @property - def configured_providers(self) -> tuple[VectorStoreProvider, ...]: - return tuple(provider for provider in PROVIDERS if provider in self._configured_providers) def connect(self) -> None: - for provider in self.configured_providers: - self.backends[provider].connect() + self.backend.connect() self._sync_client() async def close(self) -> None: @@ -106,7 +61,7 @@ async def close(self) -> None: try: while self._active: await self._condition.wait() - await close_backends(self.backends) + await self.backend.close() self._sync_client() finally: self._swapping = False @@ -117,23 +72,20 @@ async def replace_with(self, other: VectorStore) -> None: self._swapping = True while self._active: await self._condition.wait() - old_backends = dict(self.backends) - self.provider = other.provider - self.backends = dict(other.backends) - self._configured_providers = set(other._configured_providers) - self._fallback_provider = other._fallback_provider + old_backend = self.backend + self.backend = other.backend self._sync_client() - await close_backends(old_backends, log_errors=True) + await close_backends({"turbopuffer": old_backend}, log_errors=True) self._swapping = False self._condition.notify_all() @asynccontextmanager - async def _backend(self, provider: VectorStoreProvider) -> AsyncIterator[VectorStoreBackend]: + async def _backend(self) -> AsyncIterator[VectorStoreBackend]: async with self._condition: while self._swapping: await self._condition.wait() self._active += 1 - backend = self.backends[provider] + backend = self.backend try: yield backend finally: @@ -143,82 +95,18 @@ async def _backend(self, provider: VectorStoreProvider) -> AsyncIterator[VectorS self._condition.notify_all() async def health_check(self) -> None: - errors: list[str] = [] - for provider in self.configured_providers: - try: - async with self._backend(provider) as backend: - await backend.health_check() - except Exception as exc: - logger.warning( - "vector store provider unhealthy", - provider=provider, - error_type=type(exc).__name__, - ) - errors.append(f"{provider}: {type(exc).__name__}") - if errors: - raise RuntimeError("; ".join(errors)) + async with self._backend() as backend: + await backend.health_check() - async def provider_health(self) -> dict[str, dict[str, object]]: - results: dict[str, dict[str, object]] = {} - for provider in PROVIDERS: - configured = provider in self._configured_providers - if not configured: - results[provider] = {"configured": False, "status": "not_configured", "error": None} - continue - try: - async with self._backend(provider) as backend: - await backend.health_check() - results[provider] = {"configured": True, "status": "ok", "error": None} - except Exception as exc: - logger.warning( - "vector store provider_health error", - provider=provider, - error_type=type(exc).__name__, - ) - results[provider] = { - "configured": True, - "status": "error", - "error": sanitize_message_text(type(exc).__name__), - } - return results - - def _client(self, provider: VectorStoreProvider | None = None) -> Any: - backend = self.backends[validate_provider(provider or self._fallback_provider)] - if isinstance(backend, QdrantVectorStore): - return backend._client() - client = getattr(backend, "client", None) + def _client(self) -> Any: + client = getattr(self.backend, "client", None) if client is None: - backend.connect() - client = getattr(backend, "client", None) + self.backend.connect() + client = getattr(self.backend, "client", None) return client - async def _provider_for( - self, - collection: str, - provider: VectorStoreProvider | None, - ) -> VectorStoreProvider: - if provider is not None: - return validate_provider(provider) - try: - from bigrag.services.collection_cache import get_or_404 - - value = (await get_or_404(collection)).get("vector_store_provider") - if value: - return validate_provider(str(value)) - except Exception: - return self._fallback_provider - return self._fallback_provider - def _sync_client(self) -> None: - fallback = self.backends.get(self._fallback_provider) - self.client = getattr(fallback, "client", None) - if self.client is not None: - return - for provider in self.configured_providers: - client = getattr(self.backends[provider], "client", None) - if client is not None: - self.client = client - return + self.client = getattr(self.backend, "client", None) async def create_collection( self, @@ -226,19 +114,17 @@ async def create_collection( dimension: int, index_type: str = "HNSW", tenant_field: str | None = None, - provider: VectorStoreProvider | None = None, + **_: Any, ) -> None: - selected_provider = await self._provider_for(name, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: await backend.create_collection(name, dimension, index_type, tenant_field) async def delete_collection( self, name: str, - provider: VectorStoreProvider | None = None, + **_: Any, ) -> None: - selected_provider = await self._provider_for(name, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: await backend.delete_collection(name) async def insert( @@ -250,10 +136,9 @@ async def insert( texts: list[str], embeddings: list[list[float]], metadata: list[dict] | None = None, - provider: VectorStoreProvider | None = None, + **_: Any, ) -> int: - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: return await backend.insert( collection, ids, @@ -270,11 +155,10 @@ async def search( query_embedding: list[float], top_k: int = 10, filters: FilterExpression | None = None, - provider: VectorStoreProvider | None = None, payload_fields: list[str] | None = None, + **_: Any, ) -> list[dict]: - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: return await backend.search( collection, query_embedding, @@ -289,30 +173,27 @@ async def get_chunks( document_id: str, limit: int = 10000, offset: int = 0, - provider: VectorStoreProvider | None = None, + **_: Any, ) -> tuple[list[dict], int]: - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: return await backend.get_chunks(collection, document_id, limit, offset) async def delete_by_document( self, collection: str, document_id: str, - provider: VectorStoreProvider | None = None, + **_: Any, ) -> None: - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: await backend.delete_by_document(collection, document_id) async def delete_by_ids( self, collection: str, ids: list[str], - provider: VectorStoreProvider | None = None, + **_: Any, ) -> None: - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: await backend.delete_by_ids(collection, ids) async def text_search( @@ -321,10 +202,9 @@ async def text_search( query_terms: list[str], top_k: int = 10, filters: FilterExpression | None = None, - provider: VectorStoreProvider | None = None, + **_: Any, ) -> list[dict]: - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: return await backend.text_search(collection, query_terms, top_k, filters) async def upsert( @@ -334,10 +214,9 @@ async def upsert( embeddings: list[list[float]], texts: list[str], metadata: list[dict] | None = None, - provider: VectorStoreProvider | None = None, + **_: Any, ) -> int: - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: return await backend.upsert(collection, ids, embeddings, texts, metadata) async def export_collection_points( @@ -345,10 +224,9 @@ async def export_collection_points( collection: str, *, with_vectors: bool = True, - provider: VectorStoreProvider | None = None, + **_: Any, ) -> list[dict]: - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: return await backend.export_collection_points(collection, with_vectors=with_vectors) async def iter_collection_points( @@ -356,10 +234,9 @@ async def iter_collection_points( collection: str, *, with_vectors: bool = True, - provider: VectorStoreProvider | None = None, + **_: Any, ): - selected_provider = await self._provider_for(collection, provider) - async with self._backend(selected_provider) as backend: + async with self._backend() as backend: async for point in backend.iter_collection_points( collection, with_vectors=with_vectors, diff --git a/api/bigrag/services/vector_store/qdrant.py b/api/bigrag/services/vector_store/qdrant.py deleted file mode 100644 index 3c72ba31..00000000 --- a/api/bigrag/services/vector_store/qdrant.py +++ /dev/null @@ -1,499 +0,0 @@ -from __future__ import annotations - -import asyncio -from collections.abc import Awaitable, Callable -from typing import Any - -import httpx -from qdrant_client import AsyncQdrantClient, models - -from bigrag.logging import get_logger -from bigrag.services._retrieval_filters import FilterExpression -from bigrag.services.vector_store.base import ( - VectorStoreProvider, - _backend_name, - _build_payload, - _chunk_rows_from_payloads, - _point_id, - _row_from_payload, -) -from bigrag.services.vector_store.qdrant_filter import combine_filters, to_qdrant_filter - -logger = get_logger("bigrag.vector_store") - -_TRANSIENT_ERRORS = ( - ConnectionError, - TimeoutError, - OSError, - httpx.HTTPError, -) - - -class QdrantVectorStore: - provider: VectorStoreProvider = "qdrant" - supports_text_search = True - - def __init__( - self, - url: str = "http://localhost:6333", - *, - connect_timeout_seconds: int | float | None = 10, - search_ef: int | None = None, - prefix: str = "bigrag_", - prefer_grpc: bool = False, - grpc_port: int = 6334, - ) -> None: - self.url = url - self.client: AsyncQdrantClient | None = None - self._max_retries: int = 2 - self._connect_timeout_seconds = ( - None - if connect_timeout_seconds is None or connect_timeout_seconds <= 0 - else float(connect_timeout_seconds) - ) - self._search_ef = search_ef if search_ef and search_ef > 0 else None - self.prefix = prefix - self._prefer_grpc = bool(prefer_grpc) - self._grpc_port = int(grpc_port) if grpc_port else 6334 - - def connect(self) -> None: - self.client = AsyncQdrantClient( - url=self.url, - timeout=self._connect_timeout_seconds, - prefer_grpc=self._prefer_grpc, - grpc_port=self._grpc_port, - ) - logger.info( - "connected to qdrant", - url=self.url, - prefer_grpc=self._prefer_grpc, - grpc_port=self._grpc_port, - ) - - async def reconnect(self) -> None: - logger.warning("reconnecting to qdrant", url=self.url) - await self.close() - self.connect() - logger.info("reconnected to qdrant", url=self.url) - - async def _run_with_retry( - self, - fn: Callable[..., Awaitable[Any]], - *args: Any, - **kwargs: Any, - ) -> Any: - last_error = None - for attempt in range(self._max_retries + 1): - try: - return await fn(*args, **kwargs) - except _TRANSIENT_ERRORS as e: - last_error = e - if attempt < self._max_retries: - logger.warning( - "qdrant transient error", - attempt=attempt + 1, - max_attempts=self._max_retries + 1, - error=repr(e), - ) - await self.reconnect() - else: - raise - except Exception as e: - err_str = str(e).lower() - if any(kw in err_str for kw in ("connect", "timeout", "unavailable", "reset")): - last_error = e - if attempt < self._max_retries: - logger.warning( - "qdrant likely transient error", - attempt=attempt + 1, - max_attempts=self._max_retries + 1, - error=repr(e), - ) - await self.reconnect() - else: - raise - else: - raise - raise last_error - - async def close(self) -> None: - if self.client: - await self.client.close() - self.client = None - logger.info("qdrant connection closed") - - async def health_check(self) -> None: - client = self._client() - await self._run_with_retry(client.get_collections) - - def _client(self) -> AsyncQdrantClient: - if self.client is None: - self.connect() - if self.client is None: - raise RuntimeError("Qdrant client is not connected") - return self.client - - def _col(self, name: str) -> str: - return _backend_name(self.prefix, name) - - async def _create_payload_index( - self, - collection_name: str, - field_name: str, - schema: Any, - ) -> None: - client = self._client() - try: - await self._run_with_retry( - client.create_payload_index, - collection_name=collection_name, - field_name=field_name, - field_schema=schema, - wait=True, - ) - except Exception as exc: - if "already exists" in str(exc).lower() or "exists" in str(exc).lower(): - return - logger.warning( - "vector_store: payload index creation failed", - collection=collection_name, - field=field_name, - error=str(exc), - ) - - async def _ensure_payload_indexes( - self, - collection_name: str, - tenant_field: str | None = None, - ) -> None: - text_schema = models.TextIndexParams( - type=models.TextIndexType.TEXT, - tokenizer=models.TokenizerType.WORD, - min_token_len=2, - lowercase=True, - ) - indexes: list[tuple[str, Any]] = [ - ("id", "keyword"), - ("document_id", "keyword"), - ("chunk_index", "integer"), - ("char_start", "integer"), - ("char_end", "integer"), - ("page_no", "integer"), - ("text", text_schema), - ] - if tenant_field: - indexes.append((tenant_field, "keyword")) - - await asyncio.gather( - *[ - self._create_payload_index(collection_name, field_name, schema) - for field_name, schema in indexes - ] - ) - - async def create_collection( - self, - name: str, - dimension: int, - index_type: str = "HNSW", - tenant_field: str | None = None, - ) -> None: - col = self._col(name) - client = self._client() - - if not await self._run_with_retry(client.collection_exists, col): - await self._run_with_retry( - client.create_collection, - collection_name=col, - vectors_config=models.VectorParams( - size=dimension, - distance=models.Distance.COSINE, - ), - ) - logger.info( - "created qdrant collection", - collection=col, - dimension=dimension, - index=index_type, - ) - - await self._ensure_payload_indexes(col, tenant_field=tenant_field) - - async def delete_collection(self, name: str) -> None: - col = self._col(name) - client = self._client() - if await self._run_with_retry(client.collection_exists, col): - await self._run_with_retry(client.delete_collection, col) - logger.info("dropped qdrant collection", collection=col) - - async def insert( - self, - collection: str, - ids: list[str], - document_ids: list[str], - chunk_indices: list[int], - texts: list[str], - embeddings: list[list[float]], - metadata: list[dict] | None = None, - ) -> int: - col = self._col(collection) - points = [] - for i in range(len(ids)): - points.append( - models.PointStruct( - id=_point_id(col, ids[i]), - vector=embeddings[i], - payload=_build_payload( - id_=ids[i], - document_id=document_ids[i], - chunk_index=chunk_indices[i], - text=texts[i], - metadata=metadata[i] if metadata else None, - ), - ) - ) - - client = self._client() - await self._run_with_retry(client.upsert, collection_name=col, points=points, wait=True) - logger.info("inserted vectors", collection=col, count=len(points)) - return len(points) - - def _search_params(self) -> models.SearchParams | None: - if self._search_ef is None: - return None - return models.SearchParams(hnsw_ef=self._search_ef) - - @staticmethod - def _row_from_qdrant(point: Any) -> dict: - payload = dict(getattr(point, "payload", None) or {}) - point_id = str(getattr(point, "id", "")) - return _row_from_payload(point_id, getattr(point, "score", 0.0), payload) - - async def search( - self, - collection: str, - query_embedding: list[float], - top_k: int = 10, - filters: FilterExpression | None = None, - payload_fields: list[str] | None = None, - ) -> list[dict]: - col = self._col(collection) - - with_payload: Any = ( - models.PayloadSelectorInclude(include=list(payload_fields)) if payload_fields else True - ) - - client = self._client() - results = await self._run_with_retry( - client.query_points, - collection_name=col, - query=query_embedding, - limit=top_k, - query_filter=to_qdrant_filter(filters), - search_params=self._search_params(), - with_payload=with_payload, - with_vectors=False, - ) - - hits = [self._row_from_qdrant(point) for point in results.points] - logger.info("vector search", collection=col, top_k=top_k, hits=len(hits), filters=filters) - return hits - - async def get_chunks( - self, - collection: str, - document_id: str, - limit: int = 10000, - offset: int = 0, - ) -> tuple[list[dict], int]: - col = self._col(collection) - client = self._client() - if not await self._run_with_retry(client.collection_exists, col): - return [], 0 - - doc_filter = models.Filter( - must=[ - models.FieldCondition( - key="document_id", - match=models.MatchValue(value=document_id), - ) - ] - ) - - try: - count_resp = await self._run_with_retry( - client.count, - collection_name=col, - count_filter=doc_filter, - exact=True, - ) - total = int(getattr(count_resp, "count", 0)) - except Exception: - total = 0 - - needed = offset + max(limit, 0) - if needed <= 0: - return [], total - - results = [] - next_offset = None - page_size = min(max(needed, 256), 10000) - while True: - batch, next_offset = await self._run_with_retry( - client.scroll, - collection_name=col, - scroll_filter=doc_filter, - with_payload=True, - with_vectors=False, - limit=page_size, - offset=next_offset, - ) - results.extend(batch) - if next_offset is None: - break - if total and len(results) >= total: - break - payloads = [r.payload or {} for r in results] - rows, computed_total = _chunk_rows_from_payloads(payloads, limit, offset) - return rows, total or computed_total - - async def delete_by_document(self, collection: str, document_id: str) -> None: - col = self._col(collection) - client = self._client() - if not await self._run_with_retry(client.collection_exists, col): - return - await self._run_with_retry( - client.delete, - collection_name=col, - points_selector=models.Filter( - must=[ - models.FieldCondition( - key="document_id", - match=models.MatchValue(value=document_id), - ) - ] - ), - wait=True, - ) - logger.info("delete vectors by document", collection=col, document_id=document_id) - - async def delete_by_ids(self, collection: str, ids: list[str]) -> None: - col = self._col(collection) - client = self._client() - point_ids = [_point_id(col, id_) for id_ in ids] - await self._run_with_retry( - client.delete, - collection_name=col, - points_selector=point_ids, - wait=True, - ) - logger.info("delete vectors by ids", collection=col, count=len(ids)) - - async def text_search( - self, - collection: str, - query_terms: list[str], - top_k: int = 10, - filters: FilterExpression | None = None, - ) -> list[dict]: - col = self._col(collection) - terms = [term for term in query_terms if term] - if not terms: - return [] - - text_filter = models.Filter( - should=[ - models.FieldCondition(key="text", match=models.MatchText(text=term)) - for term in terms - ] - ) - combined_filter = combine_filters(to_qdrant_filter(filters), text_filter) - - try: - client = self._client() - results, _next_offset = await self._run_with_retry( - client.scroll, - collection_name=col, - scroll_filter=combined_filter, - with_payload=True, - with_vectors=False, - limit=top_k * 10, - ) - except _TRANSIENT_ERRORS: - raise - except Exception as exc: - logger.warning("text search query failed", collection=col, error=repr(exc)) - return [] - - logger.info("text search", collection=col, terms=len(terms), hits=len(results)) - return [self._row_from_qdrant(point) for point in results] - - async def upsert( - self, - collection: str, - ids: list[str], - embeddings: list[list[float]], - texts: list[str], - metadata: list[dict] | None = None, - ) -> int: - col = self._col(collection) - points = [] - for i in range(len(ids)): - points.append( - models.PointStruct( - id=_point_id(col, ids[i]), - vector=embeddings[i], - payload=_build_payload( - id_=ids[i], - document_id="", - chunk_index=0, - text=texts[i], - metadata=metadata[i] if metadata else None, - ), - ) - ) - - client = self._client() - await self._run_with_retry(client.upsert, collection_name=col, points=points, wait=True) - logger.info("upserted vectors", collection=col, count=len(points)) - return len(points) - - async def export_collection_points( - self, - collection: str, - *, - with_vectors: bool = True, - ) -> list[dict]: - return [ - point - async for point in self.iter_collection_points(collection, with_vectors=with_vectors) - ] - - async def iter_collection_points( - self, - collection: str, - *, - with_vectors: bool = True, - ): - col = self._col(collection) - client = self._client() - if not await self._run_with_retry(client.collection_exists, col): - return - offset = None - while True: - points, offset = await self._run_with_retry( - client.scroll, - collection_name=col, - limit=256, - offset=offset, - with_payload=True, - with_vectors=with_vectors, - ) - for point in points: - yield { - "id": str(getattr(point, "id", "")), - "payload": getattr(point, "payload", {}) or {}, - "vector": getattr(point, "vector", None) if with_vectors else None, - } - if offset is None: - break diff --git a/api/bigrag/services/vector_store/qdrant_filter.py b/api/bigrag/services/vector_store/qdrant_filter.py deleted file mode 100644 index ae1c44c0..00000000 --- a/api/bigrag/services/vector_store/qdrant_filter.py +++ /dev/null @@ -1,60 +0,0 @@ -from __future__ import annotations - -from qdrant_client import models - -from bigrag.services._retrieval_filters import FilterExpression - - -def to_qdrant_filter(filters: FilterExpression | None) -> models.Filter | None: - if filters is None: - return None - must: list[models.Condition] = [] - must_not: list[models.Condition] = [] - for condition in filters.conditions: - if condition.operator == "eq": - must.append( - models.FieldCondition( - key=condition.field, - match=models.MatchValue(value=condition.value), - ) - ) - elif condition.operator == "ne": - must_not.append( - models.FieldCondition( - key=condition.field, - match=models.MatchValue(value=condition.value), - ) - ) - elif condition.operator == "in": - must.append( - models.FieldCondition( - key=condition.field, - match=models.MatchAny(any=condition.value), - ) - ) - else: - must.append( - models.FieldCondition( - key=condition.field, - range=models.Range( - gt=condition.value if condition.operator == "gt" else None, - gte=condition.value if condition.operator == "gte" else None, - lt=condition.value if condition.operator == "lt" else None, - lte=condition.value if condition.operator == "lte" else None, - ), - ) - ) - if not must and not must_not: - return None - return models.Filter(must=must or None, must_not=must_not or None) - - -def combine_filters( - *filters: models.Filter | None, -) -> models.Filter | None: - active = [f for f in filters if f is not None] - if not active: - return None - if len(active) == 1: - return active[0] - return models.Filter(must=active) diff --git a/api/bigrag/services/vector_store/turbopuffer.py b/api/bigrag/services/vector_store/turbopuffer.py index 45d15492..5d25966e 100644 --- a/api/bigrag/services/vector_store/turbopuffer.py +++ b/api/bigrag/services/vector_store/turbopuffer.py @@ -7,7 +7,6 @@ from bigrag.logging import get_logger from bigrag.services._retrieval_filters import FilterCondition, FilterExpression from bigrag.services.vector_store.base import ( - VectorStoreFeatureError, VectorStoreProvider, _backend_name, _build_payload, @@ -53,7 +52,7 @@ def _schema(dimension: int) -> dict: _PUBLIC_ID_FIELD: {"type": "string"}, "document_id": {"type": "string"}, "chunk_index": {"type": "int"}, - "text": {"type": "string", "filterable": False}, + "text": {"type": "string", "full_text_search": True}, } @@ -69,7 +68,6 @@ def _row_payload(row: dict) -> dict: class TurbopufferVectorStore: provider: VectorStoreProvider = "turbopuffer" - supports_text_search = False def __init__( self, @@ -77,22 +75,23 @@ def __init__( api_key: str | None, region: str, namespace_prefix: str = "bigrag_", + base_url: str | None = None, ) -> None: self.api_key = api_key self.region = region self.prefix = namespace_prefix or "bigrag_" + self.base_url = base_url.rstrip("/") if base_url else None self.client: httpx.AsyncClient | None = None def connect(self) -> None: if not self.api_key: raise RuntimeError("turbopuffer API key is not configured") - base_url = f"https://{self.region}.turbopuffer.com" self.client = httpx.AsyncClient( - base_url=base_url, + base_url=self.base_url or f"https://{self.region}.turbopuffer.com", headers={"Authorization": f"Bearer {self.api_key}"}, timeout=30, ) - logger.info("connected to turbopuffer", region=self.region) + logger.info("connected to turbopuffer", region=self.region, base_url=self.base_url) def _client(self) -> httpx.AsyncClient: if self.client is None: @@ -261,7 +260,26 @@ async def text_search( top_k: int = 10, filters: FilterExpression | None = None, ) -> list[dict]: - raise VectorStoreFeatureError("turbopuffer does not support keyword or hybrid search in v1") + query = " ".join(term for term in query_terms if term).strip() + if not query: + return [] + payload: dict[str, Any] = { + "rank_by": ["text", "BM25", query], + "top_k": top_k, + "exclude_attributes": ["vector"], + } + turbo_filter = _to_turbopuffer_filter(filters) + if turbo_filter: + payload["filters"] = turbo_filter + rows = await self._query_rows(collection, payload) + results = [] + for row in rows: + point_id = str(row.get("id", "")) + score = row.get("$score") + if score is None: + score = max(0.0, 1.0 - float(row.get("$dist", 0.0))) + results.append(_row_from_payload(point_id, float(score), _row_payload(row))) + return results async def upsert( self, diff --git a/api/pyproject.toml b/api/pyproject.toml index 98a0454c..cf6e33de 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -11,7 +11,6 @@ dependencies = [ "asyncpg>=0.30.0,<1", "sqlalchemy[asyncio]>=2.0.36,<3", "alembic>=1.14.0,<2", - "qdrant-client>=1.17.0,<2", "docling>=2.25.0,<3", "pypdfium2>=5.7.0,<6", "huggingface-hub>=0.36.0,<2", diff --git a/api/uv.lock b/api/uv.lock index ed566c8f..c55d2bb2 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -210,7 +210,6 @@ dependencies = [ { name = "pydantic-settings" }, { name = "pypdfium2" }, { name = "python-multipart" }, - { name = "qdrant-client" }, { name = "redis", extra = ["hiredis"] }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "starlette" }, @@ -244,7 +243,6 @@ requires-dist = [ { name = "pydantic-settings", specifier = ">=2.7.0,<3" }, { name = "pypdfium2", specifier = ">=5.7.0,<6" }, { name = "python-multipart", specifier = ">=0.0.18,<1" }, - { name = "qdrant-client", specifier = ">=1.17.0,<2" }, { name = "redis", extras = ["hiredis"], specifier = ">=5.2.0,<8" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.36,<3" }, { name = "starlette", specifier = ">=1.0.0,<2" }, @@ -931,47 +929,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/32/77ee8a6c1564fc345a491a4e85b3bf360e4cf26eac98c4532d2fdb96e01f/greenlet-3.5.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d60097128cb0a1cab9ea541186ea13cd7b847b8449a7787c2e2350da0cb82d86", size = 245324, upload-time = "2026-04-27T12:24:40.295Z" }, ] -[[package]] -name = "grpcio" -version = "1.80.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" }, - { url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" }, - { url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" }, - { url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" }, - { url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" }, - { url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" }, - { url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" }, - { url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" }, - { url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" }, - { url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" }, - { url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" }, - { url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" }, - { url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" }, - { url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" }, - { url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" }, - { url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" }, - { url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" }, - { url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" }, - { url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" }, - { url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" }, - { url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" }, - { url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" }, - { url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" }, - { url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" }, - { url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" }, - { url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" }, - { url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" }, - { url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" }, - { url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" }, - { url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" }, -] - [[package]] name = "h11" version = "0.16.0" @@ -981,19 +938,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] -[[package]] -name = "h2" -version = "4.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "hpack" }, - { name = "hyperframe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, -] - [[package]] name = "hf-xet" version = "1.5.0" @@ -1086,15 +1030,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/d6/191e6741addc97bcf5e755661f8c82f0fd0aa35f07ece56e858da689b57e/hiredis-3.3.1-cp314-cp314t-win_amd64.whl", hash = "sha256:ab1f646ff531d70bfd25f01e60708dfa3d105eb458b7dedd9fe9a443039fd809", size = 23811, upload-time = "2026-03-16T15:20:34.292Z" }, ] -[[package]] -name = "hpack" -version = "4.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, -] - [[package]] name = "httpcore" version = "1.0.9" @@ -1152,11 +1087,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] -[package.optional-dependencies] -http2 = [ - { name = "h2" }, -] - [[package]] name = "httpx-sse" version = "0.4.3" @@ -1186,15 +1116,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/11/0b64cc9024329b76d7547c19a67604a61d21d3ba678a69d1b220c29d5112/huggingface_hub-1.15.0-py3-none-any.whl", hash = "sha256:a4a59af04cbc41a3fe3fec429b171ef994ef8c971eda10136746f408dd4e3744", size = 663602, upload-time = "2026-05-15T11:42:50.487Z" }, ] -[[package]] -name = "hyperframe" -version = "6.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, -] - [[package]] name = "idna" version = "3.15" @@ -1377,6 +1298,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/65/d1/bc0ed2427bf609f2ee10da303a6a226f9c8bce94f945dc29a32ce55de6e4/lxml-6.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aa366a1e55b8ebfe8ca8ddc3cfe75c8ebade181aeb0f661d0cb05986b647f72a", size = 5260995, upload-time = "2026-05-18T19:18:37.091Z" }, { url = "https://files.pythonhosted.org/packages/69/8b/6772e1a4b513fc50a8d931f19edde0e13ae6918510a1e13ff67864f3e5ed/lxml-6.1.1-cp312-cp312-win32.whl", hash = "sha256:126c93f7f56f0eda92f6d8c619edc463a4f23d9252f1c9d0405a76f25fa9f11a", size = 3596382, upload-time = "2026-05-18T19:17:18.37Z" }, { url = "https://files.pythonhosted.org/packages/1b/89/45198e9624762af2dfd2cb8782598477ceb29f6e59caab560388ae1f4ec1/lxml-6.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:26e6eda8d38c1fcab1090dd196ee87cbd13788e531937610e2589085de074e77", size = 3997255, upload-time = "2026-05-18T19:17:56.781Z" }, + { url = "https://files.pythonhosted.org/packages/90/a9/7a54b6834088d9ae528a7b780584ba6a39a9457b0ac330479f20ffbc9449/lxml-6.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:6540377fbd53fe1b629172288c464fb18db11ce1fa7dc15891da10aa9dcc3e7f", size = 3659610, upload-time = "2026-05-19T19:22:50.843Z" }, { url = "https://files.pythonhosted.org/packages/a5/eb/7e6f37c5584ccbb2ff267f56fd0339016938c1c8684cfefab9b33ffc2f36/lxml-6.1.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:68a9198d0fc122d14bb76837de9aa80cf84caed990b5b237f532ed87d3706736", size = 8559780, upload-time = "2026-05-18T19:17:57.661Z" }, { url = "https://files.pythonhosted.org/packages/a1/36/587c2521cf23a2cd6c9c22108aa7528f683a1f195ed7ccd23a4b1786ad36/lxml-6.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7d47866cb32fb503450b6edc9df355d10dc49836af2e89901bd6ac6b0896d9d9", size = 4618006, upload-time = "2026-05-18T19:18:04.452Z" }, { url = "https://files.pythonhosted.org/packages/6e/ca/ab7bfe2bf4c972af5e7878262845ead3a24a929a9b04bc11c7c1ece6c82a/lxml-6.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb7c9811bfaa8b1ed5ed319f5d370dfbcaa59d52ea64be2a5a85e18195930354", size = 4924139, upload-time = "2026-05-18T19:19:04.873Z" }, @@ -1394,6 +1316,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/f8/f6a5e8185bcb28c2befae3d31f8e3df3b811cb0f47746517a81279fcafe1/lxml-6.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:47402e62c52ff5988c1e8c6c63177f5708bccf48e366dea4e3dcf1e645e04947", size = 5250276, upload-time = "2026-05-18T19:19:03.834Z" }, { url = "https://files.pythonhosted.org/packages/c7/f2/1a2b9f1b7a49d45495369be7ef9ad05b262930f2eab3e3145706fca8083f/lxml-6.1.1-cp313-cp313-win32.whl", hash = "sha256:3483644525531e1d5762b0c44a8e18b6efba321b6dcf8a8952de10b037618bca", size = 3596903, upload-time = "2026-05-18T19:17:29.863Z" }, { url = "https://files.pythonhosted.org/packages/e6/99/f4ffb024f238eec2131aaa09f3278fb6129cf892741bf68e1fc1afb8c100/lxml-6.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:a10bd2fd62e8ce916ececb342f348f190724a098c1faa056fdfb2a22ad5e8660", size = 3995869, upload-time = "2026-05-18T19:18:02.596Z" }, + { url = "https://files.pythonhosted.org/packages/d1/53/70eb8c5c6037f27448f1e3c54ebede9545a801ae63f0a7254afca4fe8e45/lxml-6.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:424aa57aca0897eb922aef34395bd1289b3b6f04e6bae20ea123c0c7e333cffc", size = 3658490, upload-time = "2026-05-19T19:22:53.846Z" }, { url = "https://files.pythonhosted.org/packages/13/e2/2e325795566de01d0d7c3bb57d3c370616b2d07b01214e84eec5d3b10963/lxml-6.1.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:19b7ab10b210b0b3ad7985d9ac4eb66ab09a90b20fe6e2f7ba55d01a234345d0", size = 8577146, upload-time = "2026-05-18T19:18:17.765Z" }, { url = "https://files.pythonhosted.org/packages/93/cf/5630b5e4be7d2e6bee8efe83865c925221103cf0221303b104ce134b01e2/lxml-6.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:c08e5c694306507275f2290073350c4f32e383db15213b2c69e7ff39c1193840", size = 4623866, upload-time = "2026-05-18T19:18:30.669Z" }, { url = "https://files.pythonhosted.org/packages/d2/51/3904907c063451cf8d4a5c9fe0cad95fa1f4ec57f4e3884fa0731bd7a305/lxml-6.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:74a9717fd0d82effef5c2854f0d917231d5324b5a3eb7275c43ac9fa32f97a14", size = 4950022, upload-time = "2026-05-18T19:19:31.958Z" }, @@ -1411,6 +1334,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2f/5d/b329acbbedc0b619ebc2be6cf7ee9ed07e80892c88d4dfd612c33805789a/lxml-6.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:63876be28efefa04a1df615b46770e82042cce445cfdce55160522f57b231ccb", size = 5264191, upload-time = "2026-05-18T19:19:21.118Z" }, { url = "https://files.pythonhosted.org/packages/d6/85/be36fb1425b30db3c3f9df75fe86343ebffb79e6320bd7f588e25bfeac39/lxml-6.1.1-cp314-cp314-win32.whl", hash = "sha256:7f7a92e8583f06b1fd49d01158143b8461cfcd135dcb10ec807270a3051bd603", size = 3657202, upload-time = "2026-05-18T19:17:39.509Z" }, { url = "https://files.pythonhosted.org/packages/b8/ce/3cf9a827342269f54d405a6202397de63f07c69cbd6ce7d183a3f0cba1e9/lxml-6.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:b2d444f2e66624d68e9c6b211e28a76e22fff5fcabcfff4deac18b529b7d4137", size = 4064497, upload-time = "2026-05-18T19:18:14.662Z" }, + { url = "https://files.pythonhosted.org/packages/d9/3e/1a957bde8f0760039e627f94699f82caa782c9d838d86c3d28245ee67212/lxml-6.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:3fd9728a2735fda14f4e8235830c86b539e9661e849665bf926d3f867943b4bf", size = 3741991, upload-time = "2026-05-19T19:22:59.111Z" }, { url = "https://files.pythonhosted.org/packages/78/b2/00ed55b3a2efa4658fb795c38d1090ec9b3e8a6c3683d4441fa517f09c3b/lxml-6.1.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:787b2496d0dbe8cd180984e8d29e3a6f76e7ea34db781cb3bd55e4ba1ef8b4ee", size = 8827545, upload-time = "2026-05-18T19:18:41.193Z" }, { url = "https://files.pythonhosted.org/packages/c0/73/74573db19baa618d5f266f2407898b087ff6927115b00b71e5fc1b700847/lxml-6.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:2c8daa471358dc2d6fcf02165e80ec68f77871a286df95bc5cc3816153b0fd2c", size = 4735736, upload-time = "2026-05-18T19:18:46.761Z" }, { url = "https://files.pythonhosted.org/packages/16/02/6f7061f4f95f51e545d48e87647c54791d204a4e881be4156e7a26ba5338/lxml-6.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:acd7d70b64c0aae0c7922cca83d288a16f5f6da523637697872253415269baef", size = 4970291, upload-time = "2026-05-18T19:19:56.215Z" }, @@ -1428,6 +1352,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/2d/2dafd8149e94b05bb070690efd5bb2680720681e03ff03fc57d2b70a1105/lxml-6.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9e36f163528fc50cbef305f02a5fd66d404edf7049cdaff211dbc2cba5a7013e", size = 5247845, upload-time = "2026-05-18T19:19:36.649Z" }, { url = "https://files.pythonhosted.org/packages/ce/68/b30e913340c380ddac9580c6e6230991fc37240ec4f64704833e4f3e2769/lxml-6.1.1-cp314-cp314t-win32.whl", hash = "sha256:649dda677cf3bd6ac9ae14007ba0c824ded8ce5808b53fc7431d9140399118c1", size = 3897345, upload-time = "2026-05-18T19:17:33.562Z" }, { url = "https://files.pythonhosted.org/packages/3c/4e/9eb2af5335545f9fbcd7af57bcf87c6025d31eaa31b14ec184a6c8675328/lxml-6.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:793033d6c5cdf33a573f910d9bea14ef8f5771820411d118da8e1182edb53d5e", size = 4393350, upload-time = "2026-05-18T19:18:10.076Z" }, + { url = "https://files.pythonhosted.org/packages/7f/2c/0f1e93c636720e8a3eb59af2bfda99d98b55891e1c53bc30c2e0e865f01b/lxml-6.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:58bb955caba94e467d2a96da17660d2d704e0675894cba21ab8a775b8621fd1c", size = 3817223, upload-time = "2026-05-19T19:22:56.823Z" }, ] [[package]] @@ -2094,33 +2019,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dd/34/b6f19941adcdaf415b5e8a8d577499f5b6a76b59cbae37f9b125a9ffe9f2/polyfactory-3.3.0-py3-none-any.whl", hash = "sha256:686abcaa761930d3df87b91e95b26b8d8cb9fdbbbe0b03d5f918acff5c72606e", size = 62707, upload-time = "2026-02-22T09:46:25.985Z" }, ] -[[package]] -name = "portalocker" -version = "3.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pywin32", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644, upload-time = "2025-06-14T13:20:40.03Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/a6/38c8e2f318bf67d338f4d629e93b0b4b9af331f455f0390ea8ce4a099b26/portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968", size = 22424, upload-time = "2025-06-14T13:20:38.083Z" }, -] - -[[package]] -name = "protobuf" -version = "7.34.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6b/6b/a0e95cad1ad7cc3f2c6821fcab91671bd5b78bd42afb357bb4765f29bc41/protobuf-7.34.1.tar.gz", hash = "sha256:9ce42245e704cc5027be797c1db1eb93184d44d1cdd71811fb2d9b25ad541280", size = 454708, upload-time = "2026-03-20T17:34:47.036Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/11/3325d41e6ee15bf1125654301211247b042563bcc898784351252549a8ad/protobuf-7.34.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8b2cc79c4d8f62b293ad9b11ec3aebce9af481fa73e64556969f7345ebf9fc7", size = 429247, upload-time = "2026-03-20T17:34:37.024Z" }, - { url = "https://files.pythonhosted.org/packages/eb/9d/aa69df2724ff63efa6f72307b483ce0827f4347cc6d6df24b59e26659fef/protobuf-7.34.1-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:5185e0e948d07abe94bb76ec9b8416b604cfe5da6f871d67aad30cbf24c3110b", size = 325753, upload-time = "2026-03-20T17:34:38.751Z" }, - { url = "https://files.pythonhosted.org/packages/92/e8/d174c91fd48e50101943f042b09af9029064810b734e4160bbe282fa1caa/protobuf-7.34.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:403b093a6e28a960372b44e5eb081775c9b056e816a8029c61231743d63f881a", size = 340198, upload-time = "2026-03-20T17:34:39.871Z" }, - { url = "https://files.pythonhosted.org/packages/53/1b/3b431694a4dc6d37b9f653f0c64b0a0d9ec074ee810710c0c3da21d67ba7/protobuf-7.34.1-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:8ff40ce8cd688f7265326b38d5a1bed9bfdf5e6723d49961432f83e21d5713e4", size = 324267, upload-time = "2026-03-20T17:34:41.1Z" }, - { url = "https://files.pythonhosted.org/packages/85/29/64de04a0ac142fb685fd09999bc3d337943fb386f3a0ec57f92fd8203f97/protobuf-7.34.1-cp310-abi3-win32.whl", hash = "sha256:34b84ce27680df7cca9f231043ada0daa55d0c44a2ddfaa58ec1d0d89d8bf60a", size = 426628, upload-time = "2026-03-20T17:34:42.536Z" }, - { url = "https://files.pythonhosted.org/packages/4d/87/cb5e585192a22b8bd457df5a2c16a75ea0db9674c3a0a39fc9347d84e075/protobuf-7.34.1-cp310-abi3-win_amd64.whl", hash = "sha256:e97b55646e6ce5cbb0954a8c28cd39a5869b59090dfaa7df4598a7fba869468c", size = 437901, upload-time = "2026-03-20T17:34:44.112Z" }, - { url = "https://files.pythonhosted.org/packages/88/95/608f665226bca68b736b79e457fded9a2a38c4f4379a4a7614303d9db3bc/protobuf-7.34.1-py3-none-any.whl", hash = "sha256:bb3812cd53aefea2b028ef42bd780f5b96407247f20c6ef7c679807e9d188f11", size = 170715, upload-time = "2026-03-20T17:34:45.384Z" }, -] - [[package]] name = "psutil" version = "7.2.2" @@ -2466,24 +2364,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] -[[package]] -name = "qdrant-client" -version = "1.18.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "grpcio" }, - { name = "httpx", extra = ["http2"] }, - { name = "numpy" }, - { name = "portalocker" }, - { name = "protobuf" }, - { name = "pydantic" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/65/45/5b1bdd15a3c7730eefb9c113600829e20d689b82b5a23f9e07d107094004/qdrant_client-1.18.0.tar.gz", hash = "sha256:52e8ece1a7d40519801bf0b70713bfa0f6b7ae28c7275bbe0b0286fbed7f6db4", size = 352580, upload-time = "2026-05-11T14:12:38.702Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/10/c437bd2ac41ef30d3019063e6ce537dc111e9214473b337ee88f7fa6359a/qdrant_client-1.18.0-py3-none-any.whl", hash = "sha256:093aa8cf8a420ee3ad2a68b007e1378d7992b2600e0b53c193fc172674f659cd", size = 398126, upload-time = "2026-05-11T14:12:36.998Z" }, -] - [[package]] name = "rapidocr" version = "3.8.1" diff --git a/app/src/components/navigation/sidebar.tsx b/app/src/components/navigation/sidebar.tsx index 0cf14854..f9f1430e 100644 --- a/app/src/components/navigation/sidebar.tsx +++ b/app/src/components/navigation/sidebar.tsx @@ -7,7 +7,6 @@ import { BookOpen, Cloud, Cpu, - Database, FlaskConical, HardDrive, KeyRound, @@ -68,7 +67,6 @@ const NAV_GROUPS: readonly NavGroup[] = [ items: [ { admin: true, href: "/backups", icon: Archive, label: "Backups" }, { admin: true, href: "/data-storage", icon: HardDrive, label: "Data Storage" }, - { admin: true, href: "/vector-storage", icon: Database, label: "Vector Storage" }, { admin: true, href: "/settings", icon: Settings, label: "Settings" }, ], }, diff --git a/app/src/features/collections/collection-form-state.ts b/app/src/features/collections/collection-form-state.ts index 7ab2e28a..6cdb5d66 100644 --- a/app/src/features/collections/collection-form-state.ts +++ b/app/src/features/collections/collection-form-state.ts @@ -10,7 +10,6 @@ export type CreateCollectionFormValues = { presetId: string; tenantGuardEnabled: boolean; tenantField: string; - vectorStoreProvider: "qdrant" | "turbopuffer"; }; export type CollectionSearchMode = "semantic" | "keyword" | "hybrid"; @@ -34,7 +33,6 @@ export const defaultCreateCollectionFormValues = (): CreateCollectionFormValues presetId: "", tenantGuardEnabled: false, tenantField: "", - vectorStoreProvider: "qdrant", }); export const defaultCollectionSearchFormValues = (): CollectionSearchFormValues => ({ @@ -118,7 +116,6 @@ export const createCollectionBodyFromValues = ({ presetId, tenantGuardEnabled, tenantField, - vectorStoreProvider, }: CreateCollectionFormValues) => ({ chunk_overlap: chunkOverlap, chunk_size: chunkSize, @@ -129,7 +126,6 @@ export const createCollectionBodyFromValues = ({ multimodal_enrichment_enabled: multimodalEnrichmentEnabled, name: normalizeCollectionName(name), tenant_field: tenantGuardEnabled ? tenantField.trim() || null : null, - vector_store_provider: vectorStoreProvider, }); export const collectionSearchBodyFromValues = ({ diff --git a/app/src/features/collections/create-collection-modal.tsx b/app/src/features/collections/create-collection-modal.tsx index 1d46fe0d..dbfa9233 100644 --- a/app/src/features/collections/create-collection-modal.tsx +++ b/app/src/features/collections/create-collection-modal.tsx @@ -147,18 +147,6 @@ export const CreateCollectionModal = ({ open, onClose }: Props) => { /> )} - - {(field) => ( - patchDraft({ apiKey: event.target.value })} + placeholder={complete ? "Saved" : "tpuf_..."} + type="password" + value={draft.apiKey} + /> + patchDraft({ region: event.target.value })} + placeholder="aws-us-east-1" + value={draft.region} + /> + patchDraft({ namespacePrefix: event.target.value })} + placeholder="bigrag_" + value={draft.namespacePrefix} + /> + patchDraft({ baseUrl: event.target.value })} + placeholder="https://api.turbopuffer.com" + value={draft.baseUrl} + /> + +
+
+ {complete + ? "Turbopuffer is configured for this instance." + : skipped + ? "Skipped for now. System health will keep reporting vector readiness." + : "Save a working vector store now, or skip and configure it later."} +
+
+ {!complete && ( + + )} + +
+
+ + ); +}; diff --git a/app/src/routeTree.gen.ts b/app/src/routeTree.gen.ts index 2908d41d..d755d516 100644 --- a/app/src/routeTree.gen.ts +++ b/app/src/routeTree.gen.ts @@ -12,6 +12,7 @@ import { Route as DashboardWebhooksRouteImport } from "./routes/_dashboard.webho import { Route as DashboardUsageRouteImport } from "./routes/_dashboard.usage"; import { Route as DashboardSettingsRouteImport } from "./routes/_dashboard.settings"; import { Route as DashboardOverviewRouteImport } from "./routes/_dashboard.overview"; +import { Route as DashboardOnboardingRouteImport } from "./routes/_dashboard.onboarding"; import { Route as DashboardModelsRouteImport } from "./routes/_dashboard.models"; import { Route as DashboardMcpRouteImport } from "./routes/_dashboard.mcp"; import { Route as DashboardEvalsRouteImport } from "./routes/_dashboard.evals"; @@ -68,6 +69,11 @@ const DashboardOverviewRoute = DashboardOverviewRouteImport.update({ path: "/overview", getParentRoute: () => DashboardRoute, } as any); +const DashboardOnboardingRoute = DashboardOnboardingRouteImport.update({ + id: "/onboarding", + path: "/onboarding", + getParentRoute: () => DashboardRoute, +} as any); const DashboardModelsRoute = DashboardModelsRouteImport.update({ id: "/models", path: "/models", @@ -203,6 +209,7 @@ export interface FileRoutesByFullPath { "/evals": typeof DashboardEvalsRoute; "/mcp": typeof DashboardMcpRoute; "/models": typeof DashboardModelsRoute; + "/onboarding": typeof DashboardOnboardingRoute; "/overview": typeof DashboardOverviewRoute; "/settings": typeof DashboardSettingsRoute; "/usage": typeof DashboardUsageRoute; @@ -232,6 +239,7 @@ export interface FileRoutesByTo { "/evals": typeof DashboardEvalsRoute; "/mcp": typeof DashboardMcpRoute; "/models": typeof DashboardModelsRoute; + "/onboarding": typeof DashboardOnboardingRoute; "/overview": typeof DashboardOverviewRoute; "/settings": typeof DashboardSettingsRoute; "/usage": typeof DashboardUsageRoute; @@ -262,6 +270,7 @@ export interface FileRoutesById { "/_dashboard/evals": typeof DashboardEvalsRoute; "/_dashboard/mcp": typeof DashboardMcpRoute; "/_dashboard/models": typeof DashboardModelsRoute; + "/_dashboard/onboarding": typeof DashboardOnboardingRoute; "/_dashboard/overview": typeof DashboardOverviewRoute; "/_dashboard/settings": typeof DashboardSettingsRoute; "/_dashboard/usage": typeof DashboardUsageRoute; @@ -293,6 +302,7 @@ export interface FileRouteTypes { | "/evals" | "/mcp" | "/models" + | "/onboarding" | "/overview" | "/settings" | "/usage" @@ -322,6 +332,7 @@ export interface FileRouteTypes { | "/evals" | "/mcp" | "/models" + | "/onboarding" | "/overview" | "/settings" | "/usage" @@ -351,6 +362,7 @@ export interface FileRouteTypes { | "/_dashboard/evals" | "/_dashboard/mcp" | "/_dashboard/models" + | "/_dashboard/onboarding" | "/_dashboard/overview" | "/_dashboard/settings" | "/_dashboard/usage" @@ -424,6 +436,13 @@ declare module "@tanstack/react-router" { preLoaderRoute: typeof DashboardOverviewRouteImport; parentRoute: typeof DashboardRoute; }; + "/_dashboard/onboarding": { + id: "/_dashboard/onboarding"; + path: "/onboarding"; + fullPath: "/onboarding"; + preLoaderRoute: typeof DashboardOnboardingRouteImport; + parentRoute: typeof DashboardRoute; + }; "/_dashboard/models": { id: "/_dashboard/models"; path: "/models"; @@ -650,6 +669,7 @@ interface DashboardRouteChildren { DashboardEvalsRoute: typeof DashboardEvalsRoute; DashboardMcpRoute: typeof DashboardMcpRoute; DashboardModelsRoute: typeof DashboardModelsRoute; + DashboardOnboardingRoute: typeof DashboardOnboardingRoute; DashboardOverviewRoute: typeof DashboardOverviewRoute; DashboardSettingsRoute: typeof DashboardSettingsRoute; DashboardUsageRoute: typeof DashboardUsageRoute; @@ -669,6 +689,7 @@ const DashboardRouteChildren: DashboardRouteChildren = { DashboardEvalsRoute: DashboardEvalsRoute, DashboardMcpRoute: DashboardMcpRoute, DashboardModelsRoute: DashboardModelsRoute, + DashboardOnboardingRoute: DashboardOnboardingRoute, DashboardOverviewRoute: DashboardOverviewRoute, DashboardSettingsRoute: DashboardSettingsRoute, DashboardUsageRoute: DashboardUsageRoute, diff --git a/app/src/routes/_auth.setup.tsx b/app/src/routes/_auth.setup.tsx index 4c4bbf46..88f38e07 100644 --- a/app/src/routes/_auth.setup.tsx +++ b/app/src/routes/_auth.setup.tsx @@ -30,7 +30,7 @@ const SetupPage = () => { try { await setup.mutateAsync(setupBodyFromValues(value)); toast.success("Admin account created"); - navigate({ to: "/overview", replace: true }); + navigate({ to: "/onboarding", replace: true }); } catch (err) { toast.error(err instanceof Error ? err.message : "Setup failed"); } diff --git a/app/src/routes/_dashboard.onboarding.tsx b/app/src/routes/_dashboard.onboarding.tsx new file mode 100644 index 00000000..cfcb9af1 --- /dev/null +++ b/app/src/routes/_dashboard.onboarding.tsx @@ -0,0 +1,6 @@ +import { createFileRoute } from "@tanstack/react-router"; +import { OnboardingPage } from "@/features/onboarding/onboarding-page"; + +export const Route = createFileRoute("/_dashboard/onboarding")({ + component: () => , +}); diff --git a/website/content/docs/admin-ui.mdx b/website/content/docs/admin-ui.mdx index 6ba70afd..93ddf89a 100644 --- a/website/content/docs/admin-ui.mdx +++ b/website/content/docs/admin-ui.mdx @@ -30,6 +30,7 @@ For cross-site production deployments, enable secure session cookies and usually | Path | Purpose | |------|---------| | `/setup` | One-time page to create the first admin account. Disappears once `needs_setup: false`. | +| `/onboarding` | Authenticated first-run provider setup. Requires one verified embedding preset, prompts for Turbopuffer, and then continues to `/overview`. | | `/login` | Email + password login for admins and members. | | `/overview` | Platform-wide stats — collection count, document states, stored token count, queue depth, readiness, and worker heartbeat. | | `/collections` | List, create, delete, and search collections. | @@ -123,6 +124,6 @@ The admin UI uses session cookies exclusively. To automate anything the admin UI ## When to skip the Admin UI -- **Pure backend deployment** — no admin UI process, everything via API keys and SDKs. Set `BIGRAG_ENV=prod` and use [`POST /v1/auth/setup`](/docs/api-reference/authentication) from an admin workstation to bootstrap. +- **Pure backend deployment** — no admin UI process, everything via API keys and SDKs. Set `BIGRAG_ENV=prod`, use [`POST /v1/auth/setup`](/docs/api-reference/authentication) from an admin workstation to bootstrap, then create at least one embedding preset through the admin API before indexing documents. - **Embedded experiences** — if you're surfacing search inside your own app, skip admin UI and build against the [query API](/docs/api-reference/query). - **Read-only operators** — create a `member` account; they get `/overview`, `/collections` (read), and `/chat` but can't mutate. diff --git a/website/content/docs/api-reference/authentication.mdx b/website/content/docs/api-reference/authentication.mdx index 4f0e95b4..c70bc6fe 100644 --- a/website/content/docs/api-reference/authentication.mdx +++ b/website/content/docs/api-reference/authentication.mdx @@ -32,7 +32,7 @@ curl -X POST http://localhost:4000/v1/auth/setup \ Once at least one admin exists, `setup-status` returns `{"needs_setup": false}` and `/v1/auth/setup` returns `409`. -The admin UI exposes the same flow at `http://localhost:3000/setup`. +The admin UI exposes the same flow at `http://localhost:3000/setup`. After the first admin is created, the UI redirects the signed-in admin to `/onboarding` to create a verified embedding preset and optionally save Turbopuffer settings before continuing to `/overview`. The setup endpoint itself still only creates the first admin and session. ## Session auth (admin UI, browser) diff --git a/website/content/docs/getting-started/configuration.mdx b/website/content/docs/getting-started/configuration.mdx index f1e09065..6a906ea6 100644 --- a/website/content/docs/getting-started/configuration.mdx +++ b/website/content/docs/getting-started/configuration.mdx @@ -63,7 +63,7 @@ Environment variables override TOML values. CLI flags passed to `python -m bigra ## Runtime settings -After the first admin account exists, go to `/settings` to manage platform runtime settings, `/data-storage` to manage uploaded source-file storage, and `/models` to manage model presets and model runtime settings. Turbopuffer is configured from the UI, is the vector backend for every collection, and is stored in Postgres in the `instance_settings` table. Secret settings are encrypted with `BIGRAG_MASTER_KEY` and redacted on read. +After the first admin account exists, the admin UI sends first-run operators to `/onboarding` to create one verified embedding preset and optionally save Turbopuffer settings. Later, go to `/settings` to manage platform runtime settings, `/data-storage` to manage uploaded source-file storage, and `/models` to manage model presets and model runtime settings. Turbopuffer is configured from the UI, is the vector backend for every collection, and is stored in Postgres in the `instance_settings` table. Secret settings are encrypted with `BIGRAG_MASTER_KEY` and redacted on read. The same surface is available through [`/v1/admin/settings`](/docs/api-reference/instance-settings). diff --git a/website/content/docs/getting-started/quickstart.mdx b/website/content/docs/getting-started/quickstart.mdx index 97fef456..dbc9f72f 100644 --- a/website/content/docs/getting-started/quickstart.mdx +++ b/website/content/docs/getting-started/quickstart.mdx @@ -42,7 +42,13 @@ curl -X POST $BASE/v1/auth/setup \ -c cookies.txt ``` -The response sets a `bigrag_session` cookie. The admin UI exposes the same flow at `http://localhost:3000/setup`. +The response sets a `bigrag_session` cookie. The admin UI exposes the same flow at `http://localhost:3000/setup`, then redirects to `/onboarding` where you create one verified embedding preset and can optionally save Turbopuffer settings before landing on `/overview`. + + + +### Complete provider onboarding + +In the admin UI, finish `/onboarding` by adding one embedding preset. Turbopuffer can be saved there too, or skipped and configured later from the runtime settings API. From a7557b72d841987049900c78590c9c57baf94367 Mon Sep 17 00:00:00 2001 From: Bigint <69431456+bigint@users.noreply.github.com> Date: Thu, 21 May 2026 23:55:04 +0530 Subject: [PATCH 9/9] fix: make e2e wait for api liveness --- api/bigrag/services/maintenance.py | 1 - e2e/Makefile | 18 +++++++++++++----- e2e/README.md | 7 ++++++- website/content/docs/development/testing.mdx | 2 +- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/api/bigrag/services/maintenance.py b/api/bigrag/services/maintenance.py index e5a57a58..08e6fb55 100644 --- a/api/bigrag/services/maintenance.py +++ b/api/bigrag/services/maintenance.py @@ -11,7 +11,6 @@ MAINTENANCE_LOCK_NAME = "maintenance" BACKUP_LOCK_NAME = MAINTENANCE_LOCK_NAME -VECTOR_MIGRATION_LOCK_NAME = MAINTENANCE_LOCK_NAME class MaintenanceActiveError(RuntimeError): diff --git a/e2e/Makefile b/e2e/Makefile index 13320a15..a86036b5 100644 --- a/e2e/Makefile +++ b/e2e/Makefile @@ -3,6 +3,7 @@ E2E_PROJECT ?= bigrag-e2e COMPOSE = docker compose -p $(E2E_PROJECT) -f ../docker-compose.yml -f docker-compose.e2e.yml API_BASE ?= http://localhost:4000 +API_LIVENESS ?= $(API_BASE)/health E2E_SERVICES = bigrag-api bigrag-worker fake-openai fake-turbopuffer webhook-sink minio up: @@ -10,14 +11,14 @@ up: $(MAKE) wait-ready wait-ready: - @echo "Waiting for $(API_BASE)/health/ready ..." + @echo "Waiting for $(API_LIVENESS) ..." @for i in $$(seq 1 60); do \ - if curl -fsS $(API_BASE)/health/ready >/dev/null 2>&1; then \ + if curl -fsS $(API_LIVENESS) >/dev/null 2>&1; then \ echo "ready"; exit 0; \ fi; \ sleep 2; \ done; \ - echo "timed out waiting for /health/ready"; exit 1 + echo "timed out waiting for $(API_LIVENESS)"; exit 1 down: $(COMPOSE) down -v @@ -41,13 +42,20 @@ test-sdk-ts: test: test-api test-sdk-py test-sdk-ts e2e: - $(MAKE) up + @if ! $(MAKE) up; then \ + echo "=== bigrag-api logs (last 1000 lines) ==="; \ + $(COMPOSE) logs --tail=1000 bigrag-api || true; \ + echo "=== bigrag-worker logs (last 1000 lines) ==="; \ + $(COMPOSE) logs --tail=1000 bigrag-worker || true; \ + $(COMPOSE) down -v || true; \ + exit 1; \ + fi @if ! $(MAKE) test; then \ echo "=== bigrag-api logs (last 1000 lines) ==="; \ $(COMPOSE) logs --tail=1000 bigrag-api || true; \ echo "=== bigrag-worker logs (last 1000 lines) ==="; \ $(COMPOSE) logs --tail=1000 bigrag-worker || true; \ - $(MAKE) down; \ + $(COMPOSE) down -v || true; \ exit 1; \ fi $(MAKE) down diff --git a/e2e/README.md b/e2e/README.md index d20c3abf..13a4881c 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -31,7 +31,7 @@ make e2e # up, test, down | Target | What it does | |--------------------|--------------------------------------------------------------------| -| `make up` | Brings up the API/SDK e2e compose stack and waits for `/health/ready` | +| `make up` | Brings up the API/SDK e2e compose stack and waits for `/health` | | `make down` | Tears down the stack and removes volumes | | `make logs` | Tails compose logs | | `make install` | `uv sync` + `pnpm install` | @@ -45,6 +45,11 @@ The Makefile uses the `bigrag-e2e` Docker Compose project. Its `down` target removes e2e volumes only and does not remove the default `bigrag` dev volumes used by `./dev.sh`. +`make up` waits for API liveness on `/health`. The pytest admin fixture then +creates the first admin when needed and seeds e2e runtime settings for +`fake-openai`, `fake-turbopuffer`, and `webhook-sink` before tests that need +configured providers run. + ## Architecture ``` diff --git a/website/content/docs/development/testing.mdx b/website/content/docs/development/testing.mdx index fa3c3c3f..14cf850d 100644 --- a/website/content/docs/development/testing.mdx +++ b/website/content/docs/development/testing.mdx @@ -37,4 +37,4 @@ The E2E workflow uses the local fake OpenAI service only. There is no maintained The `e2e/` workspace contains API pytest coverage, Python SDK contract tests, and TypeScript SDK Vitest coverage. It intentionally does not include Playwright or other UI browser tests. -The local `e2e/Makefile` runs Docker Compose under the `bigrag-e2e` project name. `make down` removes that isolated e2e stack and its volumes without deleting the default `bigrag` dev volumes used by `./dev.sh`. +The local `e2e/Makefile` runs Docker Compose under the `bigrag-e2e` project name. `make up` waits for API liveness on `/health`; pytest then creates the first admin when needed and seeds fake-provider runtime settings before provider-dependent tests run. `make down` removes that isolated e2e stack and its volumes without deleting the default `bigrag` dev volumes used by `./dev.sh`.