From d4918e6156362c7b6ea883526e9ed7040b203a62 Mon Sep 17 00:00:00 2001 From: oritwoen <18102267+oritwoen@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:02:30 +0200 Subject: [PATCH 1/2] fix: expand chunk IDs in remove() when chunking is enabled Documents indexed with a chunker are stored as `{id}#chunk-{i}` but remove() was passing parent IDs directly to drivers, matching nothing. Now retriv expands parent IDs to their chunk IDs via listIds() before forwarding to drivers. Also adds listIds() to the sqlite-fts driver. --- src/db/sqlite-fts.ts | 5 +++++ src/retriv.ts | 18 +++++++++++++++++- test/retriv.test.ts | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/db/sqlite-fts.ts b/src/db/sqlite-fts.ts index f0fbb0f..5f97010 100644 --- a/src/db/sqlite-fts.ts +++ b/src/db/sqlite-fts.ts @@ -207,6 +207,11 @@ export async function sqliteFts(config: SqliteFtsConfig = {}): Promise r.id) + }, + async clear() { db.exec('DELETE FROM documents_fts') db.exec('DELETE FROM documents_meta') diff --git a/src/retriv.ts b/src/retriv.ts index 56a25c1..9d52266 100644 --- a/src/retriv.ts +++ b/src/retriv.ts @@ -220,9 +220,25 @@ export async function createRetriv(options: RetrivOptions): Promise d.listIds) + if (lister) { + const allIds = await lister.listIds!() + const idSet = new Set(ids) + removeIds = allIds.filter((id) => { + if (idSet.has(id)) + return true + const sep = id.indexOf('#chunk-') + return sep >= 0 && idSet.has(id.substring(0, sep)) + }) + } + } const results = await Promise.all( - drivers.filter(d => d.remove).map(d => d.remove!(ids)), + drivers.filter(d => d.remove).map(d => d.remove!(removeIds)), ) + for (const id of ids) + parentDocs.delete(id) return { count: results[0]?.count ?? 0 } }, diff --git a/test/retriv.test.ts b/test/retriv.test.ts index 7820dac..9ee1f6c 100644 --- a/test/retriv.test.ts +++ b/test/retriv.test.ts @@ -130,6 +130,26 @@ describe('createRetriv', () => { await retriv.close?.() }) + it('removes chunked documents by parent id', async () => { + const retriv = await createRetriv({ + driver: sqliteFts({ path: ':memory:' }), + chunking: markdownChunker({ chunkSize: 20, chunkOverlap: 0 }), + }) + + await retriv.index([ + { id: 'doc1', content: 'First part.\n\nSecond part.\n\nThird part.' }, + { id: 'doc2', content: 'Keep this.\n\nStill here.\n\nNot removed.' }, + ]) + + await retriv.remove?.(['doc1']) + + const doc1Results = await retriv.search('part', { limit: 10 }) + const doc2Results = await retriv.search('keep', { limit: 10 }) + + expect(doc1Results.every(r => !r.id.startsWith('doc1'))).toBe(true) + expect(doc2Results.length).toBeGreaterThanOrEqual(1) + }) + it('extracts snippets with highlights', async () => { const retriv = await createRetriv({ driver: sqliteFts({ path: ':memory:' }), From b82d62d860768528f141d8bab1e23a2326a4b5ad Mon Sep 17 00:00:00 2001 From: oritwoen <18102267+oritwoen@users.noreply.github.com> Date: Mon, 30 Mar 2026 11:27:47 +0200 Subject: [PATCH 2/2] fix: fail fast when chunked remove() has no listIds() driver Address review feedback: - throw instead of silently falling back to bare IDs - use filter().toHaveLength(0) instead of every() to avoid vacuous truth --- src/retriv.ts | 20 ++++++++++---------- test/retriv.test.ts | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/retriv.ts b/src/retriv.ts index 9d52266..23b4f3a 100644 --- a/src/retriv.ts +++ b/src/retriv.ts @@ -223,16 +223,16 @@ export async function createRetriv(options: RetrivOptions): Promise d.listIds) - if (lister) { - const allIds = await lister.listIds!() - const idSet = new Set(ids) - removeIds = allIds.filter((id) => { - if (idSet.has(id)) - return true - const sep = id.indexOf('#chunk-') - return sep >= 0 && idSet.has(id.substring(0, sep)) - }) - } + if (!lister) + throw new Error('remove() with chunking requires a driver that implements listIds()') + const allIds = await lister.listIds!() + const idSet = new Set(ids) + removeIds = allIds.filter((id) => { + if (idSet.has(id)) + return true + const sep = id.indexOf('#chunk-') + return sep >= 0 && idSet.has(id.substring(0, sep)) + }) } const results = await Promise.all( drivers.filter(d => d.remove).map(d => d.remove!(removeIds)), diff --git a/test/retriv.test.ts b/test/retriv.test.ts index 9ee1f6c..5d257a8 100644 --- a/test/retriv.test.ts +++ b/test/retriv.test.ts @@ -146,7 +146,7 @@ describe('createRetriv', () => { const doc1Results = await retriv.search('part', { limit: 10 }) const doc2Results = await retriv.search('keep', { limit: 10 }) - expect(doc1Results.every(r => !r.id.startsWith('doc1'))).toBe(true) + expect(doc1Results.filter(r => r.id.startsWith('doc1'))).toHaveLength(0) expect(doc2Results.length).toBeGreaterThanOrEqual(1) })