diff --git a/src/db/sqlite-fts.ts b/src/db/sqlite-fts.ts index f0fbb0f..5f97010 100644 --- a/src/db/sqlite-fts.ts +++ b/src/db/sqlite-fts.ts @@ -207,6 +207,11 @@ export async function sqliteFts(config: SqliteFtsConfig = {}): Promise r.id) + }, + async clear() { db.exec('DELETE FROM documents_fts') db.exec('DELETE FROM documents_meta') diff --git a/src/retriv.ts b/src/retriv.ts index 56a25c1..23b4f3a 100644 --- a/src/retriv.ts +++ b/src/retriv.ts @@ -220,9 +220,25 @@ export async function createRetriv(options: RetrivOptions): Promise d.listIds) + if (!lister) + throw new Error('remove() with chunking requires a driver that implements listIds()') + const allIds = await lister.listIds!() + const idSet = new Set(ids) + removeIds = allIds.filter((id) => { + if (idSet.has(id)) + return true + const sep = id.indexOf('#chunk-') + return sep >= 0 && idSet.has(id.substring(0, sep)) + }) + } const results = await Promise.all( - drivers.filter(d => d.remove).map(d => d.remove!(ids)), + drivers.filter(d => d.remove).map(d => d.remove!(removeIds)), ) + for (const id of ids) + parentDocs.delete(id) return { count: results[0]?.count ?? 0 } }, diff --git a/test/retriv.test.ts b/test/retriv.test.ts index 7820dac..5d257a8 100644 --- a/test/retriv.test.ts +++ b/test/retriv.test.ts @@ -130,6 +130,26 @@ describe('createRetriv', () => { await retriv.close?.() }) + it('removes chunked documents by parent id', async () => { + const retriv = await createRetriv({ + driver: sqliteFts({ path: ':memory:' }), + chunking: markdownChunker({ chunkSize: 20, chunkOverlap: 0 }), + }) + + await retriv.index([ + { id: 'doc1', content: 'First part.\n\nSecond part.\n\nThird part.' }, + { id: 'doc2', content: 'Keep this.\n\nStill here.\n\nNot removed.' }, + ]) + + await retriv.remove?.(['doc1']) + + const doc1Results = await retriv.search('part', { limit: 10 }) + const doc2Results = await retriv.search('keep', { limit: 10 }) + + expect(doc1Results.filter(r => r.id.startsWith('doc1'))).toHaveLength(0) + expect(doc2Results.length).toBeGreaterThanOrEqual(1) + }) + it('extracts snippets with highlights', async () => { const retriv = await createRetriv({ driver: sqliteFts({ path: ':memory:' }),