diff --git a/scripts/ci-test-manifest.mjs b/scripts/ci-test-manifest.mjs index 77931a27..4043117d 100644 --- a/scripts/ci-test-manifest.mjs +++ b/scripts/ci-test-manifest.mjs @@ -1,87 +1,92 @@ -export const CI_TEST_GROUPS = [ - "cli-smoke", - "core-regression", - "storage-and-schema", - "llm-clients-and-auth", - "packaging-and-workflow", -]; - -export const CI_TEST_MANIFEST = [ - { group: "llm-clients-and-auth", runner: "node", file: "test/embedder-error-hints.test.mjs" }, - { group: "llm-clients-and-auth", runner: "node", file: "test/cjk-recursion-regression.test.mjs" }, - { group: "storage-and-schema", runner: "node", file: "test/migrate-legacy-schema.test.mjs" }, - { group: "storage-and-schema", runner: "node", file: "test/config-session-strategy-migration.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/scope-access-undefined.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/reflection-bypass-hook.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-scope-filter.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/store-empty-scope-filter.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/storage-path-normalization.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/recall-text-cleanup.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/update-consistency-lancedb.test.mjs" }, - { group: "core-regression", runner: "node", file: "test/strip-envelope-metadata.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/auto-recall-timeout.test.mjs", args: ["--test"] }, - { group: "cli-smoke", runner: "node", file: "test/import-markdown/import-markdown.test.mjs", args: ["--test"] }, - { group: "cli-smoke", runner: "node", file: "test/cli-smoke.mjs" }, - { group: "cli-smoke", runner: "node", file: "test/functional-e2e.mjs" }, - { group: "storage-and-schema", runner: "node", file: "test/per-agent-auto-recall.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/retriever-rerank-regression.mjs" }, - { group: "core-regression", runner: "node", file: "test/smart-memory-lifecycle.mjs" }, - { group: "core-regression", runner: "node", file: "test/smart-extractor-branches.mjs" }, - { group: "core-regression", runner: "node", file: "test/smart-extractor-batch-embed.test.mjs" }, - { group: "packaging-and-workflow", runner: "node", file: "test/plugin-manifest-regression.mjs" }, - { group: "core-regression", runner: "node", file: "test/session-summary-before-reset.test.mjs", args: ["--test"] }, - { group: "packaging-and-workflow", runner: "node", file: "test/sync-plugin-version.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/smart-metadata-v2.mjs" }, - { group: "storage-and-schema", runner: "node", file: "test/vector-search-cosine.test.mjs" }, - { group: "core-regression", runner: "node", file: "test/context-support-e2e.mjs" }, - { group: "core-regression", runner: "node", file: "test/temporal-facts.test.mjs" }, - { group: "core-regression", runner: "node", file: "test/memory-update-supersede.test.mjs" }, - { group: "llm-clients-and-auth", runner: "node", file: "test/memory-upgrader-diagnostics.test.mjs" }, - { group: "llm-clients-and-auth", runner: "node", file: "test/llm-api-key-client.test.mjs", args: ["--test"] }, - { group: "llm-clients-and-auth", runner: "node", file: "test/llm-oauth-client.test.mjs", args: ["--test"] }, - { group: "llm-clients-and-auth", runner: "node", file: "test/cli-oauth-login.test.mjs", args: ["--test"] }, - { group: "packaging-and-workflow", runner: "node", file: "test/workflow-fork-guards.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/clawteam-scope.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/cross-process-lock.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/lock-stress-test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/lock-release-on-error.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/preference-slots.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/is-latest-auto-supersede.test.mjs" }, - { group: "core-regression", runner: "node", file: "test/temporal-awareness.test.mjs", args: ["--test"] }, - // Issue #598 regression tests - { group: "core-regression", runner: "node", file: "test/store-serialization.test.mjs" }, - { group: "core-regression", runner: "node", file: "test/mmr-tiny.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/access-tracker-retry.test.mjs" }, - { group: "core-regression", runner: "node", file: "test/embedder-cache.test.mjs" }, - // Issue #629 batch embedding fix - { group: "llm-clients-and-auth", runner: "node", file: "test/embedder-ollama-batch-routing.test.mjs" }, - // Issue #665 bulkStore tests - // Issue #690 cross-call batch accumulator tests - { group: "storage-and-schema", runner: "node", file: "test/issue-690-cross-call-batch.test.mjs", args: ["--test"] }, - // Issue #665 bulkStore tests (from upstream) - { group: "storage-and-schema", runner: "node", file: "test/bulk-store.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/bulk-store-edge-cases.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store.test.mjs", args: ["--test"] }, - { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store-edge-cases.test.mjs", args: ["--test"] }, - // Issue #680 regression tests (from upstream) - { group: "core-regression", runner: "node", file: "test/memory-reflection-issue680-tdd.test.mjs", args: ["--test"] }, - // Issue #606 SDK migration Bug 2 regression tests - { group: "core-regression", runner: "node", file: "test/issue606_sdk-migration.test.mjs" }, - // PR #713 inference regression tests - inferProviderFromBaseURL + model fallback - { group: "core-regression", runner: "node", file: "test/infer-provider-from-baseurl.test.mjs" }, - // Issue #736 recall governance - isRecallUsed() unit tests - { group: "core-regression", runner: "node", file: "test/is-recall-used.test.mjs", args: ["--test"] }, - // Issue #492 agentId validation tests - { group: "core-regression", runner: "node", file: "test/agentid-validation.test.mjs", args: ["--test"] }, - { group: "core-regression", runner: "node", file: "test/command-reflection-guard.test.mjs", args: ["--test"] }, - // Tier 1 memory counter fix - { group: "core-regression", runner: "node", file: "test/tier1-counters.test.mjs", args: ["--test"] }, -]; - -export function getEntriesForGroup(group) { - if (!CI_TEST_GROUPS.includes(group)) { - throw new Error(`Unknown CI test group: ${group}`); - } - - return CI_TEST_MANIFEST.filter((entry) => entry.group === group); -} +export const CI_TEST_GROUPS = [ + "cli-smoke", + "core-regression", + "storage-and-schema", + "llm-clients-and-auth", + "packaging-and-workflow", +]; + +export const CI_TEST_MANIFEST = [ + { group: "llm-clients-and-auth", runner: "node", file: "test/embedder-error-hints.test.mjs" }, + { group: "llm-clients-and-auth", runner: "node", file: "test/cjk-recursion-regression.test.mjs" }, + { group: "storage-and-schema", runner: "node", file: "test/migrate-legacy-schema.test.mjs" }, + { group: "storage-and-schema", runner: "node", file: "test/config-session-strategy-migration.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/scope-access-undefined.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/reflection-bypass-hook.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-scope-filter.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/store-empty-scope-filter.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/storage-path-normalization.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/recall-text-cleanup.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/update-consistency-lancedb.test.mjs" }, + { group: "core-regression", runner: "node", file: "test/strip-envelope-metadata.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/auto-recall-timeout.test.mjs", args: ["--test"] }, + { group: "cli-smoke", runner: "node", file: "test/import-markdown/import-markdown.test.mjs", args: ["--test"] }, + { group: "cli-smoke", runner: "node", file: "test/cli-smoke.mjs" }, + { group: "cli-smoke", runner: "node", file: "test/functional-e2e.mjs" }, + { group: "storage-and-schema", runner: "node", file: "test/per-agent-auto-recall.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/retriever-rerank-regression.mjs" }, + { group: "core-regression", runner: "node", file: "test/smart-memory-lifecycle.mjs" }, + { group: "core-regression", runner: "node", file: "test/smart-extractor-branches.mjs" }, + { group: "core-regression", runner: "node", file: "test/smart-extractor-batch-embed.test.mjs" }, + { group: "packaging-and-workflow", runner: "node", file: "test/plugin-manifest-regression.mjs" }, + { group: "core-regression", runner: "node", file: "test/session-summary-before-reset.test.mjs", args: ["--test"] }, + { group: "packaging-and-workflow", runner: "node", file: "test/sync-plugin-version.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/smart-metadata-v2.mjs" }, + { group: "storage-and-schema", runner: "node", file: "test/vector-search-cosine.test.mjs" }, + { group: "core-regression", runner: "node", file: "test/context-support-e2e.mjs" }, + { group: "core-regression", runner: "node", file: "test/temporal-facts.test.mjs" }, + { group: "core-regression", runner: "node", file: "test/memory-update-supersede.test.mjs" }, + { group: "llm-clients-and-auth", runner: "node", file: "test/memory-upgrader-diagnostics.test.mjs" }, + { group: "llm-clients-and-auth", runner: "node", file: "test/llm-api-key-client.test.mjs", args: ["--test"] }, + { group: "llm-clients-and-auth", runner: "node", file: "test/llm-oauth-client.test.mjs", args: ["--test"] }, + { group: "llm-clients-and-auth", runner: "node", file: "test/cli-oauth-login.test.mjs", args: ["--test"] }, + { group: "packaging-and-workflow", runner: "node", file: "test/workflow-fork-guards.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/clawteam-scope.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/cross-process-lock.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/lock-stress-test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/lock-release-on-error.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/preference-slots.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/is-latest-auto-supersede.test.mjs" }, + { group: "core-regression", runner: "node", file: "test/temporal-awareness.test.mjs", args: ["--test"] }, + // Issue #598 regression tests + { group: "core-regression", runner: "node", file: "test/store-serialization.test.mjs" }, + { group: "core-regression", runner: "node", file: "test/mmr-tiny.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/access-tracker-retry.test.mjs" }, + { group: "core-regression", runner: "node", file: "test/embedder-cache.test.mjs" }, + // Issue #629 batch embedding fix + { group: "llm-clients-and-auth", runner: "node", file: "test/embedder-ollama-batch-routing.test.mjs" }, + // Issue #665 bulkStore tests + // Issue #690 cross-call batch accumulator tests + { group: "storage-and-schema", runner: "node", file: "test/issue-690-cross-call-batch.test.mjs", args: ["--test"] }, + // Issue #665 bulkStore tests (from upstream) + { group: "storage-and-schema", runner: "node", file: "test/bulk-store.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/bulk-store-edge-cases.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store-edge-cases.test.mjs", args: ["--test"] }, + // Issue #680 regression tests (from upstream) + { group: "core-regression", runner: "node", file: "test/memory-reflection-issue680-tdd.test.mjs", args: ["--test"] }, + // Issue #606 SDK migration Bug 2 regression tests + { group: "core-regression", runner: "node", file: "test/issue606_sdk-migration.test.mjs" }, + // PR #713 inference regression tests - inferProviderFromBaseURL + model fallback + { group: "core-regression", runner: "node", file: "test/infer-provider-from-baseurl.test.mjs" }, + // Issue #736 recall governance - isRecallUsed() unit tests + { group: "core-regression", runner: "node", file: "test/is-recall-used.test.mjs", args: ["--test"] }, + // Issue #492 agentId validation tests + { group: "core-regression", runner: "node", file: "test/agentid-validation.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/command-reflection-guard.test.mjs", args: ["--test"] }, + // Tier 1 memory counter fix + { group: "core-regression", runner: "node", file: "test/tier1-counters.test.mjs", args: ["--test"] }, + // Issue #693 extraction write validation tests + { group: "core-regression", runner: "node", file: "test/extraction-validation.test.mjs", args: ["--test"] }, + { group: "core-regression", runner: "node", file: "test/dedup-false-alarm.test.mjs", args: ["--test"] }, + // Issues #675 #676 batch mode implementation tests + { group: "core-regression", runner: "node", file: "test/pr678-pr723-batch-invalidation.test.mjs" }, +]; + +export function getEntriesForGroup(group) { + if (!CI_TEST_GROUPS.includes(group)) { + throw new Error(`Unknown CI test group: ${group}`); + } + + return CI_TEST_MANIFEST.filter((entry) => entry.group === group); +} \ No newline at end of file diff --git a/src/memory-categories.ts b/src/memory-categories.ts index 7edc7f53..af6604e9 100644 --- a/src/memory-categories.ts +++ b/src/memory-categories.ts @@ -76,6 +76,24 @@ export type ExtractionStats = { superseded?: number; // temporal fact replacements }; +/** + * Payload delivered to `ExtractPersistOptions.onExtractionValidationFailed` + * when the number of entries actually written to the store differs from + * the number of candidates produced by the LLM. + * + * @see ExtractPersistOptions.onExtractionValidationFailed + */ +export type ExtractionValidation = { + /** Number of candidates the LLM intended to create (createEntries.length) */ + expected: number; + /** Number of rows actually written (countAfter - countBefore) */ + actual: number; + /** expected - actual; positive = under-write, negative = over-write (concurrent delete) */ + mismatch: number; + /** Session key passed to extractAndPersist */ + sessionKey: string; +}; + /** Validate and normalize a category string. */ export function normalizeCategory(raw: string): MemoryCategory | null { const lower = raw.toLowerCase().trim(); diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts index 11354ae6..c136c36b 100644 --- a/src/smart-extractor.ts +++ b/src/smart-extractor.ts @@ -25,6 +25,7 @@ import { type DedupDecision, type DedupResult, type ExtractionStats, + type ExtractionValidation, type MemoryCategory, ALWAYS_MERGE_CATEGORIES, MERGE_SUPPORTED_CATEGORIES, @@ -54,6 +55,21 @@ import { batchDedup } from "./batch-dedup.js"; type StoreEntry = Omit; +/** + * Interface for tracking old entries that need to be invalidated (marked as superseded). + * Used in batch mode to collect invalidation operations and execute them after bulkStore. + */ +interface InvalidateEntry { + id: string; + metadata: string; + /** Index into the bulkStore result array for backfilling superseded_by */ + bulkIndex?: number; + /** Original metadata before invalidation, used for rollback on partial failures */ + _origMetadata?: string; + /** ID of the new entry that supersedes this one (set during second pass) */ + newEntryId?: string; +} + // ============================================================================ // Envelope Metadata Stripping // ============================================================================ @@ -273,6 +289,14 @@ export interface ExtractPersistOptions { * - pass a non-empty array to restrict reads to those scopes */ scopeFilter?: string[]; + /** + * Optional callback invoked when the number of entries actually written + * differs from the number of entries the pipeline attempted to write. + * + * Only triggered when `actual !== expected` (mismatch !== 0). + * If omitted, mismatches are silently ignored (fail-silent behavior). + */ + onExtractionValidationFailed?: (validation: ExtractionValidation) => void; } export class SmartExtractor { @@ -420,6 +444,10 @@ export class SmartExtractor { } const createEntries: Omit[] = []; + const invalidateEntries: InvalidateEntry[] = []; + // MR2: track matchIds already queued for supersession in this batch to prevent + // duplicate supersedes of the same entry (which would leave inconsistent superseded_by). + const queuedSupersedeMatchIds = new Set(); for (const { index, candidate } of processableCandidates) { try { @@ -432,6 +460,8 @@ export class SmartExtractor { scopeFilter, precomputedVectors.get(index), createEntries, + invalidateEntries, + queuedSupersedeMatchIds, ); } catch (err) { this.log( @@ -441,7 +471,113 @@ export class SmartExtractor { } if (createEntries.length > 0) { - await this.store.bulkStore(createEntries); + const countBefore = await this.store.count(); + const bulkResults = await this.store.bulkStore(createEntries); + const countAfter = await this.store.count(); + const actual = countAfter - countBefore; + const expected = createEntries.length; + + if (actual !== expected && options.onExtractionValidationFailed) { + options.onExtractionValidationFailed({ + expected, + actual, + mismatch: expected - actual, + sessionKey, + }); + } + + // SECOND PASS: backfill superseded_by for superseded old entries. + // bulkStore returns entries in the same order they were pushed. + // For each invalidateEntry that came from a supersede (bulkIndex is set), + // the new entry's ID is at bulkResults[bulkIndex].id. + // We parse the stored metadata and update it with the new entry's ID. + if (invalidateEntries.length > 0) { + for (const inv of invalidateEntries) { + if (inv.bulkIndex !== undefined && inv.bulkIndex < bulkResults.length) { + const newEntryId = bulkResults[inv.bulkIndex].id; + inv.newEntryId = newEntryId; // persist for rollback + const oldMeta = parseSmartMetadata(inv.metadata, { id: inv.id }); + const updatedMeta = buildSmartMetadata({ metadata: inv.metadata }, { + superseded_by: newEntryId, + relations: appendRelation(oldMeta.relations ?? [], { + type: "superseded_by", + targetId: newEntryId, + }), + }); + inv.metadata = stringifySmartMetadata(updatedMeta); + } + } + } + } + + // Invalidate old entries that were superseded (must happen after bulkStore). + // Each update() call acquires its own lock. This is unavoidable: LanceDB does not support + // atomic "bulk update with where clause". + if (invalidateEntries.length > 0) { + const results = await Promise.allSettled( + invalidateEntries.map((inv) => + this.store.update(inv.id, { metadata: inv.metadata }, scopeFilter), + ), + ); + + const failed = results + .map((r, i) => ({ inv: invalidateEntries[i], result: r })) + .filter(({ result }) => result.status === 'rejected'); + + + if (failed.length > 0) { + const failedIds = failed.map(({ inv }) => inv.id).join(', '); + const failedCount = failed.length; + const succeededCount = invalidateEntries.length - failedCount; + + this.log( + `memory-pro: smart-extractor: ${failedCount}/${invalidateEntries.length} invalidation updates failed after bulkStore succeeded. Failed IDs: ${failedIds}. Rolling back ${succeededCount} succeeded update(s)…`, + ); + + // Rollback Phase 1: delete ALL new entries that bulkStore wrote. + const newEntryIdsToDelete = invalidateEntries + .map((inv) => inv.newEntryId) + .filter((id): id is string => !!id); + + + const succeeded = results + .map((r, i) => ({ inv: invalidateEntries[i], result: r })) + .filter(({ result }) => result.status === 'fulfilled'); + + + const deleteResults = await Promise.allSettled( + newEntryIdsToDelete.map((id) => this.store.delete(id, scopeFilter)), + ); + const deleteFailed = deleteResults.filter((r) => r.status === 'rejected'); + if (deleteFailed.length > 0) { + this.log( + `memory-pro: smart-extractor: ROLLBACK FAILED — ${deleteFailed.length} new entry delete(s) failed. Partial rollback: old entries may still be superseded.`, + ); + } + + // Rollback Phase 2: restore old entries' metadata from _origMetadata. + const restoreResults = await Promise.allSettled( + succeeded.map(({ inv }) => { + const orig = inv._origMetadata; + if (!orig) return Promise.resolve(); + return this.store.update(inv.id, { metadata: orig }, scopeFilter); + }), + ); + + const restoreFailed = restoreResults.filter((r) => r.status === 'rejected'); + const totalFailed = deleteFailed.length + restoreFailed.length; + const deleteSucceeded = deleteResults.filter((r) => r.status === 'fulfilled').length; + if (totalFailed > 0) { + stats.rolledBack = (stats.rolledBack ?? 0) + deleteSucceeded; + this.log( + `memory-pro: smart-extractor: ROLLBACK FAILED — ${totalFailed} operations failed (${deleteFailed.length} deletes + ${restoreFailed.length} restores). Database may have inconsistent supersede state. Affected IDs: ${failedIds}`, + ); + } else { + this.log( + `memory-pro: smart-extractor: Rollback complete — ${succeededCount} old entries restored, ${deleteSucceeded} new entries deleted. No partial state left.`, + ); + } + } } return stats; @@ -663,6 +799,8 @@ export class SmartExtractor { scopeFilter?: string[], precomputedVector?: number[], createEntries?: Omit[], + invalidateEntries?: InvalidateEntry[], + queuedSupersedeMatchIds?: Set, ): Promise { // Profile always merges (skip dedup — admission control still applies) if (ALWAYS_MERGE_CATEGORIES.has(candidate.category)) { @@ -764,18 +902,30 @@ export class SmartExtractor { dedupResult.matchId && TEMPORAL_VERSIONED_CATEGORIES.has(candidate.category) ) { - await this.handleSupersede( - candidate, - vector, - dedupResult.matchId, - sessionKey, - targetScope, - scopeFilter, - admission?.audit, - createEntries, - ); - stats.created++; - stats.superseded = (stats.superseded ?? 0) + 1; + // MR2: if this matchId is already queued for supersession by an earlier + // candidate in this batch, skip the supersede and create as a new entry. + if (queuedSupersedeMatchIds?.has(dedupResult.matchId)) { + this.log( + `memory-pro: smart-extractor: matchId ${dedupResult.matchId.slice(0, 8)} already queued for supersession — creating as new entry instead`, + ); + createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); + stats.created++; + } else { + queuedSupersedeMatchIds?.add(dedupResult.matchId); + await this.handleSupersede( + candidate, + vector, + dedupResult.matchId, + sessionKey, + targetScope, + scopeFilter, + admission?.audit, + createEntries, + invalidateEntries, + ); + stats.created++; + stats.superseded = (stats.superseded ?? 0) + 1; + } } else { createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); stats.created++; @@ -808,18 +958,31 @@ export class SmartExtractor { TEMPORAL_VERSIONED_CATEGORIES.has(candidate.category) && dedupResult.contextLabel === "general" ) { - await this.handleSupersede( - candidate, - vector, - dedupResult.matchId, - sessionKey, - targetScope, - scopeFilter, - admission?.audit, - createEntries, - ); - stats.created++; - stats.superseded = (stats.superseded ?? 0) + 1; + // MR2: same deduplication guard as the regular supersede path — + // if this matchId is already queued for supersession, create as + // a new entry instead of double-superseding. + if (queuedSupersedeMatchIds?.has(dedupResult.matchId)) { + this.log( + `memory-pro: smart-extractor: CONTRADICT matchId ${dedupResult.matchId.slice(0, 8)} already queued — creating as new entry`, + ); + createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); + stats.created++; + } else { + queuedSupersedeMatchIds?.add(dedupResult.matchId); + await this.handleSupersede( + candidate, + vector, + dedupResult.matchId, + sessionKey, + targetScope, + scopeFilter, + admission?.audit, + createEntries, + invalidateEntries, + ); + stats.created++; + stats.superseded = (stats.superseded ?? 0) + 1; + } } else { await this.handleContradict(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit, createEntries); stats.created++; @@ -1162,6 +1325,7 @@ export class SmartExtractor { scopeFilter?: string[], admissionAudit?: AdmissionAuditRecord, createEntries?: StoreEntry[], + invalidateEntries?: InvalidateEntry[], ): Promise { const existing = await this.store.getById(matchId, scopeFilter); if (!existing) { @@ -1175,6 +1339,80 @@ export class SmartExtractor { existingMeta.fact_key ?? deriveFactKey(candidate.category, candidate.abstract); const storeCategory = this.mapToStoreCategory(candidate.category); const supersedeClassifyText = candidate.content || candidate.abstract; + + + // FIX #676: When createEntries is provided, push to batch instead of calling + // store.store() directly. The store.update() to invalidate the old record still + // runs individually (LanceDB does not support batch partial-updates by ID). + if (createEntries) { + // Capture position BEFORE pushing so we can find this new entry in bulkStore results + const bulkIndex = createEntries.length; + + createEntries.push({ + text: candidate.abstract, + vector, + category: storeCategory, + scope: targetScope, + importance: this.getDefaultImportance(candidate.category), + metadata: stringifySmartMetadata( + buildSmartMetadata( + { + text: candidate.abstract, + category: storeCategory, + }, + { + l0_abstract: candidate.abstract, + l1_overview: candidate.overview, + l2_content: candidate.content, + memory_category: candidate.category, + tier: "working", + access_count: 0, + confidence: 0.7, + source_session: sessionKey, + source: "auto-capture", + state: "confirmed", // #350: write confirmed to unblock auto-recall + memory_layer: "working", + injected_count: 0, + bad_recall_count: 0, + suppressed_until_turn: 0, + valid_from: now, + fact_key: factKey, + supersedes: matchId, + relations: appendRelation([], { + type: "supersedes", + targetId: matchId, + }), + memory_temporal_type: classifyTemporal(supersedeClassifyText), + valid_until: inferExpiry(supersedeClassifyText), + }, + ), + ), + }); + + // Invalidate the old entry. Must happen AFTER bulkStore completes. + // We store bulkIndex so the second pass can backfill superseded_by + // once bulkStore returns the generated IDs. + const invalidatedMetadata = buildSmartMetadata(existing, { + fact_key: factKey, + invalidated_at: now, + // NOTE: valid_from is deliberately NOT updated here — preserving the + // old entry's original valid_from maintains correct version-chain + // chronology. + }); + invalidateEntries?.push({ + id: matchId, + metadata: stringifySmartMetadata(invalidatedMetadata), + bulkIndex, // enables second-pass backfill of superseded_by + _origMetadata: existing.metadata, + }); + + this.log( + `memory-pro: smart-extractor: superseded [${candidate.category}] ${matchId.slice(0, 8)} (queued for batch + invalidate queued)`, + ); + return; + } + + // Standalone path (no createEntries — backward compatible) const created = await this.store.store({ text: candidate.abstract, vector, @@ -1226,6 +1464,7 @@ export class SmartExtractor { }), }); + await this.store.update( matchId, { metadata: stringifySmartMetadata(invalidatedMetadata) }, @@ -1342,18 +1581,24 @@ export class SmartExtractor { createEntries?: StoreEntry[], ): Promise { // 1. Record contradiction on the existing memory + // If matchId does not exist, skip contradiction entirely — do NOT create + // a new entry with a dangling contradicts reference. const existing = await this.store.getById(matchId, scopeFilter); - if (existing) { - const meta = parseSmartMetadata(existing.metadata, existing); - const supportInfo = parseSupportInfo(meta.support_info); - const updated = updateSupportStats(supportInfo, contextLabel, "contradict"); - meta.support_info = updated; - await this.store.update( - matchId, - { metadata: stringifySmartMetadata(meta) }, - scopeFilter, + if (!existing) { + this.log( + `memory-pro: smart-extractor: contradict target ${matchId.slice(0, 8)} not found — skipping, no entry created`, ); + return; } + const meta = parseSmartMetadata(existing.metadata, existing); + const supportInfo = parseSupportInfo(meta.support_info); + const updated = updateSupportStats(supportInfo, contextLabel, "contradict"); + meta.support_info = updated; + await this.store.update( + matchId, + { metadata: stringifySmartMetadata(meta) }, + scopeFilter, + ); // 2. Store the contradicting entry as a new memory const storeCategory = this.mapToStoreCategory(candidate.category); diff --git a/test/dedup-false-alarm.test.mjs b/test/dedup-false-alarm.test.mjs new file mode 100644 index 00000000..80d77ee0 --- /dev/null +++ b/test/dedup-false-alarm.test.mjs @@ -0,0 +1,165 @@ +/** + * P0 驗證測試:確認 bulkStore 不會因 batchDedup 去重 + * + * 問題:是否 bulkStore 內部呼叫 batchDedup,導致 near-duplicate entries + * 被錯誤過濾,造成 countAfter - countBefore < createEntries.length? + * + * 測試策略: + * 1. 直接構造兩個 cosine similarity = 0.95 的向量(保證是 near-duplicate) + * 2. 用 batchDedup 確認它們確實被視為 duplicates + * 3. 透過 bulkStore 寫入這兩個 entry + * 4. 驗證 count 增加 2(而非 1)→ 確認 bulkStore 不去重 + */ + +import { describe, it, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { MemoryStore } = jiti("../src/store.ts"); +const { batchDedup } = jiti("../src/batch-dedup.ts"); + +/** 256-dim constant vector with a controlled cosine similarity to a base vector */ +function makeNearDuplicateVector(baseVec, similarity = 0.95) { + const dim = baseVec.length; + // Scale factor: cosine = scale * |base| / |target| = scale (since |base|=|target|=1) + // Actually: cos(base, target) = scale when target = scale * base + orth + const scale = similarity; + return baseVec.map(v => v * scale); +} + +/** Two identical vectors — maximum similarity */ +function makeIdenticalVector(dim = 256) { + const rng = makeRng(42); + return Array.from({ length: dim }, () => rng()); +} + +/** Seeded LCG RNG for deterministic vectors */ +function makeRng(seed) { + let s = seed >>> 0; + return () => { + s = (Math.imul(1664525, s) + 1013904223) >>> 0; + return s / 0x100000000; + }; +} + +/** Cosine similarity between two vectors */ +function cosineSimilarity(a, b) { + let dot = 0, normA = 0, normB = 0; + for (let i = 0; i < a.length; i++) { + dot += a[i] * b[i]; + normA += a[i] * a[i]; + normB += b[i] * b[i]; + } + return dot / (Math.sqrt(normA) * Math.sqrt(normB)); +} + +const TEST_DB_PREFIX = "/tmp/test-dedup-p0-"; + +describe("P0: bulkStore does NOT deduplicate near-duplicate entries", () => { + /** @type {MemoryStore} */ + let store; + let dbPath; + + afterEach(async () => { + if (store) { + try { await store.deleteAll("test-session"); } catch {} + try { await store.destroy(); } catch {} + } + }); + + it("bulkStore writes both near-duplicate entries (cosine = 0.95)", async () => { + // Step 1: Create base vector and a near-duplicate (cosine = 0.95) + const dim = 256; + const baseVec = makeIdenticalVector(dim); + const dupVec = makeNearDuplicateVector(baseVec, 0.95); + + const cosSim = cosineSimilarity(baseVec, dupVec); + console.log(`[P0] cosine similarity between base and near-duplicate: ${cosSim.toFixed(4)}`); + assert(cosSim > 0.94, `Near-duplicate should have cosine > 0.94, got ${cosSim}`); + + // Step 2: Verify batchDedup marks one as duplicate + const dedupResult = batchDedup( + ["abstract one", "abstract two"], + [baseVec, dupVec], + 0.85 // default threshold + ); + console.log(`[P0] batchDedup: ${dedupResult.inputCount} → ${dedupResult.outputCount}, duplicates=${JSON.stringify(dedupResult.duplicateIndices)}`); + assert(dedupResult.outputCount < dedupResult.inputCount, + `batchDedup should mark one as duplicate (input=${dedupResult.inputCount}, output=${dedupResult.outputCount})`); + + // Step 3: Create MemoryStore and write both via bulkStore + dbPath = TEST_DB_PREFIX + Date.now() + "-1"; + store = new MemoryStore({ dbPath, vectorDim: dim }); + const countBefore = await store.count(); + + await store.bulkStore([ + { + text: "Meeting attendance — quarterly business review", + vector: baseVec, + category: "fact", + scope: "test-session", + importance: 0.5, + metadata: JSON.stringify({ l0_abstract: "abstract one" }), + }, + { + text: "Quarterly business review with team lead", + vector: dupVec, + category: "fact", + scope: "test-session", + importance: 0.5, + metadata: JSON.stringify({ l0_abstract: "abstract two" }), + }, + ]); + + const countAfter = await store.count(); + const delta = countAfter - countBefore; + + console.log(`[P0 result] countBefore=${countBefore}, countAfter=${countAfter}, delta=${delta}`); + + // KEY ASSERTION: delta should be exactly 2 — bulkStore does NOT dedupe + assert.strictEqual(delta, 2, + `bulkStore should write both entries (delta=2), got delta=${delta}. ` + + `If delta=1, bulkStore is internally deduplicating near-duplicate entries — this is a P0 bug.`); + }); + + it("bulkStore writes all 5 entries even when batchDedup would reduce them to 1", async () => { + const dim = 256; + + // Create 5 identical vectors — batchDedup with threshold 0.85 will keep only 1 + const baseVec = makeIdenticalVector(dim); + const vectors = Array.from({ length: 5 }, () => baseVec); + + const dedupResult = batchDedup( + Array(5).fill("abstract"), + vectors, + 0.85 + ); + console.log(`[P0 batch] batchDedup: 5 identical vectors → ${dedupResult.outputCount} survivors`); + assert(dedupResult.outputCount < 5, + "Sanity: 5 identical vectors should produce < 5 survivors in batchDedup"); + + // Now write all 5 via bulkStore + dbPath = TEST_DB_PREFIX + Date.now() + "-2"; + store = new MemoryStore({ dbPath, vectorDim: dim }); + const countBefore = await store.count(); + + await store.bulkStore(vectors.map((v, i) => ({ + text: `Event ${i + 1}`, + vector: v, + category: "fact", + scope: "test-session", + importance: 0.5, + metadata: JSON.stringify({ l0_abstract: `abstract ${i}` }), + }))); + + const countAfter = await store.count(); + const delta = countAfter - countBefore; + + console.log(`[P0 batch result] countBefore=${countBefore}, countAfter=${countAfter}, delta=${delta}`); + + // KEY ASSERTION: all 5 should be written despite batchDedup saying they're duplicates + assert.strictEqual(delta, 5, + `bulkStore should write all 5 entries even if batchDedup would drop 4 of them (delta=5), got delta=${delta}`); + }); +}); diff --git a/test/extraction-validation.test.mjs b/test/extraction-validation.test.mjs new file mode 100644 index 00000000..a57ccf1e --- /dev/null +++ b/test/extraction-validation.test.mjs @@ -0,0 +1,389 @@ +/** + * Test: Extraction Write Validation (Issue #693) + * + * Tests the countBefore/countAfter validation logic in extractAndPersist(). + * + * Key challenge: SmartExtractor's `batchDedup` uses cosine similarity (threshold 0.85) + * on embedded candidate abstracts to filter near-duplicates BEFORE the dedup pipeline. + * Simple charCodeAt-based vectors all score >0.85 similarity (common English letters). + * + * Solution: `DeterministicRandomEmbedder` generates 256-dim vectors using a + * seeded RNG (Mulberry32) keyed on the text content. Each distinct text produces + * a unique vector with cosine similarity < 0.85, ensuring candidates survive batchDedup. + * + * Validates: + * T1. Normal extraction: expected === actual, mismatch = 0, callback NOT triggered + * T2. Empty extraction: skipped, mismatch undefined (validation skipped) + * T3. Partial bulkStore failure: actual < expected → mismatch > 0, callback triggered + * T4. Post-write deletion (compactor race): actual < expected → mismatch > 0 + * T5. Callback is optional — no error if omitted even on mismatch + * T6. Multiple extractions each get independent validation state + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { SmartExtractor } = jiti("../src/smart-extractor.ts"); + +// ============================================================================ +// Helpers +// ============================================================================ + +/** + * Mulberry32 seeded PRNG — fast, deterministic, good distribution. + * Returns a new RNG function seeded from an integer. + */ +function makeRng(seed) { + let s = seed >>> 0; + return () => { + s |= 0; + s = (s + 0x6d2b79f5) | 0; + let t = Math.imul(s ^ (s >>> 15), 1 | s); + t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t; + return ((t ^ (t >>> 14)) >>> 0) / 4294967296; + }; +} + +/** + * Deterministic random embedder. + * + * Produces 256-dimensional vectors using a seeded RNG keyed on the text. + * Different texts → different seeds → vectors with cosine similarity < 0.85, + * ensuring candidates survive SmartExtractor's internal batchDedup filter. + * + * This lets us test with 2+ candidates without fighting the dedup logic. + */ +function makeDeterministicEmbedder() { + return { + async embed(text) { + const seed = [...text].reduce((acc, c) => acc + c.charCodeAt(0), 0) >>> 0; + const rng = makeRng(seed === 0 ? 1 : seed); + return Array.from({ length: 256 }, () => rng()); + }, + async embedBatch(texts) { + return texts.map((t) => { + const seed = [...t].reduce((acc, c) => acc + c.charCodeAt(0), 0) >>> 0; + const rng = makeRng(seed === 0 ? 1 : seed); + return Array.from({ length: 256 }, () => rng()); + }); + }, + }; +} + +/** + * Mock LLM — returns configurable candidates and "create" for all dedup decisions. + * The "create" decision ensures candidates progress through the dedup pipeline + * and reach bulkStore without special handling (no handleSupersede, handleMerge, etc.). + */ +function makeLlm(candidates = []) { + return { + async completeJson(_prompt, mode) { + if (mode === "extract-candidates") return { memories: candidates }; + if (mode === "dedup-decision") return { decision: "create", reason: "no match" }; + if (mode === "merge-memory") return candidates[0] ?? null; + return null; + }, + }; +} + +/** + * Mock store with configurable write behavior. + * + * Config: + * initialCount — starting row count (default 0) + * dropLastN — silently drop last N entries on bulkStore (partial write failure) + * bulkStoreThrows — throw on bulkStore (total write failure) + */ +function makeStore(config = {}) { + const { initialCount = 0, dropLastN = 0, bulkStoreThrows = false } = config; + let rowCount = initialCount; + const entries = []; + + const store = { + _config: config, + + async count() { return rowCount; }, + + async vectorSearch() { return []; }, + + async store(entry) { + rowCount++; + entries.push({ action: "store", id: entry.id ?? "?" }); + return { ...entry, id: "direct-id-" + entries.length }; + }, + + async bulkStore(batchEntries) { + if (bulkStoreThrows) throw new Error("bulkStore simulated failure"); + const stored = dropLastN > 0 + ? batchEntries.slice(0, batchEntries.length - dropLastN) + : batchEntries; + for (let i = 0; i < stored.length; i++) { + rowCount++; + entries.push({ action: "bulkStore", id: stored[i].id ?? "bulk-" + i }); + } + if (dropLastN > 0) entries.push({ action: "bulkStore_dropped", count: dropLastN }); + return stored.map((e, i) => ({ ...e, id: "bulk-id-" + i })); + }, + + async update(_id, _patch, _scopeFilter) { + entries.push({ action: "update", id: _id }); + }, + + async getById() { return null; }, + + async delete(_id) { + rowCount = Math.max(0, rowCount - 1); + entries.push({ action: "delete", id: _id }); + }, + + get entries() { return [...entries]; }, + get rowCount() { return rowCount; }, + reset() { rowCount = initialCount; entries.length = 0; }, + }; + return store; +} + +function makeExtractor(embedder, llm, store, config = {}) { + return new SmartExtractor(store, embedder, llm, { + user: "User", + extractMinMessages: 1, + extractMaxChars: 8000, + defaultScope: "global", + log() {}, + debugLog() {}, + ...config, + }); +} + +// ============================================================================ +// Tests +// ============================================================================ + +describe("Issue #693: Extraction write validation", () => { + + // -------------------------------------------------------------------------- + // T1: Normal extraction — expected === actual, mismatch = 0, no callback + // -------------------------------------------------------------------------- + it("T1: normal extraction passes validation with mismatch=0", async () => { + const embedder = makeDeterministicEmbedder(); + // Two semantically different abstracts → low cosine similarity → both survive batchDedup + const llm = makeLlm([ + { + category: "preferences", + abstract: "User prefers dark mode interface settings for eye comfort during coding", + overview: "Display preference", + content: "The user prefers dark mode interface settings on their workstation for reduced eye strain during extended coding sessions.", + }, + { + category: "entities", + abstract: "User works at Acme Corporation headquarters in the R&D division", + overview: "Employment information", + content: "The user is employed as a senior software engineer at Acme Corporation's research and development headquarters facility.", + }, + ]); + const store = makeStore({ initialCount: 0 }); + const extractor = makeExtractor(embedder, llm, store); + + let callbackInvoked = false; + let receivedValidation = null; + + const stats = await extractor.extractAndPersist( + "I use dark mode at work and work at Acme Corp", + "session-t1", + { + onExtractionValidationFailed(validation) { + callbackInvoked = true; + receivedValidation = validation; + }, + } + ); + + assert.strictEqual(stats.created, 2, "both candidates should be created"); + assert.strictEqual( + stats.validationMismatch, + undefined, + "validationMismatch should be undefined (not in public ExtractionStats)" + ); + assert.strictEqual(callbackInvoked, false, "callback should NOT be triggered"); + assert.strictEqual(store.rowCount, 2, "both entries should be written"); + assert.strictEqual(receivedValidation, null, "no validation object received"); + }); + + // -------------------------------------------------------------------------- + // T2: Empty extraction — no bulkStore, validation skipped + // -------------------------------------------------------------------------- + it("T2: empty extraction skips validation", async () => { + const embedder = makeDeterministicEmbedder(); + const llm = makeLlm([]); + const store = makeStore({ initialCount: 5 }); + const extractor = makeExtractor(embedder, llm, store); + + let callbackInvoked = false; + + const stats = await extractor.extractAndPersist( + "nothing to extract here", + "session-t2", + { onExtractionValidationFailed() { callbackInvoked = true; } } + ); + + assert.strictEqual(stats.created, 0, "no entries created"); + assert.strictEqual( + stats.validationMismatch, + undefined, + "validationMismatch should be undefined for empty extraction (validation skipped)" + ); + assert.strictEqual(callbackInvoked, false, "callback should NOT fire"); + assert.strictEqual(store.rowCount, 5, "pre-existing count unchanged"); + }); + + // -------------------------------------------------------------------------- + // T3: Partial bulkStore failure — actual < expected → mismatch > 0 + // -------------------------------------------------------------------------- + it("T3: partial bulkStore failure triggers mismatch > 0", async () => { + const embedder = makeDeterministicEmbedder(); + // 3 candidates → dropLastN=1 → actual=2, mismatch=1 + const llm = makeLlm([ + { + category: "preferences", + abstract: "User prefers dark mode interface settings for eye comfort during coding", + overview: "Display preference", + content: "The user prefers dark mode interface settings on their workstation for reduced eye strain during extended coding sessions.", + }, + { + category: "preferences", + abstract: "User prefers light theme when editing documents and writing emails", + overview: "Display preference", + content: "In contrast to dark mode, the user prefers light theme when editing documents and writing emails in their daily productivity workflow.", + }, + { + category: "entities", + abstract: "User works at Acme Corporation headquarters in the R&D division", + overview: "Employment information", + content: "The user is employed as a senior software engineer at Acme Corporation's research and development headquarters facility.", + }, + ]); + const store = makeStore({ initialCount: 0, dropLastN: 1 }); + const extractor = makeExtractor(embedder, llm, store); + + let callbackInvoked = false; + let receivedValidation = null; + + const stats = await extractor.extractAndPersist( + "I use dark mode for coding but light theme for writing emails at Acme Corp", + "session-t3", + { + onExtractionValidationFailed(validation) { + callbackInvoked = true; + receivedValidation = validation; + }, + } + ); + + // Expected = 3, Actual = 2 (dropLastN=1), Mismatch = 1 + // Note: validationMismatch is NOT written to stats (removed from public API) + // Only the callback receives the mismatch information + assert.strictEqual(callbackInvoked, true, "callback SHOULD be triggered"); + assert.ok(receivedValidation); + assert.strictEqual(receivedValidation.expected, 3); + assert.strictEqual(receivedValidation.actual, 2); + assert.strictEqual(receivedValidation.mismatch, 1); + assert.strictEqual(receivedValidation.sessionKey, "session-t3"); + assert.strictEqual(store.rowCount, 2, "only 2 rows written"); + }); + + // -------------------------------------------------------------------------- + // T5: Callback is optional — no error if omitted + // -------------------------------------------------------------------------- + it("T5: callback is optional — no error if omitted even on mismatch", async () => { + const embedder = makeDeterministicEmbedder(); + // 2 candidates that survive batchDedup, with dropLastN=1 + const llm = makeLlm([ + { + category: "events", + abstract: "User attended quarterly business review meeting with the team lead", + overview: "Meeting attendance", + content: "The quarterly business review meeting was attended by the user along with their direct team members to discuss ongoing project status and future planning initiatives.", + }, + { + category: "events", + abstract: "User participated in a formal code review session with constructive feedback", + overview: "Code review participation", + content: "The user actively participated in a formal code review session where they provided constructive feedback on pull request implementations and discussed architectural decision implications.", + }, + ]); + const store = makeStore({ initialCount: 0, dropLastN: 1 }); + const extractor = makeExtractor(embedder, llm, store); + + // Should NOT throw even though mismatch occurs and callback is absent + // Note: validationMismatch is NOT written to stats (exposed only via callback) + await extractor.extractAndPersist( + "User said: I attended the quarterly business review and participated in a code review", + "session-t5", + {} // no callback + ); + assert.strictEqual(store.rowCount, 1, "1 row written despite mismatch (dropLastN=1)"); + }); + + // -------------------------------------------------------------------------- + // T6: Multiple extractions — independent validation state + // -------------------------------------------------------------------------- + it("T6: multiple extractions each get independent validation", async () => { + const embedder = makeDeterministicEmbedder(); + + // First extraction: normal (no mismatch) + const llm1 = makeLlm([{ + category: "events", + abstract: "User attended quarterly business review meeting with the team lead", + overview: "Meeting", + content: "The quarterly business review meeting was attended by the user along with their direct team members to discuss ongoing project status and future planning initiatives.", + }]); + const store1 = makeStore({ initialCount: 0 }); + const extractor1 = makeExtractor(embedder, llm1, store1); + store1.reset(); + + const validations = []; + + const stats1 = await extractor1.extractAndPersist( + "User said: I attended the quarterly business review", + "session-multi-1", + { onExtractionValidationFailed(v) { validations.push(v); } } + ); + + assert.strictEqual(stats1.validationMismatch, undefined, "first: validationMismatch undefined"); + assert.strictEqual(validations.length, 0, "first: no callback fired"); + + // Second extraction: partial write failure (dropLastN=1) → mismatch=1 + const llm2 = makeLlm([ + { + category: "events", + abstract: "User attended quarterly business review meeting with the team lead", + overview: "Meeting", + content: "The quarterly business review meeting was attended by the user along with their direct team members to discuss ongoing project status and future planning initiatives.", + }, + { + category: "events", + abstract: "User participated in a formal code review session with constructive feedback", + overview: "Code review", + content: "The user actively participated in a formal code review session where they provided constructive feedback on pull request implementations and discussed architectural decision implications.", + }, + ]); + const store2 = makeStore({ initialCount: 0, dropLastN: 1 }); + const extractor2 = makeExtractor(embedder, llm2, store2); + + const stats2 = await extractor2.extractAndPersist( + "User said: I attended a quarterly meeting and participated in a code review", + "session-multi-2", + { onExtractionValidationFailed(v) { validations.push(v); } } + ); + + // Second extraction: 2 candidates, dropLastN=1 → actual=1, expected=2, mismatch=1 + // Note: validationMismatch is NOT written to stats (removed from public API) + // Only the callback receives the mismatch + assert.strictEqual(validations.length, 1, "second: callback fired once"); + assert.strictEqual(validations[0].sessionKey, "session-multi-2"); + assert.strictEqual(validations[0].mismatch, 1); + }); + +}); diff --git a/test/pr678-pr723-batch-invalidation.test.mjs b/test/pr678-pr723-batch-invalidation.test.mjs new file mode 100644 index 00000000..674dc215 --- /dev/null +++ b/test/pr678-pr723-batch-invalidation.test.mjs @@ -0,0 +1,395 @@ +import { describe, it } from "node:test"; +import assert from "node:assert"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { SmartExtractor } = jiti("../src/smart-extractor.ts"); +const { buildSmartMetadata, parseSmartMetadata, stringifySmartMetadata } = jiti("../src/smart-metadata.ts"); + +// ============================================================================ +// Store mock — records all operations, supports batch mode +// ============================================================================ + +let bulkStoreIndex = 0; +let updateShouldFail = false; +let deleteShouldFail = false; + +function makeBatchStore({ existingEntries = [] }) { + const entries = [...existingEntries]; + const operations = []; + + updateShouldFail = false; + deleteShouldFail = false; + bulkStoreIndex = 0; + + const store = { + async vectorSearch(_vector, _limit, _minScore, _scopeFilter, _options) { + return existingEntries.map((e) => ({ entry: e, score: 1.0 })); + }, + + async store(entry) { + const id = `gen-${entries.length}`; + const full = { ...entry, id }; + entries.push(full); + operations.push({ op: "store", entry: { ...entry }, id }); + return full; + }, + + async bulkStore(batchEntries) { + const results = []; + for (const entry of batchEntries) { + const id = `bulk-${bulkStoreIndex++}`; + const full = { ...entry, id }; + entries.push(full); + results.push(full); + operations.push({ op: "bulkStore", entry: { ...entry }, id }); + } + return results; + }, + + async update(id, patch, _scopeFilter) { + operations.push({ op: "update", id, patch: { ...patch } }); + if (updateShouldFail) { + const err = new Error("mock update failure"); + err.code = "MOCK_FAILURE"; + throw err; + } + const entry = entries.find((e) => e.id === id); + if (entry) Object.assign(entry, patch); + }, + + async delete(id, _scopeFilter) { + operations.push({ op: "delete", id }); + if (deleteShouldFail) { + const err = new Error("mock delete failure"); + err.code = "MOCK_FAILURE"; + throw err; + } + const idx = entries.findIndex((e) => e.id === id); + if (idx >= 0) entries.splice(idx, 1); + }, + + async getById(id, _scopeFilter) { + operations.push({ op: "getById", id }); + const found = existingEntries.find((e) => e.id === id); + return found ? JSON.parse(JSON.stringify(found)) : null; + }, + + async count() { + return entries.length; + }, + + get entries() { + return [...entries]; + }, + get operations() { + return [...operations]; + }, + }; + + return store; +} + +// ============================================================================ +// Embedder mock — deterministic vectors +// ============================================================================ + +function makeEmbedder() { + return { + async embed(text) { + // Position-based vector: first 8 chars → [0.1, 0.2, ..., 0.8] + // "abc" → [0.1, 0.2, 0.3, 0, 0, 0, 0, 0] + // Ensures identical prefixes produce identical vectors (similarity 1.0) + const v = Array(8).fill(0); + const prefix = text ? text.slice(0, 8) : ""; + for (let i = 0; i < 8; i++) { + v[i] = i < prefix.length ? (i + 1) * 0.1 : 0; + } + return v; + }, + async embedBatch(texts) { + return texts.map((t) => { + const v = Array(8).fill(0); + const prefix = t ? t.slice(0, 8) : ""; + for (let i = 0; i < 8; i++) { + v[i] = i < prefix.length ? (i + 1) * 0.1 : 0; + } + return v; + }); + }, + }; +} + +// ============================================================================ +// LLM mock — returns candidates and dedup decisions +// ============================================================================ + +function makeLlm(candidates, dedupResponse = null) { + let callCount = 0; + return { + async completeJson(_prompt, mode) { + if (mode === "extract-candidates") { + return { memories: candidates }; + } + if (mode === "dedup-decision") { + callCount++; + if (typeof dedupResponse === "function") { + return dedupResponse(callCount - 1); + } + if (dedupResponse) return dedupResponse; + return { decision: "create", reason: "test" }; + } + return null; + }, + }; +} + +// ============================================================================ +// Tests +// ============================================================================ + +describe("Issue #675 #676 — handleSupersede batch mode", () => { + it("batch mode: uses bulkStore for new entry and calls update to invalidate old entry", async () => { + const existingEntry = { + id: "existing-001", + text: "old preference", + vector: Array(8).fill(0.1), + category: "preferences", + scope: "test", + importance: 0.5, + metadata: stringifySmartMetadata( + buildSmartMetadata( + { text: "old preference", category: "preferences", importance: 0.5 }, + { + memory_category: "preferences", + l0_abstract: "old preference", + tier: "working", + state: "confirmed", + valid_from: Date.now() - 1000, + fact_key: "preferences:old", + }, + ), + ), + }; + + // supersede: tell LLM to supersede the existing entry (match_index 1 = first in topSimilar) + const store = makeBatchStore({ existingEntries: [existingEntry] }); + const embedder = makeEmbedder(); + const llm = makeLlm( + [ + { + category: "preferences", + abstract: "new preference text", + overview: "Updated preference", + content: "new preference text", + confidence: 0.9, + support: [], + }, + ], + { decision: "supersede", reason: "test supersede", match_index: 1 }, + ); + + const extractor = new SmartExtractor(store, embedder, llm, {}); + const stats = await extractor.extractAndPersist("user updated preference", "session-1"); + + const ops = store.operations; + assert.ok( + ops.some((op) => op.op === "getById" && op.id === "existing-001"), + "should call getById for existing entry", + ); + + // bulkStore is called (batch mode), NOT store + const bulkStoreCalls = ops.filter((op) => op.op === "bulkStore"); + assert.equal(bulkStoreCalls.length, 1, "should call bulkStore exactly once"); + assert.equal(stats.created, 1, "stats.created should be 1"); + assert.equal(stats.superseded, 1, "stats.superseded should be 1"); + }); + + it("batch mode: preserves original valid_from when invalidating old entry", async () => { + const originalValidFrom = Date.now() - 2000; + const existingEntry = { + id: "existing-002", + text: "old preference", + vector: Array(8).fill(0.1), + category: "preferences", + scope: "test", + importance: 0.5, + metadata: stringifySmartMetadata( + buildSmartMetadata( + { text: "old preference", category: "preferences", importance: 0.5 }, + { + memory_category: "preferences", + l0_abstract: "old preference", + tier: "working", + state: "confirmed", + valid_from: originalValidFrom, + fact_key: "preferences:old", + }, + ), + ), + }; + + const store = makeBatchStore({ existingEntries: [existingEntry] }); + const embedder = makeEmbedder(); + const llm = makeLlm( + [ + { + category: "preferences", + abstract: "updated text", + overview: "Updated", + content: "updated text", + confidence: 0.9, + support: [], + }, + ], + { decision: "supersede", reason: "test supersede", match_index: 1 }, + ); + + const extractor = new SmartExtractor(store, embedder, llm, {}); + await extractor.extractAndPersist("user updated", "session-2"); + + const ops = store.operations; + const updateCalls = ops.filter((op) => op.op === "update"); + assert.equal(updateCalls.length, 1, "should call update to invalidate old entry"); + const updatedMeta = parseSmartMetadata(updateCalls[0].patch.metadata, {}); + assert.equal( + updatedMeta.valid_from, + originalValidFrom, + "original valid_from should be preserved in invalidated entry", + ); + }); + + it("standalone path (no existing entry): uses bulkStore (batch mode) for new entry", async () => { + // No existing entries → dedup returns "create" → goes through batch path + const store = makeBatchStore({ existingEntries: [] }); + const embedder = makeEmbedder(); + const llm = makeLlm([ + { + category: "preferences", + abstract: "brand new fact", + overview: "New", + content: "brand new fact", + confidence: 0.9, + support: [], + }, + ]); + + const extractor = new SmartExtractor(store, embedder, llm, {}); + const stats = await extractor.extractAndPersist("new fact", "session-3"); + + const ops = store.operations; + // Since no existing, vectorSearch returns [], dedup returns "create" → bulkStore + const bulkStoreCalls = ops.filter((op) => op.op === "bulkStore"); + assert.equal(bulkStoreCalls.length, 1, "should call bulkStore for new entry"); + assert.equal(stats.created, 1, "stats.created should be 1"); + }); +}); +describe("Issue #675 #676 — handleContradict null check", () => { + it("skips contradiction when getById returns null — no entry created, no update", async () => { + // Use "patterns" (TEMPORAL_VERSIONED) so contradict goes to handleSupersede + // (not handleContradict). With empty store, topSimilar=[] and match_index: 1, + // hasValidIndex = false → destructive decision guard triggers → degrade to "create". + const store = makeBatchStore({ existingEntries: [] }); + const embedder = makeEmbedder(); + const llm = makeLlm( + [ + { + category: "patterns", // TEMPORAL_VERSIONED → handleSupersede path + abstract: "some pattern detail that is long enough", + overview: "Pattern", + content: "some pattern detail that is long enough", + confidence: 0.9, + support: [], + }, + ], + // match_index: 1 but topSimilar is [] → hasValidIndex = false + // → destructive decision guard triggers → degrade to "create" + { decision: "contradict", reason: "test contradict", match_index: 1 }, + ); + + const extractor = new SmartExtractor(store, embedder, llm, {}); + const stats = await extractor.extractAndPersist( + "contradict with missing target", + "session-no-target", + ); + + const ops = store.operations; + + // With empty store, contradict degrades to create → no getById called + const getByIdCalls = ops.filter((op) => op.op === "getById"); + assert.equal(getByIdCalls.length, 0, "no getById when topSimilar is empty (decision degraded to create)"); + + // Correctly creates the new entry (contradict degraded to create) + const bulkStoreCalls = ops.filter((op) => op.op === "bulkStore"); + assert.equal(bulkStoreCalls.length, 1, "should call bulkStore (contradict degraded to create)"); + assert.equal(stats.created, 1, "stats.created should be 1"); + + // No update because no existing target to contradict + const updateCalls = ops.filter((op) => op.op === "update"); + assert.equal(updateCalls.length, 0, "should NOT call update (no existing entry to contradict)"); + }); +}); + +describe("Issue #675 #676 — rollback on partial invalidation failure", () => { + it("bulkStore succeeds, update fails: deletes new entries (rollback phase 1)", async () => { + const existingEntry = { + id: "rollback-001", + text: "old preference", + vector: Array(8).fill(0.1), + category: "preferences", + scope: "test", + importance: 0.5, + metadata: stringifySmartMetadata( + buildSmartMetadata( + { text: "old preference", category: "preferences", importance: 0.5 }, + { + memory_category: "preferences", + l0_abstract: "old preference", + tier: "working", + state: "confirmed", + valid_from: Date.now() - 1000, + fact_key: "preferences:rollback", + }, + ), + ), + }; + + const store = makeBatchStore({ existingEntries: [existingEntry] }); + updateShouldFail = true; // Make update fail + + const embedder = makeEmbedder(); + const llm = makeLlm( + [ + { + category: "preferences", + abstract: "new preference text", + overview: "New Pref", + content: "new preference text", + confidence: 0.9, + support: [], + }, + ], + { decision: "supersede", reason: "test supersede", match_index: 1 }, + ); + + const extractor = new SmartExtractor(store, embedder, llm, {}); + const stats = await extractor.extractAndPersist( + "rollback test", + "session-rollback", + ); + + const ops = store.operations; + + // bulkStore was called (new entry created) + const bulkStoreCalls = ops.filter((op) => op.op === "bulkStore"); + assert.equal(bulkStoreCalls.length, 1, "bulkStore should have been called"); + + // update failed (we configured updateShouldFail) + const updateCalls = ops.filter((op) => op.op === "update"); + assert.ok(updateCalls.length >= 1, "update should have been attempted"); + + // Rollback: delete new entries created by bulkStore + const deleteCalls = ops.filter((op) => op.op === "delete"); + assert.equal(deleteCalls.length, 1, "rollback should delete the new entry from bulkStore"); + }); +}); \ No newline at end of file diff --git a/test/preference-slots.test.mjs b/test/preference-slots.test.mjs index 1849ef31..4e20fabe 100644 --- a/test/preference-slots.test.mjs +++ b/test/preference-slots.test.mjs @@ -125,6 +125,7 @@ function makeGuardExtractor({ vectorSearchResults, onDedupCalled }) { stored.push(...entries); return entries; }, + async count() { return stored.length; }, }; const embedder = { async embed() { @@ -241,6 +242,7 @@ test("dedup guard: non-preference category -> skips guard, goes to LLM", async ( }, async store() {}, async bulkStore() { return []; }, + async count() { return 0; }, }; const embedder = { async embed() { return [0.1, 0.2, 0.3]; }, diff --git a/test/smart-extractor-batch-embed.test.mjs b/test/smart-extractor-batch-embed.test.mjs index ea0120b0..634f291e 100644 --- a/test/smart-extractor-batch-embed.test.mjs +++ b/test/smart-extractor-batch-embed.test.mjs @@ -99,6 +99,9 @@ function makeStore() { async getById(_id, _scopeFilter) { return null; }, + async count() { + return entries.length; + }, get entries() { return [...entries]; }, diff --git a/test/smart-extractor-bulk-store-edge-cases.test.mjs b/test/smart-extractor-bulk-store-edge-cases.test.mjs index 54153158..2eb5ac50 100644 --- a/test/smart-extractor-bulk-store-edge-cases.test.mjs +++ b/test/smart-extractor-bulk-store-edge-cases.test.mjs @@ -61,6 +61,7 @@ class MockStore { async vectorSearch() { return []; } async getById() { return null; } + async count() { return 0; } } // ============================================================ diff --git a/test/smart-extractor-bulk-store.test.mjs b/test/smart-extractor-bulk-store.test.mjs index cef140c5..b39c06f0 100644 --- a/test/smart-extractor-bulk-store.test.mjs +++ b/test/smart-extractor-bulk-store.test.mjs @@ -66,6 +66,7 @@ class MockStore { async vectorSearch() { return []; } async getById() { return null; } + async count() { return 0; } } // ============================================================ diff --git a/test/smart-extractor-scope-filter.test.mjs b/test/smart-extractor-scope-filter.test.mjs index adef26da..488fb12f 100644 --- a/test/smart-extractor-scope-filter.test.mjs +++ b/test/smart-extractor-scope-filter.test.mjs @@ -13,6 +13,7 @@ function makeExtractor(scopeFilters) { }, async store() {}, async bulkStore() {}, + async count() { return 0; }, }; const embedder = {