From 3d6480a67c2a67c2a6105dbe6c8eb47dee9067b8 Mon Sep 17 00:00:00 2001 From: Justn Date: Mon, 20 Apr 2026 23:55:05 +0900 Subject: [PATCH] feat: add missing-null-guard prior + more artifact patterns + docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three small follow-ups bundled to keep self-review cost to one cycle. A. Finding-class priors: new `missing-null-guard` class Observed on PR #499's own self-review as an FP (claimed missing guard on a value that's already guarded at function entry). Adds 4 patterns covering "missing null check", "no null guard", "null/undefined check", "undefined reference". Multiplier 0.7 — same as missing-validation, same structural FP shape. 3 new tests. C. Chunker: more BUILT_IN_ARTIFACT_PATTERNS Python (*.pyc, __pycache__, *.whl, egg-info), Go (vendor/), Rust (target/debug, target/release), Java (*.class, *.jar, *.war), Ruby (.bundle/), plus *.avif, *.bz2, *.wasm binary assets. 14 new pattern-match tests. E. Docs: HALLUCINATION_FILTER_DESIGN.md Original doc still described the 3-check Layer-1 design from #428. Replaced the check list with the current 7-check set (from #428, #462, #468 main, #499), documented the FINDING_CLASS_PRIORS table and witness-based echo dampener for discoverability. Tests: 3425 → 3442 (+17). Typecheck clean. 
--- docs/HALLUCINATION_FILTER_DESIGN.md | 32 +++++++++++++++---- packages/core/src/pipeline/chunker.ts | 16 ++++++++++ .../core/src/pipeline/finding-class-scorer.ts | 11 +++++++ .../src/tests/finding-class-scorer.test.ts | 31 ++++++++++++++++++ .../core/src/tests/pipeline-chunker.test.ts | 15 +++++++++ 5 files changed, 99 insertions(+), 6 deletions(-) diff --git a/docs/HALLUCINATION_FILTER_DESIGN.md b/docs/HALLUCINATION_FILTER_DESIGN.md index e0605db..65cdb33 100644 --- a/docs/HALLUCINATION_FILTER_DESIGN.md +++ b/docs/HALLUCINATION_FILTER_DESIGN.md @@ -46,17 +46,37 @@ Findings reference files not in the diff, lines outside diff hunks, or fabricate **Cost**: $0 (pure code, no model calls) **Blocks**: Path 3 -Programmatic validation of every evidence document: +Programmatic validation of every evidence document. The filter has grown from 3 checks in the original design to **7 checks** as of 2026-04-20: -1. **File existence**: `doc.filePath` must be in `extractFileListFromDiff(diffContent)`. If not → remove. -2. **Line range**: `doc.lineRange` must overlap with at least one diff hunk for that file. If not → remove. -3. **Code quote verification**: If `doc.problem` contains inline code quotes, check if they exist in the diff. If fabricated → confidence × 0.5. 
+| # | Check | Effect | +|---|-------|--------| +| 1 | File existence | `doc.filePath` must be in diff file list → else hard-remove | +| 2 | Line range | `doc.lineRange` must overlap a diff hunk ±10 lines → else hard-remove | +| 3 | Code quote verification | Backtick-quoted code must appear in diff → fabricated > 50% → confidence × 0.5 | +| 4 | Self-contradiction | Claim of "added" vs actual removals (or vice versa) → confidence × 0.5 | +| 5 | Speculative language | Hedge markers ("may not exist", "potentially broken") → confidence × 0.7 | +| 6 | Evidence quality (#468) | 0–1 score from evidence list length + problem length + specificity markers → multiplier 0.7 + 0.3 × score | +| 7 | Finding-class prior (#468 follow-up) | Empirically FP-heavy categories (ReDoS, may-throw, missing-validation, missing-null-guard, zero-width) → per-class multiplier 0.5–0.85 | + +Checks 1–2 are hard removes. Checks 3–7 apply multiplicative penalties that compound — a finding that trips speculation (×0.7) + evidence (×0.7) + class prior (×0.7) ends at 0.34× its raw confidence. ```typescript -function filterHallucinations(docs, diffContent): { filtered, removed } +function filterHallucinations(docs, diffContent): { filtered, removed, uncertain } ``` -**Expected impact**: ~3-4 findings removed per review (files not in diff, fabricated code). +**Expected impact**: with all seven checks active, the 2026-04-20 baseline showed the `fp-moderator-regex` fixture's phantom CRITICAL findings moved from must-fix (100% confidence) to verify/ignore (43% or below) across multiple runs. + +#### Finding-class priors table + +`packages/core/src/pipeline/finding-class-scorer.ts` exports `FINDING_CLASS_PRIORS` — the one place to tune per-class multipliers. Ordered so specific classes match before `generic-potential`. Each entry maps multiple regex patterns to a single id + multiplier. 
To add a new class when an FP pattern emerges empirically, insert a new entry before `generic-potential` (preserving the specific-before-generic order) with patterns matched against `issueTitle + problem`. + +#### Witness-based corroboration echo dampener (#5) + +`computeL1Confidence` applies an additional ×0.75 dampener when: +- 3+ co-located findings exist with non-empty evidence +- Majority share the same fingerprint (first 80 normalised chars of joined `evidence[]`) + +This catches correlated-failure cascades where multiple reviewers latch onto the same superficial cue instead of corroborating independently. ### Layer 2: Corroboration Scoring diff --git a/packages/core/src/pipeline/chunker.ts b/packages/core/src/pipeline/chunker.ts index ed2c75b..2661fac 100644 --- a/packages/core/src/pipeline/chunker.ts +++ b/packages/core/src/pipeline/chunker.ts @@ -325,6 +325,7 @@ export const BUILT_IN_ARTIFACT_PATTERNS = [ '**/*.ico', '**/*.bmp', '**/*.tiff', + '**/*.avif', '**/*.woff', '**/*.woff2', '**/*.ttf', @@ -339,12 +340,27 @@ export const BUILT_IN_ARTIFACT_PATTERNS = [ '**/*.tar', '**/*.gz', '**/*.tgz', + '**/*.bz2', '**/*.7z', '**/*.rar', + '**/*.wasm', // Test snapshots '**/*.snap', '**/__snapshots__/**', + + // Additional ecosystem artifacts (Python / Go / Rust / Java / Ruby) + '**/*.pyc', + '**/__pycache__/**', + '**/*.whl', + '**/*.egg-info/**', + 'vendor/**', + 'target/debug/**', + 'target/release/**', + '**/*.class', + '**/*.jar', + '**/*.war', + '.bundle/**', ]; // ============================================================================ diff --git a/packages/core/src/pipeline/finding-class-scorer.ts b/packages/core/src/pipeline/finding-class-scorer.ts index 599b731..7333f66 100644 --- a/packages/core/src/pipeline/finding-class-scorer.ts +++ b/packages/core/src/pipeline/finding-class-scorer.ts @@ -88,6 +88,17 @@ export const FINDING_CLASS_PRIORS: FindingClassPrior[] = [ /\bunvalidated\s+(?:input|parameter|argument|user\s+input)\b/i, ], }, + { + id: 'missing-null-guard', + label: 'missing null / undefined guard', + multiplier: 
0.7, + patterns: [ + /\bmissing\s+null(?:[/\-\s]?(?:undefined))?\s+(?:guard|check)\b/i, + /\b(?:no|without|lacks)\s+null(?:[/\-\s]?(?:undefined))?\s+(?:guard|check)\b/i, + /\bnull\s*\/\s*undefined\s+check\b/i, + /\bundefined\s+reference\b/i, + ], + }, { id: 'generic-potential', label: 'generic "potential" security concern', diff --git a/packages/core/src/tests/finding-class-scorer.test.ts b/packages/core/src/tests/finding-class-scorer.test.ts index 419a83c..6935972 100644 --- a/packages/core/src/tests/finding-class-scorer.test.ts +++ b/packages/core/src/tests/finding-class-scorer.test.ts @@ -104,6 +104,37 @@ describe('matchFindingClass — positive matches', () => { expect(match.id).toBe('zero-width'); }); + it('catches "missing null guard" (PR #499 self-review FP)', () => { + const match = matchFindingClass( + doc({ + issueTitle: 'Missing null guard for activeReviewers in computeL1Confidence', + problem: 'The function does not check whether activeReviewers is null before use.', + }), + )!; + expect(match.id).toBe('missing-null-guard'); + expect(match.multiplier).toBe(0.7); + }); + + it('catches "no null check" phrasing', () => { + const match = matchFindingClass( + doc({ + issueTitle: 'Null dereference risk', + problem: 'The code has no null check on the response object.', + }), + )!; + expect(match.id).toBe('missing-null-guard'); + }); + + it('catches "null/undefined check" phrasing', () => { + const match = matchFindingClass( + doc({ + issueTitle: 'Potential undefined reference in handler', + problem: 'A null/undefined check is missing before property access.', + }), + )!; + expect(match.id).toBe('missing-null-guard'); + }); + it('catches generic "potential security concern" phrasing (run 3 FP)', () => { const match = matchFindingClass( doc({ diff --git a/packages/core/src/tests/pipeline-chunker.test.ts b/packages/core/src/tests/pipeline-chunker.test.ts index 4d5e69e..af65da5 100644 --- a/packages/core/src/tests/pipeline-chunker.test.ts +++ 
b/packages/core/src/tests/pipeline-chunker.test.ts @@ -231,6 +231,21 @@ describe('BUILT_IN_ARTIFACT_PATTERNS', () => { 'src/__snapshots__/foo.test.ts.snap', 'components/__snapshots__/Button.test.tsx.snap', 'src/foo.snap', + // Additional ecosystems + 'assets/hero.avif', + 'releases/archive.bz2', + 'pkg/runtime.wasm', + 'src/foo.pyc', + 'src/__pycache__/compiled.cpython-312.pyc', + 'dist/my_pkg-1.0-py3-none-any.whl', + 'dist/my_pkg.egg-info/PKG-INFO', + 'vendor/github.com/pkg/errors/errors.go', + 'target/debug/build/foo.o', + 'target/release/build/foo.o', + 'classes/com/example/Foo.class', + 'libs/common-1.0.jar', + 'deploy/webapp-1.0.war', + '.bundle/config', ].filter((p) => !p.endsWith('.ignored-ext-so-skip')); // Files that MUST survive the built-in filter — real source code that