From c4655ca5ee83f9dd67d4fcafd37d58e1f97c66d2 Mon Sep 17 00:00:00 2001
From: "Finn (EACG)" <fln@eacg.de>
Date: Thu, 11 Jun 2026 06:55:32 +0000
Subject: [PATCH 1/2] feat(checkpoint): add append-only scan log for sub-phase
 resume and dynamic concurrency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Supplements the existing phase-boundary checkpoint with a JSONL append-only
scan log (.sandyaa/scan-log-<hash>.jsonl).  Each completed sub-step appends
one line; on resume the log is replayed so the scan continues from the last
saved step rather than redoing work that had already completed.

Five save points per scan:
  1. After AI file prioritization — restores the ranked file list so the
     expensive LLM call is not repeated.
  2. After each chunk's vulnerability detection — raw findings are stored;
     if interrupted mid-verification the detector is not re-invoked.
  3. After each finding's recursive verification — verificationStatus,
     confidence, and contradictions are saved so already-verified findings
     are skipped on resume.
  4. After each POC generation attempt — the success/failed/skipped/error
     status and, when one was produced, the POC code itself are saved so
     already-generated POCs are restored instead of regenerated.
  5. After SARIF generation — prevents duplicate report writes on resume.

Additional changes:
  - RecursiveStrategyEngine.apply() now accepts an optional options bag with
    alreadyVerified (Map<id, result>) and onFindingVerified callback.
  - Orchestrator restores allVulnerabilities from the scan log for completed
    chunks so the final SARIF includes findings from all runs.
  - STRATEGY_CONCURRENCY in context-analyzer now defaults to
    cpuCount * 2 - 2 (cpuCount = os.cpus().length), clamped to [2, 8],
    instead of a hard-coded 4.  A valid SANDYAA_STRATEGY_CONCURRENCY
    value (1-8) still takes precedence.
  - Old checkpoint files remain fully backwards-compatible.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/analyzer/context-analyzer.ts    |   6 +-
 src/orchestrator/orchestrator.ts    | 269 ++++++++++++++++++++--------
 src/recursive/recursive-strategy.ts |  38 +++-
 src/utils/scan-log.ts               | 146 +++++++++++++++
 4 files changed, 378 insertions(+), 81 deletions(-)
 create mode 100644 src/utils/scan-log.ts
diff --git a/src/analyzer/context-analyzer.ts b/src/analyzer/context-analyzer.ts
index 76c2924..62cd666 100644
--- a/src/analyzer/context-analyzer.ts
+++ b/src/analyzer/context-analyzer.ts
@@ -9,6 +9,7 @@ import { PathResolver } from '../utils/path-resolver.js';
 import { LightweightCodeFilter } from '../utils/code-filter.js';
 import * as fs from 'fs/promises';
 import { realpathSync } from 'fs';
+import * as os from 'os';
 import * as path from 'path';
 import { fileURLToPath } from 'url';
 import chalk from 'chalk';
@@ -451,7 +452,10 @@ export class ContextAnalyzer {
     const STRATEGY_CONCURRENCY = (() => {
       const raw = parseInt(process.env.SANDYAA_STRATEGY_CONCURRENCY || '', 10);
       if (Number.isFinite(raw) && raw >= 1 && raw <= 8) return raw;
-      return 4;
+      // Scale with available CPUs: leave 2 threads for the orchestrator process.
+      // Clamp to [2, 8] so we stay aggressive on beefy machines but sane elsewhere.
+      const cpuCount = os.cpus().length;
+      return Math.max(2, Math.min(8, cpuCount * 2 - 2));
     })();
 
     type StrategyOutcome = {
diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts
index d738525..731f33c 100644
--- a/src/orchestrator/orchestrator.ts
+++ b/src/orchestrator/orchestrator.ts
@@ -4,6 +4,7 @@ import { POCGenerator } from '../poc-gen/poc-generator.js';
 import { Reporter } from '../reporter/reporter.js';
 import { SarifReporter } from '../reporter/sarif-reporter.js';
 import { Checkpoint } from '../utils/checkpoint.js';
+import { ScanLog, ScanState } from '../utils/scan-log.js';
 import { FileScanner } from '../utils/file-scanner.js';
 import { RecursiveStrategyEngine } from '../recursive/recursive-strategy.js';
 import { GitHelper } from '../utils/git-helper.js';
@@ -107,6 +108,7 @@ export interface Config {
 export class Orchestrator {
   private config: Config;
   private checkpoint: Checkpoint;
+  private scanLog: ScanLog;
   private analyzer: ContextAnalyzer;
   private detector: VulnerabilityDetector;
   private pocGen: POCGenerator;
@@ -127,8 +129,9 @@ export class Orchestrator {
 
   constructor(config: Config) {
     this.config = config;
-    // Checkpoint, Reporter, and Detector will be initialized in run() with target-specific path
+    // Checkpoint, ScanLog, Reporter, and Detector will be initialized in run() with target-specific path
     this.checkpoint = null as any; // Temporary, will be set in run()
+    this.scanLog = null as any;    // Temporary, will be set in run()
     this.reporter = null as any; // Temporary, will be set in run()
     this.detector = null as any; // Temporary, will be set in run()
     this.analyzer = new ContextAnalyzer(config);
@@ -154,14 +157,17 @@ export class Orchestrator {
     this.dashboard = new DashboardRenderer();
   }
 
+  private getSandyaaDir(): string {
+    return path.dirname(this.config.output.checkpoint_file);
+  }
+
   private getCheckpointFile(targetPath: string): string {
     // Create unique checkpoint file for each project (based on absolute path hash)
     const hash = crypto.createHash('sha256')
       .update(path.resolve(targetPath))
       .digest('hex')
       .substring(0, 12);
-    const checkpointDir = path.dirname(this.config.output.checkpoint_file);
-    return path.join(checkpointDir, `checkpoint-${hash}.json`);
+    return path.join(this.getSandyaaDir(), `checkpoint-${hash}.json`);
   }
 
   async run(startFresh: boolean = false, sarif: boolean = false, tsUpload?: string, tsProject?: string): Promise<void> {
@@ -205,9 +211,10 @@ export class Orchestrator {
       executor.setTargetPath(resolvedTarget);
     }
 
-    // Initialize project-specific checkpoint, reporter, and detector (after we know final target path)
+    // Initialize project-specific checkpoint, scan log, reporter, and detector
     const checkpointFile = this.getCheckpointFile(targetPath);
     this.checkpoint = new Checkpoint(checkpointFile);
+    this.scanLog = new ScanLog(ScanLog.getLogFile(targetPath, this.getSandyaaDir()));
     this.reporter = new Reporter(this.config, targetPath);
     this.detector = new VulnerabilityDetector(this.config, targetPath);
     if (sarif || tsUpload) {
@@ -228,12 +235,20 @@ export class Orchestrator {
 
     // Check for existing checkpoint and ask user
     let processedFiles = new Set<string>();
+    let scanState: ScanState = {
+      prioritizedFiles: null,
+      detectedChunks: new Map(),
+      verifiedFindings: new Map(),
+      pocResults: new Map(),
+      sarifWritten: false,
+    };
     const checkpointData = await this.checkpoint.loadForTarget(targetPath);
 
     if (startFresh) {
       // User explicitly wants fresh start
       if (checkpointData && checkpointData.processedFiles.length > 0) {
         await this.checkpoint.clear();
+        await this.scanLog.clear();
         console.log(chalk.green('Starting fresh analysis (checkpoint cleared)...\n'));
       }
     } else if (checkpointData && checkpointData.processedFiles.length > 0) {
@@ -263,9 +278,14 @@ export class Orchestrator {
       if (shouldResume) {
         processedFiles = new Set(checkpointData.processedFiles);
         totalBugsFound = checkpointData.totalBugsFound;
+        scanState = await this.scanLog.loadState();
         console.log(chalk.green('Resuming from checkpoint...\n'));
+        if (scanState.detectedChunks.size > 0) {
+          console.log(chalk.gray(`    Scan log: ${scanState.detectedChunks.size} chunks with cached detection, ${scanState.verifiedFindings.size} verified findings, ${scanState.pocResults.size} POCs`));
+        }
       } else {
         await this.checkpoint.clear();
+        await this.scanLog.clear();
         console.log(chalk.green('Starting fresh analysis...\n'));
       }
     }
@@ -297,16 +317,27 @@ export class Orchestrator {
     let phaseStart = 0;
 
     if (filesToProcess.length > 1000 && processedFiles.size === 0) {
-      const prioritizer = new FilePrioritizer(targetPath, this.config.provider);
-      const prioritized = await prioritizer.prioritize(filesToProcess, {
-        phase: 'high-value',
-        samplingRate: 0.1,
-        focusAreas: []
-      });
+      // Re-use saved prioritization from the scan log if available (skips AI call)
+      if (scanState.prioritizedFiles && scanState.prioritizedFiles.length > 0) {
+        prioritizedFiles = scanState.prioritizedFiles;
+        console.log(chalk.gray(`    Restored prioritized file list from scan log (${prioritizedFiles.length} files)`));
+      } else {
+        const prioritizer = new FilePrioritizer(targetPath, this.config.provider);
+        const prioritized = await prioritizer.prioritize(filesToProcess, {
+          phase: 'high-value',
+          samplingRate: 0.1,
+          focusAreas: []
+        });
 
-      prioritizedFiles = prioritized
-        .sort((a: any, b: any) => b.priority - a.priority)
-        .map((p: any) => p.path);
+        prioritizedFiles = prioritized
+          .sort((a: any, b: any) => b.priority - a.priority)
+          .map((p: any) => p.path);
+
+        await this.scanLog.append({
+          step: 'prioritize',
+          result: { high_priority_count: prioritizedFiles.length, files: prioritizedFiles },
+        });
+      }
 
       // Phase 1: Analyze prioritized targets only
       filesToProcess = prioritizedFiles;
@@ -316,10 +347,35 @@ export class Orchestrator {
       console.log(chalk.cyan(`Phase 2: Systematic coverage (${filesToProcess.length} files remaining)\n`));
     }
 
+    // Restore allVulnerabilities for chunks already completed in a prior run.
+    // This ensures the final SARIF report is complete even on a resumed scan.
+    const allVulnerabilities: any[] = [];
+    for (const [, chunkData] of scanState.detectedChunks) {
+      const allFilesProcessed = chunkData.files.every(f => processedFiles.has(f));
+      if (!allFilesProcessed) continue;  // Will be (re-)processed in the main loop below
+      for (const finding of chunkData.findings) {
+        const enriched = { ...finding };
+        const verifyResult = scanState.verifiedFindings.get(finding.id);
+        if (verifyResult) {
+          enriched.verificationStatus = verifyResult.status;
+          enriched.confidence = verifyResult.confidence;
+          enriched.needsManualReview = verifyResult.needsManualReview;
+          if (verifyResult.contradictions) enriched.contradictions = verifyResult.contradictions;
+        }
+        const pocResult = scanState.pocResults.get(finding.id);
+        if (pocResult?.poc) {
+          enriched.poc = { ...pocResult.poc };
+        }
+        allVulnerabilities.push(enriched);
+      }
+    }
+    if (allVulnerabilities.length > 0) {
+      console.log(chalk.gray(`    Restored ${allVulnerabilities.length} findings from scan log for prior-run chunks`));
+    }
+
     // Process files in dynamic chunks (adapts based on complexity)
     let iteration = 0;
     let i = 0;
-    const allVulnerabilities: any[] = [];
 
     while (i < filesToProcess.length) {
       iteration++;
@@ -330,7 +386,7 @@ export class Orchestrator {
 
       const { bugsFound, findings } = await this.processChunk(
         chunk, iteration, phase, targetPath, processedFiles, totalBugsFound,
-        estimatedChunksRemaining, files.length
+        estimatedChunksRemaining, files.length, scanState
       );
       totalBugsFound += bugsFound;
       allVulnerabilities.push(...findings);
@@ -375,7 +431,7 @@ export class Orchestrator {
 
             const { bugsFound, findings } = await this.processChunk(
               chunk, iteration, 'systematic', targetPath, processedFiles, totalBugsFound,
-              estimatedChunksRemaining, files.length
+              estimatedChunksRemaining, files.length, scanState
             );
             totalBugsFound += bugsFound;
             allVulnerabilities.push(...findings);
@@ -409,8 +465,9 @@ export class Orchestrator {
     }
 
     // Generate SARIF report if requested
-    if (this.sarifReporter) {
+    if (this.sarifReporter && !scanState.sarifWritten) {
       await this.sarifReporter.generate(allVulnerabilities);
+      await this.scanLog.append({ step: 'sarif', result: { written: true } });
 
       // Upload to TrustSource if --ts-upload was given
       if (tsUpload) {
@@ -421,6 +478,8 @@ export class Orchestrator {
           console.log(chalk.yellow('The local SARIF file is still available in the findings directory.'));
         }
       }
+    } else if (this.sarifReporter && scanState.sarifWritten) {
+      console.log(chalk.gray('SARIF already written in a prior run — skipping duplicate generation.'));
     }
 
     // Generate summary report
@@ -451,7 +510,14 @@ export class Orchestrator {
     processedFiles: Set<string>,
     totalBugsFound: number,
     estimatedChunksRemaining: number,
-    totalFilesCount: number
+    totalFilesCount: number,
+    scanState: ScanState = {
+      prioritizedFiles: null,
+      detectedChunks: new Map(),
+      verifiedFindings: new Map(),
+      pocResults: new Map(),
+      sarifWritten: false,
+    }
   ): Promise<{ bugsFound: number; findings: any[] }> {
     console.log(chalk.bold(`\n[${phase}] Chunk ${iteration} (${chunk.length} files | ~${estimatedChunksRemaining} chunks remaining)`));
     console.log(chalk.gray(`  ${this.dynamicChunker.getExplanation()}`));
@@ -484,40 +550,55 @@ export class Orchestrator {
       `${contextTokens.toLocaleString()} tokens (${contextWindowPercent}% of ${(getDefaultContextWindow() / 1000).toFixed(0)}k)`
     ));
 
-    // Vulnerability Detection
-    console.log(chalk.cyan(`\n  → Vulnerability detection: correlating findings and analyzing exploitability...`));
-    const detectionStartTokens = this.totalTokensUsed;
-    let vulnerabilities = await this.detector.detect(context);
-    const detectionTokens = this.totalTokensUsed - detectionStartTokens;
+    // Vulnerability Detection — use scan log cache if this chunk was already detected
+    let vulnerabilities: any[] = [];
+    let detectionTokens = 0;
+    const chunkKey = ScanLog.chunkKey(chunk);
+    const cachedDetect = scanState.detectedChunks.get(chunkKey);
 
-    // Update dashboard with detection results
-    this.dashboard.update({
-      phase: 'vulnerability-detection',
-      tokensUsed: this.totalTokensUsed,
-    });
+    if (cachedDetect) {
+      vulnerabilities = cachedDetect.findings;
+      console.log(chalk.gray(`\n  → Vulnerability detection: restored ${vulnerabilities.length} finding(s) from scan log (chunk already detected)`));
+      this.dashboard.update({ phase: 'vulnerability-detection', tokensUsed: this.totalTokensUsed });
+    } else {
+      console.log(chalk.cyan(`\n  → Vulnerability detection: correlating findings and analyzing exploitability...`));
+      const detectionStartTokens = this.totalTokensUsed;
+      vulnerabilities = await this.detector.detect(context);
+      detectionTokens = this.totalTokensUsed - detectionStartTokens;
+
+      // Persist detection result to scan log immediately (before recursive verification)
+      await this.scanLog.append({
+        step: 'detect',
+        chunk: iteration,
+        files: chunk,
+        result: { findings: vulnerabilities },
+      });
 
-    if (vulnerabilities.length > 0) {
-      console.log(chalk.green(`    ✓ Found ${vulnerabilities.length} potential vulnerabilities | ${detectionTokens.toLocaleString()} tokens`));
+      this.dashboard.update({ phase: 'vulnerability-detection', tokensUsed: this.totalTokensUsed });
 
-      // Feed findings to dashboard
-      for (const v of vulnerabilities) {
-        const sev = (v.severity?.toLowerCase() || 'low') as 'critical' | 'high' | 'medium' | 'low';
-        this.dashboard.addFinding(sev, `${v.type} at ${v.location?.file?.split('/').pop() || 'unknown'}`);
-      }
+      if (vulnerabilities.length > 0) {
+        console.log(chalk.green(`    ✓ Found ${vulnerabilities.length} potential vulnerabilities | ${detectionTokens.toLocaleString()} tokens`));
 
-      // Show sample of what was found (Claude decides what's important)
-      const sample = vulnerabilities.slice(0, 3);
-      for (const vuln of sample) {
-        const severity = vuln.severity?.toUpperCase() || 'UNKNOWN';
-        const severityColor = ['critical', 'high'].includes(vuln.severity?.toLowerCase() || '') ? chalk.red : chalk.yellow;
-        const attackerNote = vuln.attackerControlled?.isControlled ? 'VERIFIED ' : '';
-        console.log(severityColor(`      ${attackerNote}[${severity}] ${vuln.type} at ${vuln.location.file.split('/').pop()}:${vuln.location.line}`));
-      }
-      if (vulnerabilities.length > 3) {
-        console.log(chalk.gray(`      ... and ${vulnerabilities.length - 3} more`));
+        // Feed findings to dashboard
+        for (const v of vulnerabilities) {
+          const sev = (v.severity?.toLowerCase() || 'low') as 'critical' | 'high' | 'medium' | 'low';
+          this.dashboard.addFinding(sev, `${v.type} at ${v.location?.file?.split('/').pop() || 'unknown'}`);
+        }
+
+        // Show sample of what was found
+        const sample = vulnerabilities.slice(0, 3);
+        for (const vuln of sample) {
+          const severity = vuln.severity?.toUpperCase() || 'UNKNOWN';
+          const severityColor = ['critical', 'high'].includes(vuln.severity?.toLowerCase() || '') ? chalk.red : chalk.yellow;
+          const attackerNote = vuln.attackerControlled?.isControlled ? 'VERIFIED ' : '';
+          console.log(severityColor(`      ${attackerNote}[${severity}] ${vuln.type} at ${vuln.location.file.split('/').pop()}:${vuln.location.line}`));
+        }
+        if (vulnerabilities.length > 3) {
+          console.log(chalk.gray(`      ... and ${vulnerabilities.length - 3} more`));
+        }
+      } else {
+        console.log(chalk.gray(`    ✓ No vulnerabilities in this chunk | ${detectionTokens.toLocaleString()} tokens`));
       }
-    } else {
-      console.log(chalk.gray(`    ✓ No vulnerabilities in this chunk | ${detectionTokens.toLocaleString()} tokens`));
     }
 
     // Recursive Analysis (if enabled)
@@ -525,7 +606,12 @@ export class Orchestrator {
       this.dashboard.update({ phase: 'validation' });
       console.log(chalk.cyan(`\n  → Recursive verification: tracing call chains, checking contradictions...`));
       const recursiveStartTokens = this.totalTokensUsed;
-      const enhanced = await this.recursiveEngine.apply(vulnerabilities, context);
+      const enhanced = await this.recursiveEngine.apply(vulnerabilities, context, {
+        alreadyVerified: scanState.verifiedFindings,
+        onFindingVerified: async (id: string, result: any) => {
+          await this.scanLog.append({ step: 'verify', finding_id: id, result });
+        },
+      });
       const recursiveTokens = this.totalTokensUsed - recursiveStartTokens;
 
       // Count verification statuses instead of filtering
@@ -610,42 +696,71 @@ export class Orchestrator {
         const vuln = vulnerabilities[vi];
 
         if (this.config.poc.generate) {
-          try {
-            process.stdout.write(chalk.hex('#FF8C00')(`\r    ⚡ POC ${vi + 1}/${vulnerabilities.length}: Generating for ${vuln.id}...`));
-            const poc = await this.pocGen.generate(vuln, context);
-
-            // Anti-hallucination: Validate POC actually works
-            if (this.config.poc.validate) {
-              process.stdout.write(chalk.hex('#FF8C00')(`\r    ⚡ POC ${vi + 1}/${vulnerabilities.length}: Validating ${vuln.id}...          `));
-              const isValid = await this.pocGen.validate(poc);
-              if (isValid) {
-                vuln.poc = poc;
-                vuln.poc.validated = true;
-                process.stdout.write('\r' + ' '.repeat(100) + '\r');
-                console.log(chalk.green(`      ✓ ${vuln.id}: POC validated`));
+          // Restore POC from scan log if this finding was already handled in a prior run
+          const savedPoc = scanState.pocResults.get(vuln.id);
+          if (savedPoc) {
+            if (savedPoc.poc) {
+              vuln.poc = savedPoc.poc;
+            }
+            if (savedPoc.needsManualReview) {
+              vuln.needsManualReview = true;
+            }
+            process.stdout.write('\r' + ' '.repeat(100) + '\r');
+            console.log(chalk.gray(`      ${vuln.id}: POC restored from scan log (${savedPoc.status})`));
+          } else {
+            try {
+              process.stdout.write(chalk.hex('#FF8C00')(`\r    ⚡ POC ${vi + 1}/${vulnerabilities.length}: Generating for ${vuln.id}...`));
+              const poc = await this.pocGen.generate(vuln, context);
+
+              // Anti-hallucination: Validate POC actually works
+              if (this.config.poc.validate) {
+                process.stdout.write(chalk.hex('#FF8C00')(`\r    ⚡ POC ${vi + 1}/${vulnerabilities.length}: Validating ${vuln.id}...          `));
+                const isValid = await this.pocGen.validate(poc);
+                if (isValid) {
+                  vuln.poc = poc;
+                  vuln.poc.validated = true;
+                  process.stdout.write('\r' + ' '.repeat(100) + '\r');
+                  console.log(chalk.green(`      ✓ ${vuln.id}: POC validated`));
+                  await this.scanLog.append({
+                    step: 'poc', finding_id: vuln.id,
+                    result: { status: 'success', poc: vuln.poc, validated: true },
+                  });
+                } else {
+                  // POC didn't work - KEEP THE FINDING but mark it
+                  vuln.poc = poc;
+                  vuln.poc.validated = false;
+                  vuln.needsManualReview = true;
+                  if (!vuln.verificationStatus) {
+                    vuln.verificationStatus = 'unverified';
+                  }
+                  process.stdout.write('\r' + ' '.repeat(100) + '\r');
+                  console.log(chalk.yellow(`      ⚠ ${vuln.id}: POC validation failed - marked for manual review`));
+                  await this.scanLog.append({
+                    step: 'poc', finding_id: vuln.id,
+                    result: { status: 'failed', poc: vuln.poc, validated: false, needsManualReview: true },
+                  });
+                }
               } else {
-                // POC didn't work - KEEP THE FINDING but mark it
+                // Validation skipped
                 vuln.poc = poc;
                 vuln.poc.validated = false;
-                vuln.needsManualReview = true;
-                if (!vuln.verificationStatus) {
-                  vuln.verificationStatus = 'unverified';
-                }
                 process.stdout.write('\r' + ' '.repeat(100) + '\r');
-                console.log(chalk.yellow(`      ⚠ ${vuln.id}: POC validation failed - marked for manual review`));
+                console.log(chalk.gray(`      ${vuln.id}: POC generated (validation skipped)`));
+                await this.scanLog.append({
+                  step: 'poc', finding_id: vuln.id,
+                  result: { status: 'skipped', poc: vuln.poc },
+                });
               }
-            } else {
-              // Validation skipped
-              vuln.poc = poc;
-              vuln.poc.validated = false;
+            } catch (error) {
+              // POC generation failed - STILL KEEP THE FINDING
               process.stdout.write('\r' + ' '.repeat(100) + '\r');
-              console.log(chalk.gray(`      ${vuln.id}: POC generated (validation skipped)`));
+              console.log(chalk.yellow(`      ⚠ ${vuln.id}: POC generation failed, reported without POC`));
+              vuln.needsManualReview = true;
+              await this.scanLog.append({
+                step: 'poc', finding_id: vuln.id,
+                result: { status: 'error', needsManualReview: true },
+              });
             }
-          } catch (error) {
-            // POC generation failed - STILL KEEP THE FINDING
-            process.stdout.write('\r' + ' '.repeat(100) + '\r');
-            console.log(chalk.yellow(`      ⚠ ${vuln.id}: POC generation failed, reported without POC`));
-            vuln.needsManualReview = true;
           }
         }
 
diff --git a/src/recursive/recursive-strategy.ts b/src/recursive/recursive-strategy.ts
index fffed8e..7fb8651 100644
--- a/src/recursive/recursive-strategy.ts
+++ b/src/recursive/recursive-strategy.ts
@@ -32,7 +32,13 @@ export class RecursiveStrategyEngine {
 
   async apply(
     vulnerabilities: Vulnerability[],
-    context: CodeContext
+    context: CodeContext,
+    options?: {
+      /** Findings already verified in a prior run — skip re-verification for these. */
+      alreadyVerified?: Map<string, any>;
+      /** Called after each finding is verified; use to append scan-log entries. */
+      onFindingVerified?: (id: string, result: any) => Promise<void>;
+    }
   ): Promise<EnhancedVulnerability[]> {
     if (!this.config.enabled) {
       return vulnerabilities.map(v => ({ ...v, recursive: null }));
@@ -41,6 +47,21 @@ export class RecursiveStrategyEngine {
     const enhanced: EnhancedVulnerability[] = [];
 
     for (const vuln of vulnerabilities) {
+      // Restore from scan log if this finding was already verified in a prior run
+      const savedVerify = options?.alreadyVerified?.get(vuln.id);
+      if (savedVerify) {
+        console.log(`  Recursive analysis: ${vuln.id} (restored from scan log)`);
+        enhanced.push({
+          ...vuln,
+          recursive: null,
+          verificationStatus: savedVerify.status ?? 'verified',
+          confidence: savedVerify.confidence ?? 'high',
+          needsManualReview: savedVerify.needsManualReview ?? false,
+          contradictions: savedVerify.contradictions,
+        });
+        continue;
+      }
+
       console.log(`  Recursive analysis: ${vuln.id}`);
 
       let recursiveData: RecursiveAnalysis | null = null;
@@ -136,14 +157,25 @@ export class RecursiveStrategyEngine {
       const passedExploitabilityChecks = exploitabilityProof.filter(p => p.startsWith('✓')).length;
       const totalExploitabilityChecks = 5; // 5 validations
 
+      const derivedConfidence = verificationStatus === 'verified' ? 'high'
+        : verificationStatus === 'uncertain' ? 'medium' : 'low';
+
+      if (options?.onFindingVerified) {
+        await options.onFindingVerified(vuln.id, {
+          status: verificationStatus,
+          confidence: derivedConfidence,
+          needsManualReview: verificationStatus !== 'verified',
+          contradictions: contradictions.length > 0 ? contradictions : undefined,
+        });
+      }
+
       enhanced.push({
         ...vuln,
         poc: refinedPOC,
         recursive: recursiveData,
         verificationStatus,
         contradictions: contradictions.length > 0 ? contradictions : undefined,
-        confidence: verificationStatus === 'verified' ? 'high' :
-                   verificationStatus === 'uncertain' ? 'medium' : 'low',
+        confidence: derivedConfidence,
         needsManualReview: verificationStatus !== 'verified',
         // GOD-LEVEL: Add recursive exploitability proof
         recursiveExploitabilityProof: exploitabilityProof.length > 0 ? {
diff --git a/src/utils/scan-log.ts b/src/utils/scan-log.ts
new file mode 100644
index 0000000..2421454
--- /dev/null
+++ b/src/utils/scan-log.ts
@@ -0,0 +1,146 @@
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import * as crypto from 'crypto';
+
+export type ScanLogStepName = 'prioritize' | 'detect' | 'verify' | 'poc' | 'sarif';
+
+export interface ScanLogEntry {
+  step: ScanLogStepName;
+  ts: string;
+  chunk?: number;
+  files?: string[];
+  finding_id?: string;
+  result: any;
+}
+
+export interface DetectedChunk {
+  files: string[];
+  findings: any[];
+}
+
+export interface ScanState {
+  prioritizedFiles: string[] | null;
+  /** Keyed by ScanLog.chunkKey(files) — order-independent. */
+  detectedChunks: Map<string, DetectedChunk>;
+  /** finding_id → saved verify result */
+  verifiedFindings: Map<string, any>;
+  /** finding_id → saved poc result */
+  pocResults: Map<string, any>;
+  sarifWritten: boolean;
+}
+
+/**
+ * Append-only JSONL scan log.  Each completed sub-step appends one line:
+ *
+ *   {"step":"prioritize","result":{"files":[...]},"ts":"..."}
+ *   {"step":"detect","chunk":1,"files":[...],"result":{"findings":[...]},"ts":"..."}
+ *   {"step":"verify","finding_id":"CTAE-001","result":{"status":"verified"},"ts":"..."}
+ *   {"step":"poc","finding_id":"CTAE-001","result":{"status":"success","poc":{...}},"ts":"..."}
+ *
+ * On resume: read all lines, replay completed steps, continue from the first
+ * missing one.  Partial writes don't corrupt earlier state.
+ */
+export class ScanLog {
+  private logFile: string;
+
+  constructor(logFile: string) {
+    this.logFile = logFile;
+  }
+
+  static getLogFile(targetPath: string, sandyaaDir: string): string {
+    const hash = crypto.createHash('sha256')
+      .update(path.resolve(targetPath))
+      .digest('hex')
+      .substring(0, 12);
+    return path.join(sandyaaDir, `scan-log-${hash}.jsonl`);
+  }
+
+  /** Append one step entry.  Safe to call concurrently — each line is atomic. */
+  async append(entry: Omit<ScanLogEntry, 'ts'>): Promise<void> {
+    try {
+      const dir = path.dirname(this.logFile);
+      await fs.mkdir(dir, { recursive: true });
+      const line = JSON.stringify({ ...entry, ts: new Date().toISOString() }) + '\n';
+      await fs.appendFile(this.logFile, line, 'utf-8');
+    } catch (error) {
+      console.warn('Failed to append to scan log:', error);
+    }
+  }
+
+  /** Read all entries and materialise a resume-ready ScanState. */
+  async loadState(): Promise<ScanState> {
+    const state: ScanState = {
+      prioritizedFiles: null,
+      detectedChunks: new Map(),
+      verifiedFindings: new Map(),
+      pocResults: new Map(),
+      sarifWritten: false,
+    };
+
+    try {
+      const content = await fs.readFile(this.logFile, 'utf-8');
+      const lines = content.split('\n').filter((l: string) => l.trim());
+
+      for (const line of lines) {
+        try {
+          const entry: ScanLogEntry = JSON.parse(line);
+          switch (entry.step) {
+            case 'prioritize':
+              state.prioritizedFiles = entry.result?.files ?? null;
+              break;
+            case 'detect':
+              if (entry.files) {
+                const key = ScanLog.chunkKey(entry.files);
+                state.detectedChunks.set(key, {
+                  files: entry.files,
+                  findings: entry.result?.findings ?? [],
+                });
+              }
+              break;
+            case 'verify':
+              if (entry.finding_id) {
+                state.verifiedFindings.set(entry.finding_id, entry.result);
+              }
+              break;
+            case 'poc':
+              if (entry.finding_id) {
+                state.pocResults.set(entry.finding_id, entry.result);
+              }
+              break;
+            case 'sarif':
+              state.sarifWritten = entry.result?.written === true;
+              break;
+          }
+        } catch {
+          // Skip malformed lines — partial writes don't corrupt prior entries
+        }
+      }
+    } catch {
+      // File doesn't exist yet
+    }
+
+    return state;
+  }
+
+  /** Stable, order-independent key for a set of file paths. */
+  static chunkKey(files: string[]): string {
+    return [...files].sort().join('\0');
+  }
+
+  async exists(): Promise<boolean> {
+    try {
+      await fs.access(this.logFile);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  async clear(): Promise<void> {
+    try {
+      await fs.unlink(this.logFile);
+    } catch {
+      // Ignore
+    }
+  }
+}

From 7b3a0c04b4b345f61fe9cadf79a19ffa984878cd Mon Sep 17 00:00:00 2001
From: "Finn (EACG)" <fln@eacg.de>
Date: Thu, 11 Jun 2026 07:00:58 +0000
Subject: [PATCH 2/2] fix(checkpoint): address PR review findings on scan-log
 robustness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- fix(scan-log): normalise paths in chunkKey() to forward-slash absolute
  paths so relative vs. absolute and cross-OS paths produce the same cache
  key (silent cache miss on resume fixed)
- fix(scan-log): replace silent console.warn in append() with a sticky
  writeFailed flag; emit console.error once with chalk.red so disk-full
  and permission errors are visible instead of silently losing durability
- fix(scan-log): extract applyToMemState() helper; maintain an in-memory
  ScanState updated on every append(); expose getState() so future
  callers avoid re-reading the growing JSONL file
- fix(orchestrator): guard restored allVulnerabilities against ghost
  findings — only apply verifiedFindings/pocResults entries whose finding
  IDs appear in the restored chunk's own findings list; stale verify/poc
  entries from prior aborted runs no longer bleed into the final report
- fix(orchestrator): remove default value from processChunk's scanState
  parameter so TypeScript enforces the argument at every call-site
- fix(context-analyzer): guard os.cpus().length with Math.max(1, ...)
  to handle restricted container environments that return an empty array

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/analyzer/context-analyzer.ts |  3 +-
 src/orchestrator/orchestrator.ts | 18 +++---
 src/utils/scan-log.ts            | 97 +++++++++++++++++++++-----------
 3 files changed, 74 insertions(+), 44 deletions(-)

diff --git a/src/analyzer/context-analyzer.ts b/src/analyzer/context-analyzer.ts
index 62cd666..9b71fb6 100644
--- a/src/analyzer/context-analyzer.ts
+++ b/src/analyzer/context-analyzer.ts
@@ -454,7 +454,8 @@ export class ContextAnalyzer {
       if (Number.isFinite(raw) && raw >= 1 && raw <= 8) return raw;
       // Scale with available CPUs: leave 2 threads for the orchestrator process.
       // Clamp to [2, 8] so we stay aggressive on beefy machines but sane elsewhere.
-      const cpuCount = os.cpus().length;
+      // Guard against os.cpus() returning [] in restricted container environments.
+      const cpuCount = Math.max(1, os.cpus().length);
       return Math.max(2, Math.min(8, cpuCount * 2 - 2));
     })();
 
diff --git a/src/orchestrator/orchestrator.ts b/src/orchestrator/orchestrator.ts
index 731f33c..3d0acfe 100644
--- a/src/orchestrator/orchestrator.ts
+++ b/src/orchestrator/orchestrator.ts
@@ -349,21 +349,27 @@ export class Orchestrator {
 
     // Restore allVulnerabilities for chunks already completed in a prior run.
     // This ensures the final SARIF report is complete even on a resumed scan.
+    //
+    // Guard: only apply verifiedFindings/pocResults whose finding IDs are present
+    // in the current chunk's findings.  A prior run may have detected finding B,
+    // but a re-detection after a code change only produced finding A — B's verify/poc
+    // entries must not bleed into this run's output as ghost findings.
     const allVulnerabilities: any[] = [];
     for (const [, chunkData] of scanState.detectedChunks) {
       const allFilesProcessed = chunkData.files.every(f => processedFiles.has(f));
       if (!allFilesProcessed) continue;  // Will be (re-)processed in the main loop below
+      const currentFindingIds = new Set(chunkData.findings.map((f: any) => f.id));
       for (const finding of chunkData.findings) {
         const enriched = { ...finding };
         const verifyResult = scanState.verifiedFindings.get(finding.id);
-        if (verifyResult) {
+        if (verifyResult && currentFindingIds.has(finding.id)) {
           enriched.verificationStatus = verifyResult.status;
           enriched.confidence = verifyResult.confidence;
           enriched.needsManualReview = verifyResult.needsManualReview;
           if (verifyResult.contradictions) enriched.contradictions = verifyResult.contradictions;
         }
         const pocResult = scanState.pocResults.get(finding.id);
-        if (pocResult?.poc) {
+        if (pocResult?.poc && currentFindingIds.has(finding.id)) {
           enriched.poc = { ...pocResult.poc };
         }
         allVulnerabilities.push(enriched);
@@ -511,13 +517,7 @@ export class Orchestrator {
     totalBugsFound: number,
     estimatedChunksRemaining: number,
     totalFilesCount: number,
-    scanState: ScanState = {
-      prioritizedFiles: null,
-      detectedChunks: new Map(),
-      verifiedFindings: new Map(),
-      pocResults: new Map(),
-      sarifWritten: false,
-    }
+    scanState: ScanState
   ): Promise<{ bugsFound: number; findings: any[] }> {
     console.log(chalk.bold(`\n[${phase}] Chunk ${iteration} (${chunk.length} files | ~${estimatedChunksRemaining} chunks remaining)`));
     console.log(chalk.gray(`  ${this.dynamicChunker.getExplanation()}`));
diff --git a/src/utils/scan-log.ts b/src/utils/scan-log.ts
index 2421454..38f89a2 100644
--- a/src/utils/scan-log.ts
+++ b/src/utils/scan-log.ts
@@ -1,6 +1,7 @@
 import * as fs from 'fs/promises';
 import * as path from 'path';
 import * as crypto from 'crypto';
+import chalk from 'chalk';
 
 export type ScanLogStepName = 'prioritize' | 'detect' | 'verify' | 'poc' | 'sarif';
 
@@ -42,6 +43,14 @@ export interface ScanState {
  */
 export class ScanLog {
   private logFile: string;
+  private writeFailed = false;
+  private memState: ScanState = {
+    prioritizedFiles: null,
+    detectedChunks: new Map(),
+    verifiedFindings: new Map(),
+    pocResults: new Map(),
+    sarifWritten: false,
+  };
 
   constructor(logFile: string) {
     this.logFile = logFile;
@@ -57,19 +66,30 @@ export class ScanLog {
 
   /** Append one step entry.  Safe to call concurrently — each line is atomic. */
   async append(entry: Omit<ScanLogEntry, 'ts'>): Promise<void> {
+    const full: ScanLogEntry = { ...entry, ts: new Date().toISOString() } as ScanLogEntry;
+    this.applyToMemState(full);
     try {
       const dir = path.dirname(this.logFile);
       await fs.mkdir(dir, { recursive: true });
-      const line = JSON.stringify({ ...entry, ts: new Date().toISOString() }) + '\n';
+      const line = JSON.stringify(full) + '\n';
       await fs.appendFile(this.logFile, line, 'utf-8');
     } catch (error) {
-      console.warn('Failed to append to scan log:', error);
+      if (!this.writeFailed) {
+        this.writeFailed = true;
+        console.error(chalk.red(`[scan-log] Write failed — resume capability lost for this run: ${error}`));
+      }
     }
   }
 
-  /** Read all entries and materialise a resume-ready ScanState. */
+  /** Return the current in-memory state (updated on every append, no disk read). */
+  getState(): ScanState {
+    return this.memState;
+  }
+
+  /** Read all entries from disk and materialise a resume-ready ScanState.
+   *  Also populates the in-memory state so subsequent getState() calls are free. */
   async loadState(): Promise<ScanState> {
-    const state: ScanState = {
+    this.memState = {
       prioritizedFiles: null,
       detectedChunks: new Map(),
       verifiedFindings: new Map(),
@@ -84,33 +104,7 @@ export class ScanLog {
       for (const line of lines) {
         try {
           const entry: ScanLogEntry = JSON.parse(line);
-          switch (entry.step) {
-            case 'prioritize':
-              state.prioritizedFiles = entry.result?.files ?? null;
-              break;
-            case 'detect':
-              if (entry.files) {
-                const key = ScanLog.chunkKey(entry.files);
-                state.detectedChunks.set(key, {
-                  files: entry.files,
-                  findings: entry.result?.findings ?? [],
-                });
-              }
-              break;
-            case 'verify':
-              if (entry.finding_id) {
-                state.verifiedFindings.set(entry.finding_id, entry.result);
-              }
-              break;
-            case 'poc':
-              if (entry.finding_id) {
-                state.pocResults.set(entry.finding_id, entry.result);
-              }
-              break;
-            case 'sarif':
-              state.sarifWritten = entry.result?.written === true;
-              break;
-          }
+          this.applyToMemState(entry);
         } catch {
           // Skip malformed lines — partial writes don't corrupt prior entries
         }
@@ -119,12 +113,47 @@ export class ScanLog {
       // File doesn't exist yet
     }
 
-    return state;
+    return this.memState;
+  }
+
+  private applyToMemState(entry: ScanLogEntry): void {
+    switch (entry.step) {
+      case 'prioritize':
+        this.memState.prioritizedFiles = entry.result?.files ?? null;
+        break;
+      case 'detect':
+        if (entry.files) {
+          const key = ScanLog.chunkKey(entry.files);
+          this.memState.detectedChunks.set(key, {
+            files: entry.files,
+            findings: entry.result?.findings ?? [],
+          });
+        }
+        break;
+      case 'verify':
+        if (entry.finding_id) {
+          this.memState.verifiedFindings.set(entry.finding_id, entry.result);
+        }
+        break;
+      case 'poc':
+        if (entry.finding_id) {
+          this.memState.pocResults.set(entry.finding_id, entry.result);
+        }
+        break;
+      case 'sarif':
+        this.memState.sarifWritten = entry.result?.written === true;
+        break;
+    }
   }
 
-  /** Stable, order-independent key for a set of file paths. */
+  /** Stable, order-independent key for a set of file paths.
+   *  Normalises to absolute forward-slash paths so relative vs. absolute
+   *  and Windows vs. POSIX paths all produce the same key. */
   static chunkKey(files: string[]): string {
-    return [...files].sort().join('\0');
+    return [...files]
+      .map(f => path.resolve(f).split(path.sep).join('/'))
+      .sort()
+      .join('\0');
   }
 
   async exists(): Promise<boolean> {