From c9e15a6dcf05aaaaf9ed6b6083b987fd3124f9a6 Mon Sep 17 00:00:00 2001 From: Jordan Coin Jackson Date: Thu, 29 Jan 2026 01:49:00 -0500 Subject: [PATCH 1/2] Add size limits to prevent context overflow in large repos - Session-start hook now uses adaptive depth based on repo size: - >5000 files: depth 2 - >2000 files: depth 3 - Otherwise: depth 4 - Both hook and MCP get_structure enforce 60KB max output (~15k tokens) - Truncates cleanly at line boundaries with helpful message - Prevents consuming >10% of LLM context window Fixes issue where 10k+ file repos (like Rails monoliths) would output 1.3MB+ of tree structure, overwhelming Claude Code's context. Co-Authored-By: Claude Opus 4.5 --- cmd/hooks.go | 39 ++++++++++++++++++++++++++++++++++++--- mcp/main.go | 11 +++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/cmd/hooks.go b/cmd/hooks.go index de10223..413cb16 100644 --- a/cmd/hooks.go +++ b/cmd/hooks.go @@ -94,13 +94,46 @@ func hookSessionStart(root string) error { fmt.Println("📍 Project Context:") fmt.Println() - // Run codemap to show full tree structure + // Run codemap with adaptive depth based on repo size + // Goal: Keep output under 60KB (~15k tokens, <10% of context) exe, err := os.Executable() if err == nil { - cmd := exec.Command(exe, root) - cmd.Stdout = os.Stdout + // Count files to determine appropriate depth + fileCount := 0 + if state := watch.ReadState(root); state != nil { + fileCount = state.FileCount + } + + // Adaptive depth: large repos get shallower trees + depth := "4" + if fileCount > 5000 { + depth = "2" + } else if fileCount > 2000 { + depth = "3" + } + + cmd := exec.Command(exe, "--depth", depth, root) + + // Capture output to enforce size limit + var buf strings.Builder + cmd.Stdout = &buf cmd.Stderr = os.Stderr cmd.Run() + + output := buf.String() + const maxBytes = 60000 // ~15k tokens, <10% of 200k context + + if len(output) > maxBytes { + // Truncate and add warning + output = output[:maxBytes] + // Find last newline to avoid cutting mid-line + if idx := strings.LastIndex(output, "\n"); idx > maxBytes-1000 { + output = output[:idx] + } + output += "\n\n... (truncated - repo has " + fmt.Sprintf("%d", fileCount) + " files, use `codemap .` for full tree)\n" + } + + fmt.Print(output) fmt.Println() } diff --git a/mcp/main.go b/mcp/main.go index 85d836d..08dcbfc 100644 --- a/mcp/main.go +++ b/mcp/main.go @@ -188,6 +188,17 @@ func handleGetStructure(ctx context.Context, req *mcp.CallToolRequest, input Pat render.Tree(&buf, project) output := stripANSI(buf.String()) + // Enforce size limit: ~15k tokens max (60KB) + const maxBytes = 60000 + if len(output) > maxBytes { + output = output[:maxBytes] + // Find last newline to avoid cutting mid-line + if idx := strings.LastIndex(output, "\n"); idx > maxBytes-1000 { + output = output[:idx] + } + output += fmt.Sprintf("\n\n... (truncated - repo has %d files, use `codemap --depth N` for full tree)\n", len(files)) + } + // Add hub file summary fg, err := scanner.BuildFileGraph(input.Path) if err == nil { From a45f408273012bf43e356833c2aa3227ded2ea2e Mon Sep 17 00:00:00 2001 From: Jordan Coin Jackson Date: Thu, 29 Jan 2026 12:27:45 -0500 Subject: [PATCH 2/2] Add detailed comments explaining hooks/context architecture Hook output goes directly into Claude's "Messages" context, not system prompt. This means hook output competes with conversation history for the ~200k token limit. A 1.3MB output (like a full tree of a 10k file repo) equals ~500k tokens, causing instant context overflow. The size limits (adaptive depth + 60KB cap) are critical safeguards. Co-Authored-By: Claude Opus 4.5 --- cmd/hooks.go | 11 +++++++++-- mcp/main.go | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/cmd/hooks.go b/cmd/hooks.go index 413cb16..1e4c1fa 100644 --- a/cmd/hooks.go +++ b/cmd/hooks.go @@ -94,8 +94,15 @@ func hookSessionStart(root string) error { fmt.Println("📍 Project Context:") fmt.Println() - // Run codemap with adaptive depth based on repo size - // Goal: Keep output under 60KB (~15k tokens, <10% of context) + // IMPORTANT: Hook output goes directly into Claude's "Messages" context, not system prompt. + // This means hook output competes with conversation history for the ~200k token limit. + // A 1.3MB output (like a full tree of a 10k file repo) = ~500k tokens = instant context overflow. + // + // We enforce two limits: + // 1. Adaptive depth: larger repos get shallower trees (depth 2-4 based on file count) + // 2. Hard cap: 60KB max output (~15k tokens, <10% of context window) + // + // Future: Consider structured output that Claude Code can format/truncate intelligently. exe, err := os.Executable() if err == nil { // Count files to determine appropriate depth diff --git a/mcp/main.go b/mcp/main.go index 08dcbfc..a00b33c 100644 --- a/mcp/main.go +++ b/mcp/main.go @@ -188,7 +188,9 @@ func handleGetStructure(ctx context.Context, req *mcp.CallToolRequest, input Pat render.Tree(&buf, project) output := stripANSI(buf.String()) - // Enforce size limit: ~15k tokens max (60KB) + // IMPORTANT: MCP tool output contributes to Claude's context window. + // Large repos can produce megabytes of tree output, causing instant context overflow. + // Cap at 60KB (~15k tokens) to stay under 10% of typical 200k context limit. const maxBytes = 60000 if len(output) > maxBytes { output = output[:maxBytes]