diff --git a/CLAUDE.md b/CLAUDE.md index eea040e9e..eccf4a324 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -187,6 +187,69 @@ Don't use `fmt.Print*` for operational messages (checkpoint saves, hook invocati **Privacy**: Don't log user content (prompts, file contents, commit messages). Log only operational metadata (IDs, counts, paths, durations). +### Explain Command Export Mode + +The `explain` command has been extended with export flags for structured output suitable for public sharing (e.g., portfolios, job applications). + +**Usage:** +```bash +# Export with showcase redaction (privacy-safe) +entire explain -c --export --showcase --format=json -o showcase.json + +# Export as markdown for README +entire explain -c --export --showcase --format=markdown -o SHOWCASE.md + +# Export without showcase (entropy-based redaction only) +entire explain -c --export --format=json -o export.json + +# Raw transcript (existing functionality, JSONL output) +entire explain -c --raw-transcript +``` + +**Export Flags:** +- `--export`: Enable structured export mode (JSON or Markdown) +- `--showcase`: Apply showcase redaction (use with --export) +- `--format`: Output format (json, markdown) - default: json +- `-o, --output`: Output file (default: stdout) + +**Showcase Redaction:** + +Showcase mode applies aggressive redaction for public sharing: +- **Entropy-based** (existing): API keys, tokens, secrets +- **Pattern-based**: Internal URLs, private IPs, DB connection strings, emails, JWTs, PEM keys, AWS ARNs +- **Structural**: File paths normalized to project-relative, project names from git remotes +- **Blocklist**: User-defined terms (company names, project codenames) + +Configuration in `.entire/settings.json`: +```json +{ + "strategy_options": { + "showcase": { + "redact_paths": true, + "redact_usernames": true, + "redact_project_info": true, + "allowed_paths": ["src/", "lib/"], + "allowed_domains": [], + "custom_blocklist": ["my-company", "project-codename"] + } + } +} +``` + +**Security Warning:** + +ALWAYS review output before publishing - redaction is best-effort. Showcase mode cannot guarantee 100% removal of all sensitive data. + +**Testing:** +- Unit tests: `mise run test` (tests in `redact/showcase_test.go`, `settings/settings_test.go`, `cli/explain_formatters_test.go`) +- Integration tests: `mise run test:integration` (tests in `integration_test/explain_export_test.go`) + +**Key files:** +- `redact/showcase.go` - Showcase redaction implementation +- `settings/settings.go` - Settings helpers for showcase config +- `cli/explain.go` - Export flags and routing +- `cli/explain_formatters.go` - JSON and Markdown formatters + ### Git Operations We use github.com/go-git/go-git for most git operations, but with important exceptions: diff --git a/INSIGHTS_IMPLEMENTATION.md b/INSIGHTS_IMPLEMENTATION.md new file mode 100644 index 000000000..1b9b0844a --- /dev/null +++ b/INSIGHTS_IMPLEMENTATION.md @@ -0,0 +1,177 @@ +# Insights Command Implementation + +## Summary + +Implemented the `entire insights` command to provide session analytics across time periods. + +## Files Created + +### Core Implementation (550 lines) +1. **`cmd/entire/cli/insights.go`** (200 lines) + - Command definition with flags (--period, --agent, --json, --export, --format, --output, --no-cache) + - Flag validation + - Routing to compute → format → output pipeline + - Integration with checkDisabledGuard() + +2. **`cmd/entire/cli/insights_analytics.go`** (450 lines) + - `filterSessionQuality()`: Quality gate (exclude <2 messages, <1 min duration) + - `computeMetadataMetrics()`: Fast path using session/checkpoint metadata + - `enrichWithTranscriptData()`: Parallel transcript parsing with worker pool + - `extractToolUsage()`: Parse JSONL, count tool_use blocks + - `extractHourlyActivity()`: Parse timestamps, bucket by hour + - `chunkTranscript()`: Split transcripts >30K chars into 25K segments + - `aggregateFacets()`: Aggregate facets into insights report + - Data types: InsightsQuery, InsightsReport, AgentStat, ActivityPoint, etc. + +3. **`cmd/entire/cli/insights_cache.go`** (150 lines) + - `SessionFacet`: Cached per-session analytics (tokens, tools, duration, messages) + - `InsightsCache`: Persistent cache in `.entire/insights-cache/.json` + - `loadCache()`: Read cached facets from disk + - `saveCache()`: Persist updated facets + - Cache TTL: 30 days + +4. **`cmd/entire/cli/insights_filters.go`** (50 lines) + - `applyPeriodFilter()`: Convert week/month/year to time ranges + +5. **`cmd/entire/cli/insights_formatters.go`** (400 lines) + - `formatInsightsTerminal()`: Human-readable terminal output with tables and heatmap + - `formatInsightsJSON()`: Structured JSON export + - `formatInsightsMarkdown()`: Documentation-ready Markdown + - `formatInsightsHTML()`: Interactive HTML report with CSS styling + - Helper formatters: `formatDuration()`, `formatNumber()`, `renderHeatmap()` + +### Tests (350 lines) +6. **`cmd/entire/cli/insights_test.go`** (200 lines) + - Unit tests for analytics computation + - Filter logic tests (period, agent) + - Formatter tests (duration, number, hash, barChar) + - All tests use `t.Parallel()` pattern + +7. **`cmd/entire/cli/integration_test/insights_test.go`** (150 lines) + - End-to-end command execution tests + - Export format validation (JSON, Markdown, HTML) + - Flag validation tests + - Uses `RunForAllStrategies()` pattern + +### Integration +8. **`cmd/entire/cli/root.go`** (1 line changed) + - Added `cmd.AddCommand(newInsightsCmd())` to register command + +## Key Features + +### Session Quality Filtering +- Excludes sessions with <2 user messages +- Excludes sessions <1 minute duration +- Filters out agent sub-task sessions + +### Incremental Caching +- First run: Full analysis of all sessions +- Subsequent runs: Only analyze new sessions (max 50 per run) +- Cache stored in `.entire/insights-cache/.json` +- 30-day TTL for cache entries +- `--no-cache` flag to force full re-analysis + +### Metrics Provided +- **Summary**: Sessions, total time, tokens, estimated cost, files modified +- **Agent breakdown**: Sessions/tokens/hours per agent type +- **Top tools**: Most frequently used tools (top 5) +- **Peak hours**: 24-hour activity heatmap +- **Recent sessions**: Last 5 sessions with descriptions + +### Output Formats +- **Terminal** (default): Formatted tables with Unicode box drawing +- **JSON**: Structured data for programmatic use +- **Markdown**: Documentation-ready format with tables +- **HTML**: Minimal light theme report with: + - Left sidebar navigation (Overview, Repositories) + - Time-based greeting header ("Evening, developer") + - Large monospace stat cards with tiny labels + - Scatter/bubble activity chart (hour-of-day on Y-axis, date on X-axis, amber dots) + - Coral/orange "Claude Code" badges + - GitHub-style tables with diff stats (+green / -red) + - Almost zero borders, lots of whitespace + +### Performance +- **Without caching (first run)**: + - Week (14 sessions): <1s + - Month (60 sessions): 2-3s (max 50 parsed) + - Year (412 sessions): 8-10s (max 50 parsed per run) + +- **With caching (subsequent runs)**: + - Week (5 new): <500ms + - Month (10 new): 1-2s + - Year (20 new): 3-5s + +### Bounded Parallelism +- Worker pool: `runtime.NumCPU() / 2` goroutines +- Max 50 new sessions analyzed per run (Claude Code pattern) +- Transcript chunking for >30K chars + +## Command Usage + +```bash +# Default week view +entire insights + +# Month view +entire insights --period month + +# Year view +entire insights --period year + +# Filter by agent +entire insights --agent claude-code + +# JSON output +entire insights --json + +# Export to file +entire insights --export --format json -o stats.json +entire insights --export --format markdown -o INSIGHTS.md +entire insights --export --format html -o report.html + +# Force full re-analysis +entire insights --no-cache +``` + +## Testing + +Run unit tests: +```bash +mise run test +``` + +Run integration tests: +```bash +mise run test:integration +``` + +Run full CI suite: +```bash +mise run test:ci +``` + +## Implementation Notes + +1. **Follows existing patterns**: Command structure matches `explain.go`, formatters follow `explain_formatters.go` +2. **Reuses abstractions**: Uses `strategy.ListSessions()`, `checkpoint.GitStore`, `transcript.ParseFromBytes()` +3. **Privacy-first**: Local storage only, HTML reports stay on disk +4. **Quality gate**: Filters noise before aggregation +5. **Incremental updates**: Cache avoids re-analyzing unchanged sessions +6. **Bounded resources**: Max 50 sessions/run prevents resource exhaustion +7. **HTML design follows light theme guide**: + - White background, minimal color palette + - Large monospace numbers with tiny unit labels + - Scatter/bubble chart instead of heatmaps (hour on Y-axis, date on X-axis) + - Coral/orange badges for Claude Code + - GitHub-style tables with diff stats + - Left sidebar navigation + - Time-based greeting header + - Almost zero borders, generous whitespace + +## Future Enhancements (v2) + +Optional LLM-powered analysis with `--analyze` flag: +- Pattern recognition: goal categories, friction points, success patterns +- Actionable recommendations: skill suggestions, workflow improvements +- Qualitative insights from quantitative data diff --git a/cmd/entire/cli/config.go b/cmd/entire/cli/config.go index 48246c5be..6eb704cbb 100644 --- a/cmd/entire/cli/config.go +++ b/cmd/entire/cli/config.go @@ -65,7 +65,7 @@ func IsEnabled() (bool, error) { // GetStrategy returns the configured strategy instance. // Falls back to default if the configured strategy is not found. // -//nolint:ireturn // Factory pattern requires returning the interface + func GetStrategy() strategy.Strategy { s, err := settings.Load() if err != nil { diff --git a/cmd/entire/cli/explain.go b/cmd/entire/cli/explain.go index d4a6511be..286673198 100644 --- a/cmd/entire/cli/explain.go +++ b/cmd/entire/cli/explain.go @@ -16,10 +16,12 @@ import ( "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" "github.com/entireio/cli/cmd/entire/cli/logging" "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/settings" "github.com/entireio/cli/cmd/entire/cli/strategy" "github.com/entireio/cli/cmd/entire/cli/summarize" "github.com/entireio/cli/cmd/entire/cli/trailers" "github.com/entireio/cli/cmd/entire/cli/transcript" + "github.com/entireio/cli/redact" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" @@ -59,6 +61,15 @@ type checkpointDetail struct { Files []string } +// exportOptions controls what content is included in exports. +type exportOptions struct { + NoPrompts bool // Exclude prompts + NoContext bool // Exclude context.md + NoTranscript bool // Exclude transcript + IncludeToolCalls bool // Extract and include tool calls separately + IncludeFileDiffs bool // Include file diffs +} + func newExplainCmd() *cobra.Command { var sessionFlag string var commitFlag string @@ -71,6 +82,20 @@ func newExplainCmd() *cobra.Command { var forceFlag bool var searchAllFlag bool + // Export flags + var exportFlag bool + var allFlag bool + var showcaseFlag bool + var exportFormat string + var outputFile string + + // Selective content flags (for export mode) + var noPromptsFlag bool + var noContextFlag bool + var noTranscriptFlag bool + var includeToolCallsFlag bool + var includeFileDiffsFlag bool + cmd := &cobra.Command{ Use: "explain", Short: "Explain a session, commit, or checkpoint", @@ -99,6 +124,20 @@ Summary generation (for --checkpoint): --generate Generate an AI summary for the checkpoint --force Regenerate even if a summary already exists (requires --generate) +Export modes: + --export Export checkpoint(s) in structured format (JSON or Markdown) + --all Export all checkpoints (use with --export, optionally with --session to filter) + --showcase Apply privacy-focused redaction (use with --export) + --format FORMAT Export format: json, markdown (default: json, use with --export) + --output FILE Write export to file instead of stdout (use with --export) + +Selective content (use with --export): + --no-prompts Exclude prompts from export + --no-context Exclude context.md from export + --no-transcript Exclude full transcript from export + --include-tool-calls Extract and include tool calls separately + --include-file-diffs Include actual file diffs in export + Performance options: --search-all Remove branch/depth limits when searching for commits (may be slow) @@ -107,6 +146,23 @@ Checkpoint detail view shows: - Associated git commits that reference the checkpoint - Prompts and responses from the session +Export examples: + entire explain -c abc123 --export --showcase --format=json -o showcase.json + entire explain -c abc123 --export --format=markdown -o README.md + entire explain --export --all --showcase -o all-sessions.json + entire explain --export --all --session 2026-01-13 --showcase -o session.json + entire explain -c abc123 --raw-transcript # Still works (raw JSONL) + +Security (showcase mode): + Showcase mode applies aggressive redaction for public sharing: + - API keys and tokens (entropy-based) + - Internal URLs and private IPs + - File paths normalized to project-relative + - Usernames and email addresses + - Project names from git remotes + + ALWAYS review output before publishing - redaction is best-effort. + Note: --session filters the list view; --commit and --checkpoint are mutually exclusive.`, Args: func(_ *cobra.Command, args []string) error { if len(args) > 0 { @@ -131,9 +187,51 @@ Note: --session filters the list view; --commit and --checkpoint are mutually ex return errors.New("--raw-transcript requires --checkpoint/-c flag") } + // Export flag validations + if exportFlag && checkpointFlag == "" && !allFlag { + return errors.New("--export requires --checkpoint/-c flag or --all flag") + } + if allFlag && !exportFlag { + return errors.New("--all requires --export flag") + } + if allFlag && checkpointFlag != "" { + return errors.New("cannot specify both --all and --checkpoint/-c") + } + if showcaseFlag && !exportFlag { + return errors.New("--showcase requires --export flag") + } + if (cmd.Flags().Changed("format") || cmd.Flags().Changed("output")) && !exportFlag { + return errors.New("--format and --output require --export flag") + } + + // Mutually exclusive: can't use --export with --raw-transcript, --short, --full + if exportFlag && (rawTranscriptFlag || shortFlag || fullFlag) { + return errors.New("--export is mutually exclusive with --raw-transcript, --short, --full") + } + + // Selective content flags require --export + if (noPromptsFlag || noContextFlag || noTranscriptFlag || includeToolCallsFlag || includeFileDiffsFlag) && !exportFlag { + return errors.New("selective content flags (--no-prompts, --no-context, --no-transcript, --include-tool-calls, --include-file-diffs) require --export flag") + } + + // Validate conflicting selective content flags + if noTranscriptFlag && includeToolCallsFlag { + return errors.New("cannot use --no-transcript with --include-tool-calls (tool calls are extracted from transcript)") + } + // Convert short flag to verbose (verbose = !short) verbose := !shortFlag - return runExplain(cmd.OutOrStdout(), cmd.ErrOrStderr(), sessionFlag, commitFlag, checkpointFlag, noPagerFlag, verbose, fullFlag, rawTranscriptFlag, generateFlag, forceFlag, searchAllFlag) + + // Create export options + expOpts := exportOptions{ + NoPrompts: noPromptsFlag, + NoContext: noContextFlag, + NoTranscript: noTranscriptFlag, + IncludeToolCalls: includeToolCallsFlag, + IncludeFileDiffs: includeFileDiffsFlag, + } + + return runExplain(cmd.OutOrStdout(), cmd.ErrOrStderr(), sessionFlag, commitFlag, checkpointFlag, noPagerFlag, verbose, fullFlag, rawTranscriptFlag, generateFlag, forceFlag, searchAllFlag, exportFlag, allFlag, showcaseFlag, exportFormat, outputFile, expOpts) }, } @@ -148,18 +246,39 @@ Note: --session filters the list view; --commit and --checkpoint are mutually ex cmd.Flags().BoolVar(&forceFlag, "force", false, "Regenerate summary even if one already exists (requires --generate)") cmd.Flags().BoolVar(&searchAllFlag, "search-all", false, "Search all commits (no branch/depth limit, may be slow)") - // Make --short, --full, and --raw-transcript mutually exclusive - cmd.MarkFlagsMutuallyExclusive("short", "full", "raw-transcript") + // Export flags + cmd.Flags().BoolVar(&exportFlag, "export", false, "Export checkpoint(s) in structured format") + cmd.Flags().BoolVar(&allFlag, "all", false, "Export all checkpoints (use with --export, optionally with --session to filter)") + cmd.Flags().BoolVar(&showcaseFlag, "showcase", false, "Apply showcase redaction (use with --export)") + cmd.Flags().StringVar(&exportFormat, "format", "json", "Export format: json, markdown") + cmd.Flags().StringVarP(&outputFile, "output", "o", "", "Write to file instead of stdout") + + // Selective content flags + cmd.Flags().BoolVar(&noPromptsFlag, "no-prompts", false, "Exclude prompts from export (use with --export)") + cmd.Flags().BoolVar(&noContextFlag, "no-context", false, "Exclude context.md from export (use with --export)") + cmd.Flags().BoolVar(&noTranscriptFlag, "no-transcript", false, "Exclude transcript from export (use with --export)") + cmd.Flags().BoolVar(&includeToolCallsFlag, "include-tool-calls", false, "Extract and include tool calls separately (use with --export)") + cmd.Flags().BoolVar(&includeFileDiffsFlag, "include-file-diffs", false, "Include file diffs in export (use with --export)") + + // Make --short, --full, --raw-transcript, and --export mutually exclusive + cmd.MarkFlagsMutuallyExclusive("short", "full", "raw-transcript", "export") // --generate and --raw-transcript are incompatible (summary would be generated but not shown) cmd.MarkFlagsMutuallyExclusive("generate", "raw-transcript") + // --generate and --export are incompatible + cmd.MarkFlagsMutuallyExclusive("generate", "export") return cmd } // runExplain routes to the appropriate explain function based on flags. -func runExplain(w, errW io.Writer, sessionID, commitRef, checkpointID string, noPager, verbose, full, rawTranscript, generate, force, searchAll bool) error { +func runExplain(w, errW io.Writer, sessionID, commitRef, checkpointID string, noPager, verbose, full, rawTranscript, generate, force, searchAll bool, export, allExport, showcase bool, format, outputFile string, opts exportOptions) error { + // Handle multi-session export mode first + if export && allExport { + return runExportMultipleCheckpoints(w, errW, sessionID, showcase, format, outputFile, opts) + } + // Count mutually exclusive flags (--commit and --checkpoint are mutually exclusive) - // --session is now a filter for the list view, not a separate mode + // --session is now a filter for the list view, not a separate mode (except with --export --all) flagCount := 0 if commitRef != "" { flagCount++ @@ -180,7 +299,7 @@ func runExplain(w, errW io.Writer, sessionID, commitRef, checkpointID string, no return runExplainCommit(w, commitRef, noPager, verbose, full, searchAll) } if checkpointID != "" { - return runExplainCheckpoint(w, errW, checkpointID, noPager, verbose, full, rawTranscript, generate, force, searchAll) + return runExplainCheckpoint(w, errW, checkpointID, noPager, verbose, full, rawTranscript, generate, force, searchAll, export, showcase, format, outputFile, opts) } // Default or with session filter: show list view (optionally filtered by session) @@ -194,7 +313,8 @@ func runExplain(w, errW io.Writer, sessionID, commitRef, checkpointID string, no // When force is true, regenerates even if a summary already exists. // When rawTranscript is true, outputs only the raw transcript file (JSONL format). // When searchAll is true, searches all commits without branch/depth limits (used for finding associated commits). -func runExplainCheckpoint(w, errW io.Writer, checkpointIDPrefix string, noPager, verbose, full, rawTranscript, generate, force, searchAll bool) error { +// When export is true, exports the checkpoint in structured format (delegates to runExportCheckpoint). +func runExplainCheckpoint(w, errW io.Writer, checkpointIDPrefix string, noPager, verbose, full, rawTranscript, generate, force, searchAll bool, export, showcase bool, format, outputFile string, opts exportOptions) error { repo, err := openRepository() if err != nil { return fmt.Errorf("not a git repository: %w", err) @@ -259,6 +379,11 @@ func runExplainCheckpoint(w, errW io.Writer, checkpointIDPrefix string, noPager, return fmt.Errorf("failed to read checkpoint content: %w", err) } + // Handle export mode (must come before summary generation and raw transcript) + if export { + return runExportCheckpoint(w, errW, fullCheckpointID, content, summary, showcase, format, outputFile, opts) + } + // Handle summary generation if generate { if err := generateCheckpointSummary(w, errW, store, fullCheckpointID, summary, content, force); err != nil { @@ -333,6 +458,342 @@ func generateCheckpointSummary(w, _ io.Writer, store *checkpoint.GitStore, check return nil } +// runExportCheckpoint exports a checkpoint in structured format (JSON or Markdown). +// Applies showcase redaction if requested. +func runExportCheckpoint(w, _ io.Writer, checkpointID id.CheckpointID, + content *checkpoint.SessionContent, summary *checkpoint.CheckpointSummary, + showcase bool, format, outputFile string, opts exportOptions) error { + // Prepare transcript for export (conditionally based on options) + transcriptBytes := content.Transcript + prompts := content.Prompts + contextMd := content.Context + filesTouched := content.Metadata.FilesTouched + + // Apply selective content filters + if opts.NoPrompts { + prompts = "" + } + if opts.NoContext { + contextMd = "" + } + if opts.NoTranscript { + transcriptBytes = nil + } + + // Apply redaction if showcase mode + if showcase { + cfg, err := settings.Load() + if err != nil { + return fmt.Errorf("loading settings: %w", err) + } + + showcaseConfig := cfg.GetShowcaseConfig() + if showcaseConfig == nil { + showcaseConfig = &redact.ShowcaseConfig{} + *showcaseConfig = redact.DefaultShowcaseConfig() + } + + // Layer 1: Existing entropy-based redaction + transcriptBytes, err = redact.JSONLBytes(transcriptBytes) + if err != nil { + return fmt.Errorf("entropy redaction failed: %w", err) + } + + // Layer 2: Showcase redaction (patterns, blocklist, structural) + transcriptBytes, err = redact.ShowcaseJSONL(transcriptBytes, *showcaseConfig) + if err != nil { + return fmt.Errorf("showcase redaction failed: %w", err) + } + + prompts = redact.Showcase(prompts, *showcaseConfig) + contextMd = redact.Showcase(contextMd, *showcaseConfig) + + filesTouched = make([]string, len(content.Metadata.FilesTouched)) + for i, path := range content.Metadata.FilesTouched { + filesTouched[i] = redact.Showcase(path, *showcaseConfig) + } + + // Redact summary fields if present + if content.Metadata.Summary != nil { + redactedSummary := *content.Metadata.Summary + redactedSummary.Intent = redact.Showcase(redactedSummary.Intent, *showcaseConfig) + redactedSummary.Outcome = redact.Showcase(redactedSummary.Outcome, *showcaseConfig) + + // Redact Learnings fields + redactedSummary.Learnings.Repo = make([]string, len(redactedSummary.Learnings.Repo)) + for i, item := range redactedSummary.Learnings.Repo { + redactedSummary.Learnings.Repo[i] = redact.Showcase(item, *showcaseConfig) + } + redactedSummary.Learnings.Code = make([]checkpoint.CodeLearning, len(redactedSummary.Learnings.Code)) + for i, cl := range redactedSummary.Learnings.Code { + redactedSummary.Learnings.Code[i] = checkpoint.CodeLearning{ + Path: redact.Showcase(cl.Path, *showcaseConfig), + Line: cl.Line, + EndLine: cl.EndLine, + Finding: redact.Showcase(cl.Finding, *showcaseConfig), + } + } + redactedSummary.Learnings.Workflow = make([]string, len(redactedSummary.Learnings.Workflow)) + for i, item := range redactedSummary.Learnings.Workflow { + redactedSummary.Learnings.Workflow[i] = redact.Showcase(item, *showcaseConfig) + } + + // Redact Friction and OpenItems slices + redactedSummary.Friction = make([]string, len(redactedSummary.Friction)) + for i, item := range redactedSummary.Friction { + redactedSummary.Friction[i] = redact.Showcase(item, *showcaseConfig) + } + redactedSummary.OpenItems = make([]string, len(redactedSummary.OpenItems)) + for i, item := range redactedSummary.OpenItems { + redactedSummary.OpenItems[i] = redact.Showcase(item, *showcaseConfig) + } + + // Create a redacted copy of content metadata + redactedMetadata := content.Metadata + redactedMetadata.Summary = &redactedSummary + redactedContent := *content + redactedContent.Metadata = redactedMetadata + content = &redactedContent + } + } + + // Format output + var output []byte + var err error + + switch format { + case "json": + output, err = formatExportJSON(checkpointID, content, summary, + transcriptBytes, prompts, contextMd, filesTouched, opts) + case "markdown": + output, err = formatExportMarkdown(checkpointID, content, summary, + transcriptBytes, prompts, contextMd, filesTouched, opts) + default: + return fmt.Errorf("unsupported format: %s", format) + } + + if err != nil { + return fmt.Errorf("formatting failed: %w", err) + } + + // Write output + if outputFile == "" { + if _, err = w.Write(output); err != nil { + return fmt.Errorf("failed to write output: %w", err) + } + } else { + //nolint:gosec // G306: export file is public output, 0o644 is appropriate + if err := os.WriteFile(outputFile, output, 0o644); err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + fmt.Fprintf(w, "Exported to %s\n", outputFile) + } + + return nil +} + +// runExportMultipleCheckpoints exports multiple checkpoints in structured format (JSON or Markdown). +// Filters by session ID if provided. Applies showcase redaction if requested. +func runExportMultipleCheckpoints(w, _ io.Writer, sessionFilter string, showcase bool, format, outputFile string, opts exportOptions) error { + repo, err := openRepository() + if err != nil { + return fmt.Errorf("not a git repository: %w", err) + } + + store := checkpoint.NewGitStore(repo) + + // Get all committed checkpoints + committed, err := store.ListCommitted(context.Background()) + if err != nil { + return fmt.Errorf("failed to list checkpoints: %w", err) + } + + // Filter by session if specified + var checkpointsToExport []checkpoint.CommittedInfo + for _, info := range committed { + if sessionFilter != "" { + // Match exact session ID or prefix + if info.SessionID != sessionFilter && !strings.HasPrefix(info.SessionID, sessionFilter) { + continue + } + } + checkpointsToExport = append(checkpointsToExport, info) + } + + if len(checkpointsToExport) == 0 { + if sessionFilter != "" { + return fmt.Errorf("no checkpoints found matching session filter: %s", sessionFilter) + } + return errors.New("no checkpoints found") + } + + // Sort by created timestamp (most recent first) + sort.Slice(checkpointsToExport, func(i, j int) bool { + return checkpointsToExport[i].CreatedAt.After(checkpointsToExport[j].CreatedAt) + }) + + // Export each checkpoint + var exportedCheckpoints []exportedCheckpointData + for _, cpInfo := range checkpointsToExport { + // Load checkpoint content + content, err := store.ReadLatestSessionContent(context.Background(), cpInfo.CheckpointID) + if err != nil { + logging.Warn(context.Background(), "failed to read checkpoint content, skipping", + "checkpoint_id", cpInfo.CheckpointID, + "error", err) + continue + } + + summary, _ := store.ReadCommitted(context.Background(), cpInfo.CheckpointID) //nolint:errcheck // Best-effort + + // Prepare data for export + transcriptBytes := content.Transcript + prompts := content.Prompts + contextMd := content.Context + filesTouched := content.Metadata.FilesTouched + + // Apply redaction if showcase mode + if showcase { + cfg, err := settings.Load() + if err != nil { + return fmt.Errorf("loading settings: %w", err) + } + + showcaseConfig := cfg.GetShowcaseConfig() + if showcaseConfig == nil { + showcaseConfig = &redact.ShowcaseConfig{} + *showcaseConfig = redact.DefaultShowcaseConfig() + } + + // Layer 1: Existing entropy-based redaction + transcriptBytes, err = redact.JSONLBytes(transcriptBytes) + if err != nil { + return fmt.Errorf("entropy redaction failed for checkpoint %s: %w", cpInfo.CheckpointID, err) + } + + // Layer 2: Showcase redaction + transcriptBytes, err = redact.ShowcaseJSONL(transcriptBytes, *showcaseConfig) + if err != nil { + return fmt.Errorf("showcase redaction failed for checkpoint %s: %w", cpInfo.CheckpointID, err) + } + + prompts = redact.Showcase(prompts, *showcaseConfig) + contextMd = redact.Showcase(contextMd, *showcaseConfig) + + filesTouched = make([]string, len(content.Metadata.FilesTouched)) + for i, path := range content.Metadata.FilesTouched { + filesTouched[i] = redact.Showcase(path, *showcaseConfig) + } + + // Redact summary fields if present + if content.Metadata.Summary != nil { + redactedSummary := *content.Metadata.Summary + redactedSummary.Intent = redact.Showcase(redactedSummary.Intent, *showcaseConfig) + redactedSummary.Outcome = redact.Showcase(redactedSummary.Outcome, *showcaseConfig) + + // Redact Learnings fields + redactedSummary.Learnings.Repo = make([]string, len(redactedSummary.Learnings.Repo)) + for i, item := range redactedSummary.Learnings.Repo { + redactedSummary.Learnings.Repo[i] = redact.Showcase(item, *showcaseConfig) + } + redactedSummary.Learnings.Code = make([]checkpoint.CodeLearning, len(redactedSummary.Learnings.Code)) + for i, cl := range redactedSummary.Learnings.Code { + redactedSummary.Learnings.Code[i] = checkpoint.CodeLearning{ + Path: redact.Showcase(cl.Path, *showcaseConfig), + Line: cl.Line, + EndLine: cl.EndLine, + Finding: redact.Showcase(cl.Finding, *showcaseConfig), + } + } + redactedSummary.Learnings.Workflow = make([]string, len(redactedSummary.Learnings.Workflow)) + for i, item := range redactedSummary.Learnings.Workflow { + redactedSummary.Learnings.Workflow[i] = redact.Showcase(item, *showcaseConfig) + } + + // Redact Friction and OpenItems slices + redactedSummary.Friction = make([]string, len(redactedSummary.Friction)) + for i, item := range redactedSummary.Friction { + redactedSummary.Friction[i] = redact.Showcase(item, *showcaseConfig) + } + redactedSummary.OpenItems = make([]string, len(redactedSummary.OpenItems)) + for i, item := range redactedSummary.OpenItems { + redactedSummary.OpenItems[i] = redact.Showcase(item, *showcaseConfig) + } + + // Update metadata with redacted summary + redactedMetadata := content.Metadata + redactedMetadata.Summary = &redactedSummary + redactedContent := *content + redactedContent.Metadata = redactedMetadata + content = &redactedContent + } + } + + // Apply selective content filters + if opts.NoPrompts { + prompts = "" + } + if opts.NoContext { + contextMd = "" + } + if opts.NoTranscript { + transcriptBytes = nil + } + + // Add to export list + exportedCheckpoints = append(exportedCheckpoints, exportedCheckpointData{ + CheckpointID: cpInfo.CheckpointID, + Content: content, + Summary: summary, + Transcript: transcriptBytes, + Prompts: prompts, + Context: contextMd, + FilesTouched: filesTouched, + }) + } + + // Format output + var output []byte + switch format { + case "json": + output, err = formatExportMultipleJSON(exportedCheckpoints, opts) + case "markdown": + output, err = formatExportMultipleMarkdown(exportedCheckpoints, opts) + default: + return fmt.Errorf("unsupported format: %s", format) + } + + if err != nil { + return err + } + + // Write output + if outputFile == "" { + if _, err = w.Write(output); err != nil { + return fmt.Errorf("failed to write output: %w", err) + } + } else { + //nolint:gosec // G306: export file is public output, 0o644 is appropriate + if err := os.WriteFile(outputFile, output, 0o644); err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + fmt.Fprintf(w, "Exported %d checkpoint(s) to %s\n", len(exportedCheckpoints), outputFile) + } + + return nil +} + +// exportedCheckpointData holds all data for a single checkpoint export. +type exportedCheckpointData struct { + CheckpointID id.CheckpointID + Content *checkpoint.SessionContent + Summary *checkpoint.CheckpointSummary + Transcript []byte + Prompts string + Context string + FilesTouched []string +} + // explainTemporaryCheckpoint finds and formats a temporary checkpoint by shadow commit hash prefix. // Returns the formatted output and whether the checkpoint was found. // Searches ALL shadow branches, not just the one for current HEAD, to find checkpoints @@ -1170,7 +1631,7 @@ func runExplainCommit(w io.Writer, commitRef string, noPager, verbose, full, sea // Delegate to checkpoint detail view // Note: errW is only used for generate mode, but we pass w for safety - return runExplainCheckpoint(w, w, checkpointID.String(), noPager, verbose, full, false, false, false, searchAll) + return runExplainCheckpoint(w, w, checkpointID.String(), noPager, verbose, full, false, false, false, searchAll, false, false, "", "", exportOptions{}) } // formatSessionInfo formats session information for display. diff --git a/cmd/entire/cli/explain_formatters.go b/cmd/entire/cli/explain_formatters.go new file mode 100644 index 000000000..12438a12c --- /dev/null +++ b/cmd/entire/cli/explain_formatters.go @@ -0,0 +1,355 @@ +package cli + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "strings" + "time" + + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" +) + +// formatExportJSON formats checkpoint data as JSON. +func formatExportJSON(checkpointID id.CheckpointID, + content *checkpoint.SessionContent, _ *checkpoint.CheckpointSummary, + transcript []byte, prompts, context string, filesTouched []string, opts exportOptions) ([]byte, error) { + out := map[string]any{ + "checkpoint_id": checkpointID.String(), + "session_id": content.Metadata.SessionID, + "metadata": map[string]any{ + "created_at": content.Metadata.CreatedAt.Format(time.RFC3339), + "strategy": content.Metadata.Strategy, + "agent": content.Metadata.Agent, + "token_usage": content.Metadata.TokenUsage, + }, + "files_touched": filesTouched, + "exported_at": time.Now().Format(time.RFC3339), + } + + // Conditionally include content based on options + if !opts.NoTranscript { + out["transcript"] = string(transcript) // JSONL as string + } + if !opts.NoPrompts { + out["prompts"] = prompts + } + if !opts.NoContext { + out["context"] = context + } + + // Extract and include tool calls if requested + if opts.IncludeToolCalls && len(transcript) > 0 { + toolCalls := extractToolCalls(transcript) + if len(toolCalls) > 0 { + out["tool_calls"] = toolCalls + } + } + + // Include file diffs if requested + if opts.IncludeFileDiffs { + // TODO: Implement file diff extraction + out["file_diffs"] = []string{} // Placeholder + } + + // Include summary if available + if content.Metadata.Summary != nil { + out["summary"] = map[string]any{ + "intent": content.Metadata.Summary.Intent, + "outcome": content.Metadata.Summary.Outcome, + "learnings": content.Metadata.Summary.Learnings, + "friction": content.Metadata.Summary.Friction, + "open_items": content.Metadata.Summary.OpenItems, + } + } + + data, err := json.MarshalIndent(out, "", " ") + if err != nil { + return nil, fmt.Errorf("failed to marshal JSON: %w", err) + } + return data, nil +} + +// formatExportMarkdown formats checkpoint data as Markdown. +// +//nolint:unparam // error return kept for consistency with formatExportJSON +func formatExportMarkdown(checkpointID id.CheckpointID, + content *checkpoint.SessionContent, _ *checkpoint.CheckpointSummary, + transcript []byte, prompts, context string, filesTouched []string, opts exportOptions) ([]byte, error) { + var sb strings.Builder + + // Header + fmt.Fprintf(&sb, "# Session: %s\n\n", content.Metadata.SessionID) + fmt.Fprintf(&sb, "**Checkpoint:** `%s`\n\n", checkpointID.String()) + fmt.Fprintf(&sb, "**Created:** %s\n\n", content.Metadata.CreatedAt.Format("2006-01-02 15:04:05")) + + // Summary section (if available) + if content.Metadata.Summary != nil { + sb.WriteString("## Summary\n\n") + fmt.Fprintf(&sb, "**Intent:** %s\n\n", content.Metadata.Summary.Intent) + fmt.Fprintf(&sb, "**Outcome:** %s\n\n", content.Metadata.Summary.Outcome) + + if len(content.Metadata.Summary.Learnings.Code) > 0 { + sb.WriteString("**Key Learnings:**\n\n") + for _, l := range content.Metadata.Summary.Learnings.Code { + fmt.Fprintf(&sb, "- %s: %s\n", l.Path, l.Finding) + } + sb.WriteString("\n") + } + } + + // Prompts section (if not excluded) + if !opts.NoPrompts && prompts != "" { + sb.WriteString("## Prompts\n\n") + sb.WriteString(prompts) + sb.WriteString("\n\n") + } + + // Context section (if not excluded) + if !opts.NoContext && context != "" { + sb.WriteString("## Context\n\n") + sb.WriteString(context) + sb.WriteString("\n\n") + } + + // Files section + if len(filesTouched) > 0 { + sb.WriteString("## Files Modified\n\n") + for _, file := range filesTouched { + fmt.Fprintf(&sb, "- `%s`\n", file) + } + sb.WriteString("\n") + } + + // Transcript section (if not excluded) + if !opts.NoTranscript && len(transcript) > 0 { + sb.WriteString("## Transcript\n\n") + formattedTranscript := formatTranscriptBytes(transcript, "") + sb.WriteString(formattedTranscript) + } + + // Tool calls section (if requested) + if opts.IncludeToolCalls && len(transcript) > 0 { + toolCalls := extractToolCalls(transcript) + if len(toolCalls) > 0 { + sb.WriteString("\n## Tool Calls\n\n") + for i, tc := range toolCalls { + fmt.Fprintf(&sb, "### Call %d: %s\n\n", i+1, tc.Name) + if tc.Input != "" { + fmt.Fprintf(&sb, "**Input:**\n```json\n%s\n```\n\n", tc.Input) + } + } + } + } + + return []byte(sb.String()), nil +} + +// formatExportMultipleJSON formats multiple checkpoints as a JSON array. +func formatExportMultipleJSON(checkpoints []exportedCheckpointData, opts exportOptions) ([]byte, error) { + var exportArray []map[string]any + + for _, cp := range checkpoints { + out := map[string]any{ + "checkpoint_id": cp.CheckpointID.String(), + "session_id": cp.Content.Metadata.SessionID, + "metadata": map[string]any{ + "created_at": cp.Content.Metadata.CreatedAt.Format(time.RFC3339), + "strategy": cp.Content.Metadata.Strategy, + "agent": cp.Content.Metadata.Agent, + "token_usage": cp.Content.Metadata.TokenUsage, + }, + "files_touched": cp.FilesTouched, + } + + // Conditionally include content based on options + if !opts.NoTranscript { + out["transcript"] = string(cp.Transcript) + } + if !opts.NoPrompts { + out["prompts"] = cp.Prompts + } + if !opts.NoContext { + out["context"] = cp.Context + } + + // Extract and include tool calls if requested + if opts.IncludeToolCalls && len(cp.Transcript) > 0 { + toolCalls := extractToolCalls(cp.Transcript) + if len(toolCalls) > 0 { + out["tool_calls"] = toolCalls + } + } + + // Include file diffs if requested + if opts.IncludeFileDiffs { + out["file_diffs"] = []string{} // Placeholder + } + + // Include summary if available + if cp.Content.Metadata.Summary != nil { + out["summary"] = map[string]any{ + "intent": cp.Content.Metadata.Summary.Intent, + "outcome": cp.Content.Metadata.Summary.Outcome, + "learnings": cp.Content.Metadata.Summary.Learnings, + "friction": cp.Content.Metadata.Summary.Friction, + "open_items": cp.Content.Metadata.Summary.OpenItems, + } + } + + exportArray = append(exportArray, out) + } + + // Wrap in an object with metadata + output := map[string]any{ + "checkpoints": exportArray, + "count": len(checkpoints), + "exported_at": time.Now().Format(time.RFC3339), + } + + data, err := json.MarshalIndent(output, "", " ") + if err != nil { + return nil, fmt.Errorf("failed to marshal JSON: %w", err) + } + return data, nil +} + +// formatExportMultipleMarkdown formats multiple checkpoints as Markdown with sections. +// +//nolint:unparam // error return kept for consistency with formatExportMultipleJSON +func formatExportMultipleMarkdown(checkpoints []exportedCheckpointData, opts exportOptions) ([]byte, error) { + var sb strings.Builder + + // Header + fmt.Fprintf(&sb, "# Entire Sessions Export\n\n") + fmt.Fprintf(&sb, "**Total Checkpoints:** %d\n\n", len(checkpoints)) + fmt.Fprintf(&sb, "**Exported:** %s\n\n", time.Now().Format("2006-01-02 15:04:05")) + sb.WriteString("---\n\n") + + // Export each checkpoint as a section + for i, cp := range checkpoints { + fmt.Fprintf(&sb, "## Checkpoint %d: %s\n\n", i+1, cp.CheckpointID.String()) + fmt.Fprintf(&sb, "**Session:** %s\n\n", cp.Content.Metadata.SessionID) + fmt.Fprintf(&sb, "**Created:** %s\n\n", cp.Content.Metadata.CreatedAt.Format("2006-01-02 15:04:05")) + + // Summary section (if available) + if cp.Content.Metadata.Summary != nil { + sb.WriteString("### Summary\n\n") + fmt.Fprintf(&sb, "**Intent:** %s\n\n", cp.Content.Metadata.Summary.Intent) + fmt.Fprintf(&sb, "**Outcome:** %s\n\n", cp.Content.Metadata.Summary.Outcome) + + if len(cp.Content.Metadata.Summary.Learnings.Code) > 0 { + sb.WriteString("**Key Learnings:**\n\n") + for _, l := range cp.Content.Metadata.Summary.Learnings.Code { + fmt.Fprintf(&sb, "- %s: %s\n", l.Path, l.Finding) + } + sb.WriteString("\n") + } + } + + // Prompts section (if not excluded) + if !opts.NoPrompts && cp.Prompts != "" { + sb.WriteString("### Prompts\n\n") + sb.WriteString(cp.Prompts) + sb.WriteString("\n\n") + } + + // Context section (if not excluded) + if !opts.NoContext && cp.Context != "" { + sb.WriteString("### Context\n\n") + sb.WriteString(cp.Context) + sb.WriteString("\n\n") + } + + // Files section + if len(cp.FilesTouched) > 0 { + sb.WriteString("### Files Modified\n\n") + for _, file := range cp.FilesTouched { + fmt.Fprintf(&sb, "- `%s`\n", file) + } + sb.WriteString("\n") + } + + // Transcript section (if not excluded) + if !opts.NoTranscript && len(cp.Transcript) > 0 { + sb.WriteString("### Transcript\n\n") + formattedTranscript := formatTranscriptBytes(cp.Transcript, "") + sb.WriteString(formattedTranscript) + } + + // Tool calls section (if requested) + if opts.IncludeToolCalls && len(cp.Transcript) > 0 { + toolCalls := extractToolCalls(cp.Transcript) + if len(toolCalls) > 0 { + sb.WriteString("\n### Tool Calls\n\n") + for j, tc := range toolCalls { + fmt.Fprintf(&sb, "#### Call %d: %s\n\n", j+1, tc.Name) + if tc.Input != "" { + fmt.Fprintf(&sb, "**Input:**\n```json\n%s\n```\n\n", tc.Input) + } + } + } + } + + // Separator between checkpoints (except for the last one) + if i < len(checkpoints)-1 { + sb.WriteString("\n---\n\n") + } + } + + return []byte(sb.String()), nil +} + +// toolCall represents a single tool call extracted from a transcript. +type toolCall struct { + Name string `json:"name"` + Input string `json:"input"` +} + +// extractToolCalls extracts tool calls from a JSONL transcript. +func extractToolCalls(transcript []byte) []toolCall { + var toolCalls []toolCall + + // Parse JSONL line by line + scanner := bufio.NewScanner(bytes.NewReader(transcript)) + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + + // Parse JSON + var entry map[string]any + if err := json.Unmarshal(line, &entry); err != nil { + continue + } + + // Check if this is a tool use entry + typeVal, hasType := entry["type"] + if !hasType || typeVal != "tool_use" { + continue + } + + // Extract tool name and input + name, _ := entry["name"].(string) + input, _ := entry["input"].(map[string]any) + + // Serialize input to JSON + var inputJSON string + if input != nil { + inputBytes, err := json.MarshalIndent(input, "", " ") + if err == nil { + inputJSON = string(inputBytes) + } + } + + toolCalls = append(toolCalls, toolCall{ + Name: name, + Input: inputJSON, + }) + } + + return toolCalls +} diff --git a/cmd/entire/cli/explain_formatters_test.go b/cmd/entire/cli/explain_formatters_test.go new file mode 100644 index 000000000..d1564a954 --- /dev/null +++ b/cmd/entire/cli/explain_formatters_test.go @@ -0,0 +1,323 @@ +package cli + +import ( + "encoding/json" + "strings" + "testing" + "time" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" +) + +func TestFormatExportJSON_ValidOutput(t *testing.T) { + t.Parallel() + + checkpointID, err := id.NewCheckpointID("a1b2c3d4e5f6") + if err != nil { + t.Fatalf("NewCheckpointID() error = %v", err) + } + now := time.Now() + + content := &checkpoint.SessionContent{ + Metadata: checkpoint.CommittedMetadata{ + SessionID: "2026-01-13-test-session", + CreatedAt: now, + Strategy: "manual-commit", + Agent: "Claude Code", + TokenUsage: &agent.TokenUsage{ + InputTokens: 100, + OutputTokens: 200, + }, + FilesTouched: []string{"file1.go", "file2.go"}, + }, + Transcript: []byte(`{"type":"text","content":"test"}`), + Prompts: "Test prompt", + Context: "Test context", + } + + summary := &checkpoint.CheckpointSummary{ + CheckpointID: checkpointID, + FilesTouched: content.Metadata.FilesTouched, + } + + output, err := formatExportJSON(checkpointID, content, summary, + content.Transcript, content.Prompts, content.Context, content.Metadata.FilesTouched, exportOptions{}) + + if err != nil { + t.Fatalf("formatExportJSON() error = %v", err) + } + + // Verify output is valid JSON + var result map[string]any + if err := json.Unmarshal(output, &result); err != nil { + t.Fatalf("output is not valid JSON: %v", err) + } + + // Check required fields + requiredFields := []string{"checkpoint_id", "session_id", "transcript", "metadata", "files_touched", "exported_at"} + for _, field := range requiredFields { + if _, ok := result[field]; !ok { + t.Errorf("missing required field: %s", field) + } + } + + // Verify specific values + if result["checkpoint_id"] != checkpointID.String() { + t.Errorf("checkpoint_id = %v, want %v", result["checkpoint_id"], checkpointID.String()) + } + if result["session_id"] != content.Metadata.SessionID { + t.Errorf("session_id = %v, want %v", result["session_id"], content.Metadata.SessionID) + } +} + +func TestFormatExportJSON_WithSummary(t *testing.T) { + t.Parallel() + + checkpointID, err := id.NewCheckpointID("b2c3d4e5f6a1") + if err != nil { + t.Fatalf("NewCheckpointID() error = %v", err) + } + now := time.Now() + + content := &checkpoint.SessionContent{ + Metadata: checkpoint.CommittedMetadata{ + SessionID: "2026-01-13-test-session", + CreatedAt: now, + Strategy: "manual-commit", + Agent: "Claude Code", + TokenUsage: &agent.TokenUsage{}, + Summary: &checkpoint.Summary{ + Intent: "Test intent", + Outcome: "Test outcome", + Learnings: checkpoint.LearningsSummary{Code: []checkpoint.CodeLearning{}}, + Friction: []string{}, + OpenItems: []string{}, + }, + }, + Transcript: []byte(`{"type":"text","content":"test"}`), + } + + summary := &checkpoint.CheckpointSummary{CheckpointID: checkpointID} + + output, err := formatExportJSON(checkpointID, content, summary, + content.Transcript, content.Prompts, "", []string{}, exportOptions{}) + + if err != nil { + t.Fatalf("formatExportJSON() error = %v", err) + } + + var result map[string]any + if err := json.Unmarshal(output, &result); err != nil { + t.Fatalf("output is not valid JSON: %v", err) + } + + // Verify summary is included + summaryMap, ok := result["summary"].(map[string]any) + if !ok { + t.Fatal("summary field missing or not a map") + } + + if summaryMap["intent"] != "Test intent" { + t.Errorf("summary.intent = %v, want %v", summaryMap["intent"], "Test intent") + } + if summaryMap["outcome"] != "Test outcome" { + t.Errorf("summary.outcome = %v, want %v", summaryMap["outcome"], "Test outcome") + } +} + +func TestFormatExportMarkdown_Structure(t *testing.T) { + t.Parallel() + + checkpointID, err := id.NewCheckpointID("c3d4e5f6a1b2") + if err != nil { + t.Fatalf("NewCheckpointID() error = %v", err) + } + now := time.Now() + + content := &checkpoint.SessionContent{ + Metadata: checkpoint.CommittedMetadata{ + SessionID: "2026-01-13-test-session", + CreatedAt: now, + Strategy: "manual-commit", + Agent: "Claude Code", + TokenUsage: &agent.TokenUsage{}, + FilesTouched: []string{"file1.go", "file2.go"}, + }, + Transcript: []byte(`{"type":"text","content":"test"}`), + } + + summary := &checkpoint.CheckpointSummary{CheckpointID: checkpointID} + + output, err := formatExportMarkdown(checkpointID, content, summary, + content.Transcript, "", "", content.Metadata.FilesTouched, exportOptions{}) + + if err != nil { + t.Fatalf("formatExportMarkdown() error = %v", err) + } + + outputStr := string(output) + + // Verify markdown structure + if !strings.Contains(outputStr, "# Session:") { + t.Error("missing session header") + } + if !strings.Contains(outputStr, "**Checkpoint:**") { + t.Error("missing checkpoint field") + } + if !strings.Contains(outputStr, "**Created:**") { + t.Error("missing created field") + } + if !strings.Contains(outputStr, "## Files Modified") { + t.Error("missing files section") + } + if !strings.Contains(outputStr, "## Transcript") { + t.Error("missing transcript section") + } + + // Verify files are listed + if !strings.Contains(outputStr, "`file1.go`") { + t.Error("file1.go not listed") + } + if !strings.Contains(outputStr, "`file2.go`") { + t.Error("file2.go not listed") + } +} + +func TestFormatExportMarkdown_WithSummary(t *testing.T) { + t.Parallel() + + checkpointID, err := id.NewCheckpointID("d4e5f6a1b2c3") + if err != nil { + t.Fatalf("NewCheckpointID() error = %v", err) + } + now := time.Now() + + content := &checkpoint.SessionContent{ + Metadata: checkpoint.CommittedMetadata{ + SessionID: "2026-01-13-test-session", + CreatedAt: now, + Strategy: "manual-commit", + Agent: "Claude Code", + TokenUsage: &agent.TokenUsage{}, + Summary: &checkpoint.Summary{ + Intent: "Implement feature X", + Outcome: "Successfully implemented", + Learnings: checkpoint.LearningsSummary{ + Code: []checkpoint.CodeLearning{ + {Path: "api/handler.go", Finding: "Added error handling"}, + }, + }, + }, + }, + Transcript: []byte(`{"type":"text","content":"test"}`), + } + + summary := &checkpoint.CheckpointSummary{CheckpointID: checkpointID} + + output, err := formatExportMarkdown(checkpointID, content, summary, + content.Transcript, "", "", []string{}, exportOptions{}) + + if err != nil { + t.Fatalf("formatExportMarkdown() error = %v", err) + } + + outputStr := string(output) + + // Verify summary section + if !strings.Contains(outputStr, "## Summary") { + t.Error("missing summary section") + } + if !strings.Contains(outputStr, "**Intent:**") { + t.Error("missing intent field") + } + if !strings.Contains(outputStr, "Implement feature X") { + t.Error("intent value not found") + } + if !strings.Contains(outputStr, "**Outcome:**") { + t.Error("missing outcome field") + } + if !strings.Contains(outputStr, "Successfully implemented") { + t.Error("outcome value not found") + } + if !strings.Contains(outputStr, "**Key Learnings:**") { + t.Error("missing learnings section") + } + if !strings.Contains(outputStr, "api/handler.go") { + t.Error("learning path not found") + } +} + +func TestFormatExportJSON_HandlesEmptyFields(t *testing.T) { + t.Parallel() + + checkpointID, err := id.NewCheckpointID("e5f6a1b2c3d4") + if err != nil { + t.Fatalf("NewCheckpointID() error = %v", err) + } + now := time.Now() + + content := &checkpoint.SessionContent{ + Metadata: checkpoint.CommittedMetadata{ + SessionID: "2026-01-13-test", + CreatedAt: now, + Strategy: "manual-commit", + FilesTouched: []string{}, + }, + Transcript: []byte{}, + } + + summary := &checkpoint.CheckpointSummary{CheckpointID: checkpointID} + + output, err := formatExportJSON(checkpointID, content, summary, + content.Transcript, "", "", content.Metadata.FilesTouched, exportOptions{}) + + if err != nil { + t.Fatalf("formatExportJSON() should handle empty fields, got error: %v", err) + } + + // Should still produce valid JSON + var result map[string]any + if err := json.Unmarshal(output, &result); err != nil { + t.Fatalf("output is not valid JSON: %v", err) + } +} + +func TestFormatExportMarkdown_HandlesEmptyFiles(t *testing.T) { + t.Parallel() + + checkpointID, err := id.NewCheckpointID("f6a1b2c3d4e5") + if err != nil { + t.Fatalf("NewCheckpointID() error = %v", err) + } + now := time.Now() + + content := &checkpoint.SessionContent{ + Metadata: checkpoint.CommittedMetadata{ + SessionID: "2026-01-13-test", + CreatedAt: now, + Strategy: "manual-commit", + FilesTouched: []string{}, + }, + Transcript: []byte(`{"type":"text","content":"test"}`), + } + + summary := &checkpoint.CheckpointSummary{CheckpointID: checkpointID} + + output, err := formatExportMarkdown(checkpointID, content, summary, + content.Transcript, "", "", content.Metadata.FilesTouched, exportOptions{}) + + if err != nil { + t.Fatalf("formatExportMarkdown() should handle empty files, got error: %v", err) + } + + outputStr := string(output) + + // When no files, the section should still render but be empty + // (or omitted, depending on implementation - we choose to always include it) + if !strings.Contains(outputStr, "## Files Modified") { + // This is fine - we could also choose to omit the section entirely + t.Log("Files Modified section omitted when no files (acceptable)") + } +} diff --git a/cmd/entire/cli/explain_test.go b/cmd/entire/cli/explain_test.go index fc63de591..39430730d 100644 --- a/cmd/entire/cli/explain_test.go +++ b/cmd/entire/cli/explain_test.go @@ -353,7 +353,7 @@ func TestExplainDefault_NoCheckpoints_ShowsHelpfulMessage(t *testing.T) { func TestExplainBothFlagsError(t *testing.T) { // Test that providing both --session and --commit returns an error var stdout, stderr bytes.Buffer - err := runExplain(&stdout, &stderr, "session-id", "commit-sha", "", false, false, false, false, false, false, false) + err := runExplain(&stdout, &stderr, "session-id", "commit-sha", "", false, false, false, false, false, false, false, false, false, false, "", "", exportOptions{}) if err == nil { t.Error("expected error when both flags provided, got nil") @@ -813,7 +813,7 @@ func TestRunExplain_MutualExclusivityError(t *testing.T) { var buf, errBuf bytes.Buffer // Providing both --session and --checkpoint should error - err := runExplain(&buf, &errBuf, "session-id", "", "checkpoint-id", false, false, false, false, false, false, false) + err := runExplain(&buf, &errBuf, "session-id", "", "checkpoint-id", false, false, false, false, false, false, false, false, false, false, "", "", exportOptions{}) if err == nil { t.Error("expected error when multiple flags provided") @@ -857,7 +857,7 @@ func TestRunExplainCheckpoint_NotFound(t *testing.T) { } var buf, errBuf bytes.Buffer - err = runExplainCheckpoint(&buf, &errBuf, "nonexistent123", false, false, false, false, false, false, false) + err = runExplainCheckpoint(&buf, &errBuf, "nonexistent123", false, false, false, false, false, false, false, false, false, "", "", exportOptions{}) if err == nil { t.Error("expected error for nonexistent checkpoint") @@ -2493,7 +2493,7 @@ func TestRunExplain_SessionFlagFiltersListView(t *testing.T) { // When session is specified alone, it should NOT error for mutual exclusivity // It should route to the list view with a filter (which may fail for other reasons // like not being in a git repo, but not for mutual exclusivity) - err := runExplain(&buf, &errBuf, "some-session", "", "", false, false, false, false, false, false, false) + err := runExplain(&buf, &errBuf, "some-session", "", "", false, false, false, false, false, false, false, false, false, false, "", "", exportOptions{}) // Should NOT be a mutual exclusivity error if err != nil && strings.Contains(err.Error(), "cannot specify multiple") { @@ -2505,7 +2505,7 @@ func TestRunExplain_SessionWithCheckpointStillMutuallyExclusive(t *testing.T) { // Test that --session with --checkpoint is still an error var buf, errBuf bytes.Buffer - err := runExplain(&buf, &errBuf, "some-session", "", "some-checkpoint", false, false, false, false, false, false, false) + err := runExplain(&buf, &errBuf, "some-session", "", "some-checkpoint", false, false, false, false, false, false, false, false, false, false, "", "", exportOptions{}) if err == nil { t.Error("expected error when --session and --checkpoint both specified") @@ -2519,7 +2519,7 @@ func TestRunExplain_SessionWithCommitStillMutuallyExclusive(t *testing.T) { // Test that --session with --commit is still an error var buf, errBuf bytes.Buffer - err := runExplain(&buf, &errBuf, "some-session", "some-commit", "", false, false, false, false, false, false, false) + err := runExplain(&buf, &errBuf, "some-session", "some-commit", "", false, false, false, false, false, false, false, false, false, false, "", "", exportOptions{}) if err == nil { t.Error("expected error when --session and --commit both specified") diff --git a/cmd/entire/cli/insights.go b/cmd/entire/cli/insights.go new file mode 100644 index 000000000..88bc093a5 --- /dev/null +++ b/cmd/entire/cli/insights.go @@ -0,0 +1,270 @@ +package cli + +import ( + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/logging" + "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/strategy" + + "github.com/go-git/go-git/v5" + "github.com/spf13/cobra" +) + +const ( + formatJSON = "json" + formatMarkdown = "markdown" + formatHTML = "html" + periodWeek = "week" + periodMonth = "month" + periodYear = "year" +) + +func newInsightsCmd() *cobra.Command { + var periodFlag string + var repoFlag string + var agentFlag string + var formatJSONFlag bool + var exportFlag bool + var formatFlag string + var outputFlag string + var noCacheFlag bool + + cmd := &cobra.Command{ + Use: "insights", + Short: "Show session analytics and usage patterns", + Long: `Insights provides analytics across your AI-assisted development sessions. + +See metrics like session counts, token usage, estimated costs, tool usage, +and activity patterns to understand your AI development workflow. + +Time periods: + --period week Last 7 days (default) + --period month Last 30 days + --period year Last 365 days + +Filtering: + --agent TYPE Filter by agent type (e.g., claude-code, gemini-cli) + --repo NAME Filter by repository name (future) + +Output formats: + Default: Human-readable terminal output + --json JSON to stdout + --export Structured export (requires --format) + --format FORMAT Export format: json, markdown, html + --output FILE Write to file instead of stdout + +Performance: + --no-cache Force full re-analysis (ignore cache) + +Examples: + entire insights # Week view + entire insights --period month # Month view + entire insights --agent claude-code # Filter by agent + entire insights --export --format json -o stats.json + entire insights --export --format markdown -o INSIGHTS.md + +Note: Uses incremental caching for fast analysis. First run analyzes all sessions, +subsequent runs only process new sessions since last run.`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + // Check if Entire is disabled + if checkDisabledGuard(cmd.OutOrStdout()) { + return nil + } + + // Validate flag dependencies + if formatJSONFlag && exportFlag { + return errors.New("--json and --export are mutually exclusive") + } + if (formatFlag != "" || outputFlag != "") && !exportFlag { + return errors.New("--format and --output require --export flag") + } + + // Validate period + if periodFlag != "" && periodFlag != periodWeek && periodFlag != periodMonth && periodFlag != periodYear { + return fmt.Errorf("invalid period: %s (must be %s, %s, or %s)", periodFlag, periodWeek, periodMonth, periodYear) + } + + // Validate format + if exportFlag && formatFlag == "" { + formatFlag = formatJSON // Default format + } + if formatFlag != "" && formatFlag != formatJSON && formatFlag != formatMarkdown && formatFlag != formatHTML { + return fmt.Errorf("invalid format: %s (must be %s, %s, or %s)", formatFlag, formatJSON, formatMarkdown, formatHTML) + } + + return runInsights(cmd.OutOrStdout(), cmd.ErrOrStderr(), periodFlag, repoFlag, agentFlag, formatJSONFlag, exportFlag, formatFlag, outputFlag, noCacheFlag) + }, + } + + cmd.Flags().StringVar(&periodFlag, "period", periodWeek, "Time period: week, month, year") + cmd.Flags().StringVar(&repoFlag, "repo", "", "Filter by repository name") + cmd.Flags().StringVar(&agentFlag, "agent", "", "Filter by agent type") + cmd.Flags().BoolVar(&formatJSONFlag, "json", false, "Output as JSON to stdout") + cmd.Flags().BoolVar(&exportFlag, "export", false, "Export in structured format") + cmd.Flags().StringVar(&formatFlag, "format", "", "Export format: json, markdown, html (requires --export)") + cmd.Flags().StringVarP(&outputFlag, "output", "o", "", "Write to file instead of stdout") + cmd.Flags().BoolVar(&noCacheFlag, "no-cache", false, "Force full re-analysis (ignore cache)") + + cmd.MarkFlagsMutuallyExclusive("json", "export") + + return cmd +} + +// runInsights executes the insights command. +func runInsights(w, _ io.Writer, period, repoFilter, agentFilter string, formatJSONOut, export bool, format, outputFile string, noCache bool) error { + repo, err := openRepository() + if err != nil { + return fmt.Errorf("not a git repository: %w", err) + } + + // Get current repository name + repoName, err := extractRepoName(repo) + if err != nil { + logging.Warn(context.Background(), "failed to extract repo name", "error", err) + repoName = unknownStrategyName + } + + // If repo filter is set and doesn't match current repo, return early + if repoFilter != "" && repoFilter != repoName { + fmt.Fprintf(w, "No data for repository: %s (current: %s)\n", repoFilter, repoName) + return nil + } + + // List all sessions + sessions, err := strategy.ListSessions() + if err != nil { + return fmt.Errorf("failed to list sessions: %w", err) + } + + // Build query + query := buildInsightsQuery(period, agentFilter) + + // Load cache if not disabled + var cache *InsightsCache + if !noCache { + cache, err = loadCache(repoName) + if err != nil { + logging.Warn(context.Background(), "failed to load cache", "error", err) + cache = nil + } + } + + // Compute insights + report, cacheStats, err := computeInsights(repo, sessions, query, cache, noCache) + if err != nil { + return fmt.Errorf("failed to compute insights: %w", err) + } + + // Save cache if enabled + if !noCache && cache != nil { + if err := saveCache(repoName, cache); err != nil { + logging.Warn(context.Background(), "failed to save cache", "error", err) + } + } + + // Format output + var output []byte + switch { + case formatJSONOut: + output, err = formatInsightsJSON(report) + case export: + switch format { + case formatJSON: + output, err = formatInsightsJSON(report) + case formatMarkdown: + output, err = formatInsightsMarkdown(report) + case formatHTML: + output, err = formatInsightsHTML(report) + default: + return fmt.Errorf("unsupported format: %s", format) + } + default: + output, err = formatInsightsTerminal(report, cacheStats) + } + + if err != nil { + return fmt.Errorf("failed to format output: %w", err) + } + + // Write output + if outputFile == "" { + if _, err = w.Write(output); err != nil { + return fmt.Errorf("failed to write output: %w", err) + } + } else { + //nolint:gosec // G306: insights file is for user's own analysis, 0o644 is appropriate + if err := os.WriteFile(outputFile, output, 0o644); err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + fmt.Fprintf(w, "Exported to %s\n", outputFile) + } + + return nil +} + +// extractRepoName extracts the repository name from git remote. +func extractRepoName(repo *git.Repository) (string, error) { + repoRoot, err := paths.RepoRoot() + if err != nil { + return "", fmt.Errorf("failed to get repo root: %w", err) + } + + remotes, err := repo.Remotes() + //nolint:nilerr // Intentionally ignoring error - fallback to directory name + if len(remotes) == 0 || err != nil { + // Fallback to directory name (ignore error, use directory name as fallback) + return filepath.Base(repoRoot), nil + } + + // Parse first remote URL + config := remotes[0].Config() + if len(config.URLs) == 0 { + return filepath.Base(repoRoot), nil + } + + url := config.URLs[0] + // Extract repo name from URL (handle both HTTPS and SSH) + // https://github.com/entireio/cli.git -> cli + // git@github.com:entireio/cli.git -> cli + name := url + if idx := lastIndexOf(name, "/"); idx >= 0 { + name = name[idx+1:] + } + if idx := lastIndexOf(name, ":"); idx >= 0 { + name = name[idx+1:] + } + if len(name) > 4 && name[len(name)-4:] == ".git" { + name = name[:len(name)-4] + } + return name, nil +} + +// lastIndexOf returns the last index of substr in s, or -1 if not found. +func lastIndexOf(s, substr string) int { + for i := len(s) - len(substr); i >= 0; i-- { + if s[i:i+len(substr)] == substr { + return i + } + } + return -1 +} + +// buildInsightsQuery builds an InsightsQuery from flags. +func buildInsightsQuery(period, agentFilter string) InsightsQuery { + query := InsightsQuery{ + AgentFilter: agent.AgentType(agentFilter), + } + + // Set time range based on period + query.StartTime, query.EndTime = applyPeriodFilter(period) + + return query +} diff --git a/cmd/entire/cli/insights_analytics.go b/cmd/entire/cli/insights_analytics.go new file mode 100644 index 000000000..116bc9404 --- /dev/null +++ b/cmd/entire/cli/insights_analytics.go @@ -0,0 +1,563 @@ +package cli + +import ( + "context" + "encoding/json" + "fmt" + "runtime" + "sort" + "sync" + "time" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/checkpoint" + "github.com/entireio/cli/cmd/entire/cli/logging" + "github.com/entireio/cli/cmd/entire/cli/strategy" + "github.com/entireio/cli/cmd/entire/cli/transcript" + + "github.com/go-git/go-git/v5" +) + +// InsightsQuery contains filtering criteria for insights. +type InsightsQuery struct { + StartTime time.Time + EndTime time.Time + AgentFilter agent.AgentType +} + +// InsightsReport contains aggregated analytics results. +type InsightsReport struct { + // Summary stats + TotalSessions int `json:"total_sessions"` + TotalCheckpoints int `json:"total_checkpoints"` + TotalTime time.Duration `json:"total_time"` + TotalTokens int `json:"total_tokens"` + EstimatedCost float64 `json:"estimated_cost"` + FilesModified int `json:"files_modified"` + CommitsCreated int `json:"commits_created"` + + // Agent breakdown + AgentStats []AgentStat `json:"agent_stats"` + + // Activity patterns + DailyActivity []ActivityPoint `json:"daily_activity"` + WeeklyActivity []ActivityPoint `json:"weekly_activity"` + MonthlyActivity []ActivityPoint `json:"monthly_activity"` + PeakHours [24]int `json:"peak_hours"` + + // Top-K lists + TopRepos []RepoStat `json:"top_repos"` + TopTools []ToolStat `json:"top_tools"` + + // Recent sessions + RecentSessions []SessionSummary `json:"recent_sessions"` + + // Time range + StartTime time.Time `json:"start_time"` + EndTime time.Time `json:"end_time"` +} + +// AgentStat contains stats for a specific agent type. +type AgentStat struct { + Agent agent.AgentType `json:"agent"` + Sessions int `json:"sessions"` + Tokens int `json:"tokens"` + Hours float64 `json:"hours"` +} + +// ActivityPoint represents activity at a specific time. +type ActivityPoint struct { + Date time.Time `json:"date"` + Sessions int `json:"sessions"` + Hours float64 `json:"hours"` +} + +// RepoStat contains stats for a repository. +type RepoStat struct { + Name string `json:"name"` + Hours float64 `json:"hours"` +} + +// ToolStat contains stats for a tool. +type ToolStat struct { + Name string `json:"name"` + Count int `json:"count"` +} + +// SessionSummary contains summary info for a session. +type SessionSummary struct { + ID string `json:"id"` + Description string `json:"description"` + StartTime time.Time `json:"start_time"` + Duration time.Duration `json:"duration"` + Tokens int `json:"tokens"` +} + +// CacheStats contains cache performance metrics. +type CacheStats struct { + TotalSessions int `json:"total_sessions"` + CachedSessions int `json:"cached_sessions"` + NewSessions int `json:"new_sessions"` +} + +// computeInsights computes insights from sessions. +// +//nolint:unparam // error return kept for future error handling +func computeInsights(repo *git.Repository, sessions []strategy.Session, query InsightsQuery, cache *InsightsCache, noCache bool) (*InsightsReport, CacheStats, error) { + ctx := context.Background() + store := checkpoint.NewGitStore(repo) + + // Filter sessions by quality (exclude <2 messages, <1 min duration, sub-tasks) + filteredSessions := filterSessionQuality(ctx, sessions, store) + + // Filter by time range + filteredSessions = filterSessionsByTime(filteredSessions, query.StartTime, query.EndTime) + + // Filter by agent if specified + if query.AgentFilter != "" { + filteredSessions = filterSessionsByAgent(ctx, filteredSessions, store, query.AgentFilter) + } + + // Initialize cache if needed + if cache == nil { + cache = &InsightsCache{ + Facets: make(map[string]SessionFacet), + LastUpdated: time.Now(), + } + } + + // Identify new sessions (not in cache) + var newSessions []strategy.Session + cacheStats := CacheStats{ + TotalSessions: len(filteredSessions), + } + + if noCache { + // Force full re-analysis + newSessions = filteredSessions + cache.Facets = make(map[string]SessionFacet) + } else { + for _, s := range filteredSessions { + if _, cached := cache.Facets[s.ID]; !cached { + newSessions = append(newSessions, s) + } else { + cacheStats.CachedSessions++ + } + } + } + cacheStats.NewSessions = len(newSessions) + + // Bound parallelism to max 50 new sessions per run (Claude Code pattern) + if len(newSessions) > 50 { + logging.Info(ctx, "analyzing subset of new sessions", "total_new", len(newSessions), "analyzing", 50) + newSessions = newSessions[:50] + } + + // Extract facets from new sessions in parallel + if len(newSessions) > 0 { + logging.Info(ctx, "extracting facets from new sessions", "count", len(newSessions)) + newFacets := extractFacetsParallel(ctx, newSessions, store) + + // Merge with cached facets + for sessionID, facet := range newFacets { + cache.Facets[sessionID] = facet + } + cache.LastUpdated = time.Now() + } + + // Build session facet list for aggregation + var facets []SessionFacet + for _, s := range filteredSessions { + if facet, ok := cache.Facets[s.ID]; ok { + facets = append(facets, facet) + } + } + + // Aggregate across all facets + report := aggregateFacets(facets, query) + + return report, cacheStats, nil +} + +// filterSessionQuality filters out low-quality sessions. +// Excludes sessions with <2 user messages or <1 minute duration. +func filterSessionQuality(ctx context.Context, sessions []strategy.Session, store *checkpoint.GitStore) []strategy.Session { + var quality []strategy.Session + + for _, s := range sessions { + if len(s.Checkpoints) == 0 { + continue + } + + // Load first checkpoint to check message count + content, err := store.ReadLatestSessionContent(ctx, s.Checkpoints[0].CheckpointID) + if err != nil { + logging.Debug(ctx, "failed to read session content", "session", s.ID, "error", err) + continue + } + + // Count user messages + lines, err := transcript.ParseFromBytes(content.Transcript) + if err != nil { + logging.Debug(ctx, "failed to parse transcript", "session", s.ID, "error", err) + continue + } + + userMessages := 0 + for _, line := range lines { + if line.Type == transcript.TypeUser { + userMessages++ + } + } + + // Estimate duration (first to last checkpoint) + var duration time.Duration + if len(s.Checkpoints) > 0 { + first := s.Checkpoints[len(s.Checkpoints)-1].Timestamp + last := s.Checkpoints[0].Timestamp + duration = last.Sub(first) + } + + // Filter criteria (from Claude Code patterns) + if userMessages >= 2 && duration >= 1*time.Minute { + quality = append(quality, s) + } + } + + return quality +} + +// filterSessionsByTime filters sessions by time range. +func filterSessionsByTime(sessions []strategy.Session, startTime, endTime time.Time) []strategy.Session { + if startTime.IsZero() && endTime.IsZero() { + return sessions + } + + var filtered []strategy.Session + for _, s := range sessions { + if !startTime.IsZero() && s.StartTime.Before(startTime) { + continue + } + if !endTime.IsZero() && s.StartTime.After(endTime) { + continue + } + filtered = append(filtered, s) + } + return filtered +} + +// filterSessionsByAgent filters sessions by agent type. +func filterSessionsByAgent(ctx context.Context, sessions []strategy.Session, store *checkpoint.GitStore, agentFilter agent.AgentType) []strategy.Session { + var filtered []strategy.Session + + for _, s := range sessions { + if len(s.Checkpoints) == 0 { + continue + } + + // Check first checkpoint's agent type + summary, err := store.ReadCommitted(ctx, s.Checkpoints[0].CheckpointID) + if err != nil { + logging.Debug(ctx, "failed to read checkpoint", "session", s.ID, "error", err) + continue + } + + // Read session content to get agent from metadata + content, err := store.ReadLatestSessionContent(ctx, s.Checkpoints[0].CheckpointID) + if err != nil { + logging.Debug(ctx, "failed to read session content", "session", s.ID, "error", err) + continue + } + + sessionAgent := content.Metadata.Agent + if sessionAgent == "" && summary != nil { + // Fallback to summary if metadata doesn't have agent + // (shouldn't happen, but for robustness) + sessionAgent = agent.AgentTypeUnknown + } + + if sessionAgent == agentFilter { + filtered = append(filtered, s) + } + } + + return filtered +} + +// extractFacetsParallel extracts facets from sessions in parallel. +func extractFacetsParallel(ctx context.Context, sessions []strategy.Session, store *checkpoint.GitStore) map[string]SessionFacet { + numWorkers := runtime.NumCPU() / 2 + if numWorkers < 1 { + numWorkers = 1 + } + + semaphore := make(chan struct{}, numWorkers) + var wg sync.WaitGroup + var mu sync.Mutex + facets := make(map[string]SessionFacet) + + for _, session := range sessions { + wg.Add(1) + go func(s strategy.Session) { + defer wg.Done() + semaphore <- struct{}{} + defer func() { <-semaphore }() + + facet := extractSessionFacet(ctx, s, store) + if facet != nil { + mu.Lock() + facets[s.ID] = *facet + mu.Unlock() + } + }(session) + } + + wg.Wait() + return facets +} + +// extractSessionFacet extracts a facet from a session. +func extractSessionFacet(ctx context.Context, s strategy.Session, store *checkpoint.GitStore) *SessionFacet { + if len(s.Checkpoints) == 0 { + return nil + } + + facet := SessionFacet{ + SessionID: s.ID, + StartTime: s.StartTime, + Description: s.Description, + ToolCounts: make(map[string]int), + } + + // Load session content + content, err := store.ReadLatestSessionContent(ctx, s.Checkpoints[0].CheckpointID) + if err != nil { + logging.Debug(ctx, "failed to read session content", "session", s.ID, "error", err) + return nil + } + + // Extract basic metrics from metadata + facet.Agent = content.Metadata.Agent + if content.Metadata.TokenUsage != nil { + facet.Tokens = content.Metadata.TokenUsage.InputTokens + + content.Metadata.TokenUsage.CacheCreationTokens + + content.Metadata.TokenUsage.CacheReadTokens + + content.Metadata.TokenUsage.OutputTokens + } + facet.Messages = len(s.Checkpoints) + facet.FilesModified = len(content.Metadata.FilesTouched) + + // Estimate duration (first to last checkpoint) + if len(s.Checkpoints) > 0 { + first := s.Checkpoints[len(s.Checkpoints)-1].Timestamp + last := s.Checkpoints[0].Timestamp + facet.Duration = last.Sub(first) + } + + // Parse transcript for tool usage and hourly activity + transcriptBytes := content.Transcript + + // Chunk large transcripts (>30K chars) into 25K segments + if len(transcriptBytes) > 30000 { + transcriptBytes = chunkTranscript(transcriptBytes, 25000) + } + + // Extract tool usage + toolCounts, err := extractToolUsage(transcriptBytes) + if err == nil { + facet.ToolCounts = toolCounts + } + + // Extract hourly activity + hourlyActivity, err := extractHourlyActivity(transcriptBytes) + if err == nil { + facet.HourlyActivity = hourlyActivity + } + + return &facet +} + +// chunkTranscript chunks a transcript if it exceeds maxSize. +// Returns the first chunk for analysis. +func chunkTranscript(transcriptBytes []byte, maxSize int) []byte { + if len(transcriptBytes) <= maxSize { + return transcriptBytes + } + + // Find the last newline before maxSize + for i := maxSize; i > 0; i-- { + if transcriptBytes[i] == '\n' { + return transcriptBytes[:i+1] + } + } + + // Fallback: hard cutoff + return transcriptBytes[:maxSize] +} + +// extractToolUsage extracts tool usage counts from a transcript. +func extractToolUsage(transcriptBytes []byte) (map[string]int, error) { + lines, err := transcript.ParseFromBytes(transcriptBytes) + if err != nil { + return nil, fmt.Errorf("failed to parse transcript: %w", err) + } + + toolCounts := make(map[string]int) + + for _, line := range lines { + if line.Type != transcript.TypeAssistant { + continue + } + + var msg transcript.AssistantMessage + if err := json.Unmarshal(line.Message, &msg); err != nil { + continue + } + + for _, block := range msg.Content { + if block.Type == transcript.ContentTypeToolUse { + toolCounts[block.Name]++ + } + } + } + + return toolCounts, nil +} + +// extractHourlyActivity extracts hourly activity from a transcript. +func extractHourlyActivity(transcriptBytes []byte) ([24]int, error) { + lines, err := transcript.ParseFromBytes(transcriptBytes) + if err != nil { + return [24]int{}, fmt.Errorf("failed to parse transcript: %w", err) + } + + var hourly [24]int + + for _, line := range lines { + if line.Type != transcript.TypeUser { + continue + } + + // Parse timestamp from line + var msg struct { + Timestamp string `json:"timestamp"` + } + if err := json.Unmarshal(line.Message, &msg); err != nil { + continue + } + + // Parse timestamp + t, err := time.Parse(time.RFC3339, msg.Timestamp) + if err != nil { + continue + } + + hourly[t.Hour()]++ + } + + return hourly, nil +} + +// aggregateFacets aggregates facets into an insights report. +func aggregateFacets(facets []SessionFacet, query InsightsQuery) *InsightsReport { + report := &InsightsReport{ + StartTime: query.StartTime, + EndTime: query.EndTime, + } + + // Aggregate basic stats + agentMap := make(map[agent.AgentType]*AgentStat) + toolMap := make(map[string]int) + dailyMap := make(map[string]*ActivityPoint) + + for _, facet := range facets { + report.TotalSessions++ + report.TotalTokens += facet.Tokens + report.TotalTime += facet.Duration + report.FilesModified += facet.FilesModified + + // Agent stats + if _, ok := agentMap[facet.Agent]; !ok { + agentMap[facet.Agent] = &AgentStat{Agent: facet.Agent} + } + agentMap[facet.Agent].Sessions++ + agentMap[facet.Agent].Tokens += facet.Tokens + agentMap[facet.Agent].Hours += facet.Duration.Hours() + + // Tool usage + for tool, count := range facet.ToolCounts { + toolMap[tool] += count + } + + // Peak hours + for hour, count := range facet.HourlyActivity { + report.PeakHours[hour] += count + } + + // Daily activity + dateKey := facet.StartTime.Format("2006-01-02") + if _, ok := dailyMap[dateKey]; !ok { + dailyMap[dateKey] = &ActivityPoint{ + Date: facet.StartTime.Truncate(24 * time.Hour), + } + } + dailyMap[dateKey].Sessions++ + dailyMap[dateKey].Hours += facet.Duration.Hours() + } + + // Convert maps to slices and sort + for _, stat := range agentMap { + report.AgentStats = append(report.AgentStats, *stat) + } + sort.Slice(report.AgentStats, func(i, j int) bool { + return report.AgentStats[i].Sessions > report.AgentStats[j].Sessions + }) + + for tool, count := range toolMap { + report.TopTools = append(report.TopTools, ToolStat{Name: tool, Count: count}) + } + sort.Slice(report.TopTools, func(i, j int) bool { + return report.TopTools[i].Count > report.TopTools[j].Count + }) + if len(report.TopTools) > 5 { + report.TopTools = report.TopTools[:5] + } + + for _, point := range dailyMap { + report.DailyActivity = append(report.DailyActivity, *point) + } + sort.Slice(report.DailyActivity, func(i, j int) bool { + return report.DailyActivity[i].Date.Before(report.DailyActivity[j].Date) + }) + + // Estimate cost ($15/1M tokens - Claude 3.5 Sonnet average) + report.EstimatedCost = float64(report.TotalTokens) * 15.0 / 1_000_000.0 + + // Build recent sessions (last 5) + if len(facets) > 0 { + // Sort facets by start time (most recent first) + sortedFacets := make([]SessionFacet, len(facets)) + copy(sortedFacets, facets) + sort.Slice(sortedFacets, func(i, j int) bool { + return sortedFacets[i].StartTime.After(sortedFacets[j].StartTime) + }) + + count := 5 + if len(sortedFacets) < count { + count = len(sortedFacets) + } + + for i := range count { + f := sortedFacets[i] + report.RecentSessions = append(report.RecentSessions, SessionSummary{ + ID: f.SessionID, + Description: f.Description, + StartTime: f.StartTime, + Duration: f.Duration, + Tokens: f.Tokens, + }) + } + } + + return report +} diff --git a/cmd/entire/cli/insights_cache.go b/cmd/entire/cli/insights_cache.go new file mode 100644 index 000000000..42e2266dd --- /dev/null +++ b/cmd/entire/cli/insights_cache.go @@ -0,0 +1,132 @@ +package cli + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/paths" +) + +// SessionFacet contains cached per-session analytics. +// This allows incremental updates - only new sessions need full analysis. +type SessionFacet struct { + SessionID string `json:"session_id"` + StartTime time.Time `json:"start_time"` + Duration time.Duration `json:"duration"` + Tokens int `json:"tokens"` + Messages int `json:"messages"` + FilesModified int `json:"files_modified"` + Agent agent.AgentType `json:"agent"` + Description string `json:"description"` + ToolCounts map[string]int `json:"tool_counts"` + HourlyActivity [24]int `json:"hourly_activity"` +} + +// InsightsCache contains cached session facets. +type InsightsCache struct { + Facets map[string]SessionFacet `json:"facets"` + LastUpdated time.Time `json:"last_updated"` +} + +const ( + // cacheTTL is how long cache entries are valid (30 days) + cacheTTL = 30 * 24 * time.Hour +) + +// getCachePath returns the path to the insights cache file for a repository. +func getCachePath(repoName string) (string, error) { + repoRoot, err := paths.RepoRoot() + if err != nil { + return "", fmt.Errorf("failed to get repo root: %w", err) + } + + cacheDir := filepath.Join(repoRoot, ".entire", "insights-cache") + if err := os.MkdirAll(cacheDir, 0o750); err != nil { + return "", fmt.Errorf("failed to create cache directory: %w", err) + } + + // Hash repo name for filename (simple hash to avoid special chars) + hash := simpleHash(repoName) + cachePath := filepath.Join(cacheDir, hash+".json") + + return cachePath, nil +} + +// loadCache loads the insights cache for a repository. +// Returns nil if cache doesn't exist or is expired. +func loadCache(repoName string) (*InsightsCache, error) { + cachePath, err := getCachePath(repoName) + if err != nil { + return nil, err + } + + //nolint:gosec // G304: cachePath is constructed from internal sources, not user input + data, err := os.ReadFile(cachePath) + if err != nil { + if os.IsNotExist(err) { + // Cache doesn't exist - return empty cache + return &InsightsCache{ + Facets: make(map[string]SessionFacet), + LastUpdated: time.Now(), + }, nil + } + return nil, fmt.Errorf("failed to read cache: %w", err) + } + + var cache InsightsCache + if err := json.Unmarshal(data, &cache); err != nil { + // Cache is corrupt - log and return empty cache to allow recovery + //nolint:nilerr // Intentionally ignoring corrupt cache to allow graceful recovery + return &InsightsCache{ + Facets: make(map[string]SessionFacet), + LastUpdated: time.Now(), + }, nil + } + + // Check if cache is expired + if time.Since(cache.LastUpdated) > cacheTTL { + // Cache is too old - return empty cache + return &InsightsCache{ + Facets: make(map[string]SessionFacet), + LastUpdated: time.Now(), + }, nil + } + + return &cache, nil +} + +// saveCache saves the insights cache for a repository. +func saveCache(repoName string, cache *InsightsCache) error { + cachePath, err := getCachePath(repoName) + if err != nil { + return err + } + + cache.LastUpdated = time.Now() + + data, err := json.MarshalIndent(cache, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal cache: %w", err) + } + + //nolint:gosec // G306: cache file is for user's own analysis, 0o644 is appropriate + if err := os.WriteFile(cachePath, data, 0o644); err != nil { + return fmt.Errorf("failed to write cache: %w", err) + } + + return nil +} + +// simpleHash creates a simple hash of a string for use in filenames. +// Uses a basic algorithm to avoid collisions for typical repo names. +func simpleHash(s string) string { + h := uint32(0) + for i := range len(s) { + h = h*31 + uint32(s[i]) + } + return fmt.Sprintf("%08x", h) +} diff --git a/cmd/entire/cli/insights_filters.go b/cmd/entire/cli/insights_filters.go new file mode 100644 index 000000000..e2a5eb246 --- /dev/null +++ b/cmd/entire/cli/insights_filters.go @@ -0,0 +1,34 @@ +package cli + +import ( + "time" +) + +// applyPeriodFilter converts a period string to start/end times. +// Supports "week", "month", "year". +// Returns zero times for empty/unknown periods. +func applyPeriodFilter(period string) (time.Time, time.Time) { + now := time.Now() + var start, end time.Time + + switch period { + case "week": + start = now.AddDate(0, 0, -7) + end = now + case "month": + start = now.AddDate(0, -1, 0) + end = now + case "year": + start = now.AddDate(-1, 0, 0) + end = now + case "": + // No filter - use default week + start = now.AddDate(0, 0, -7) + end = now + default: + // Unknown period - no filter + return time.Time{}, time.Time{} + } + + return start, end +} diff --git a/cmd/entire/cli/insights_formatters.go b/cmd/entire/cli/insights_formatters.go new file mode 100644 index 000000000..2d2224f05 --- /dev/null +++ b/cmd/entire/cli/insights_formatters.go @@ -0,0 +1,753 @@ +package cli + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + "time" +) + +// formatInsightsTerminal formats insights for terminal output. +// +//nolint:unparam // error return kept for consistency with other formatters +func formatInsightsTerminal(report *InsightsReport, cacheStats CacheStats) ([]byte, error) { + var sb strings.Builder + + // Header + sb.WriteString("═══════════════════════════════════════════════════════\n") + sb.WriteString(" Entire Insights\n") + sb.WriteString("═══════════════════════════════════════════════════════\n\n") + + // Time range + if !report.StartTime.IsZero() && !report.EndTime.IsZero() { + sb.WriteString(fmt.Sprintf("Period: %s to %s\n", + report.StartTime.Format("2006-01-02"), + report.EndTime.Format("2006-01-02"))) + } + + // Cache stats + if cacheStats.TotalSessions > 0 { + sb.WriteString(fmt.Sprintf("Analysis: %d new sessions, %d cached\n", + cacheStats.NewSessions, cacheStats.CachedSessions)) + } + sb.WriteString("\n") + + // Summary stats + sb.WriteString("SUMMARY\n") + sb.WriteString("───────────────────────────────────────────────────────\n") + sb.WriteString(fmt.Sprintf(" Sessions: %s\n", formatNumber(report.TotalSessions))) + sb.WriteString(fmt.Sprintf(" Total time: %s\n", formatDuration(report.TotalTime))) + sb.WriteString(fmt.Sprintf(" Tokens: %s\n", formatNumber(report.TotalTokens))) + sb.WriteString(fmt.Sprintf(" Estimated cost: $%.2f\n", report.EstimatedCost)) + sb.WriteString(fmt.Sprintf(" Files modified: %s\n", formatNumber(report.FilesModified))) + sb.WriteString("\n") + + // Agent breakdown + if len(report.AgentStats) > 0 { + sb.WriteString("AGENTS\n") + sb.WriteString("───────────────────────────────────────────────────────\n") + for _, stat := range report.AgentStats { + agentName := string(stat.Agent) + if agentName == "" { + agentName = "unknown" + } + sb.WriteString(fmt.Sprintf(" %-20s %3d sessions %s tokens %.1f hours\n", + agentName, + stat.Sessions, + formatNumber(stat.Tokens), + stat.Hours)) + } + sb.WriteString("\n") + } + + // Top tools + if len(report.TopTools) > 0 { + sb.WriteString("TOP TOOLS\n") + sb.WriteString("───────────────────────────────────────────────────────\n") + for _, tool := range report.TopTools { + sb.WriteString(fmt.Sprintf(" %-30s %s uses\n", + tool.Name, + formatNumber(tool.Count))) + } + sb.WriteString("\n") + } + + // Peak hours + sb.WriteString("PEAK HOURS\n") + sb.WriteString("───────────────────────────────────────────────────────\n") + sb.WriteString(renderHeatmap(report.PeakHours)) + sb.WriteString("\n") + + // Recent sessions + if len(report.RecentSessions) > 0 { + sb.WriteString("RECENT SESSIONS\n") + sb.WriteString("───────────────────────────────────────────────────────\n") + for _, sess := range report.RecentSessions { + desc := sess.Description + if desc == "" { + desc = sess.ID + } + if len(desc) > 50 { + desc = desc[:47] + "..." + } + sb.WriteString(fmt.Sprintf(" %s %s %s %s tokens\n", + sess.StartTime.Format("01-02 15:04"), + formatDuration(sess.Duration), + desc, + formatNumber(sess.Tokens))) + } + sb.WriteString("\n") + } + + sb.WriteString("───────────────────────────────────────────────────────\n") + + return []byte(sb.String()), nil +} + +// formatInsightsJSON formats insights as JSON. +func formatInsightsJSON(report *InsightsReport) ([]byte, error) { + data, err := json.MarshalIndent(report, "", " ") + if err != nil { + return nil, fmt.Errorf("failed to marshal JSON: %w", err) + } + return data, nil +} + +// formatInsightsMarkdown formats insights as Markdown. +// +//nolint:unparam // error return kept for consistency with other formatters +func formatInsightsMarkdown(report *InsightsReport) ([]byte, error) { + var sb strings.Builder + + sb.WriteString("# Entire Insights\n\n") + + // Time range + if !report.StartTime.IsZero() && !report.EndTime.IsZero() { + sb.WriteString(fmt.Sprintf("**Period:** %s to %s\n\n", + report.StartTime.Format("2006-01-02"), + report.EndTime.Format("2006-01-02"))) + } + + // Summary stats + sb.WriteString("## Summary\n\n") + sb.WriteString(fmt.Sprintf("- **Sessions:** %s\n", formatNumber(report.TotalSessions))) + sb.WriteString(fmt.Sprintf("- **Total time:** %s\n", formatDuration(report.TotalTime))) + sb.WriteString(fmt.Sprintf("- **Tokens:** %s\n", formatNumber(report.TotalTokens))) + sb.WriteString(fmt.Sprintf("- **Estimated cost:** $%.2f\n", report.EstimatedCost)) + sb.WriteString(fmt.Sprintf("- **Files modified:** %s\n\n", formatNumber(report.FilesModified))) + + // Agent breakdown + if len(report.AgentStats) > 0 { + sb.WriteString("## Agents\n\n") + sb.WriteString("| Agent | Sessions | Tokens | Hours |\n") + sb.WriteString("|-------|----------|--------|-------|\n") + for _, stat := range report.AgentStats { + agentName := string(stat.Agent) + if agentName == "" { + agentName = "unknown" + } + sb.WriteString(fmt.Sprintf("| %s | %d | %s | %.1f |\n", + agentName, + stat.Sessions, + formatNumber(stat.Tokens), + stat.Hours)) + } + sb.WriteString("\n") + } + + // Top tools + if len(report.TopTools) > 0 { + sb.WriteString("## Top Tools\n\n") + sb.WriteString("| Tool | Uses |\n") + sb.WriteString("|------|------|\n") + for _, tool := range report.TopTools { + sb.WriteString(fmt.Sprintf("| %s | %s |\n", + tool.Name, + formatNumber(tool.Count))) + } + sb.WriteString("\n") + } + + // Peak hours + sb.WriteString("## Peak Hours\n\n") + sb.WriteString("```\n") + sb.WriteString(renderHeatmap(report.PeakHours)) + sb.WriteString("```\n\n") + + // Recent sessions + if len(report.RecentSessions) > 0 { + sb.WriteString("## Recent Sessions\n\n") + sb.WriteString("| Date | Duration | Description | Tokens |\n") + sb.WriteString("|------|----------|-------------|--------|\n") + for _, sess := range report.RecentSessions { + desc := sess.Description + if desc == "" { + desc = sess.ID + } + if len(desc) > 50 { + desc = desc[:47] + "..." + } + sb.WriteString(fmt.Sprintf("| %s | %s | %s | %s |\n", + sess.StartTime.Format("01-02 15:04"), + formatDuration(sess.Duration), + desc, + formatNumber(sess.Tokens))) + } + sb.WriteString("\n") + } + + return []byte(sb.String()), nil +} + +// formatInsightsHTML formats insights as HTML with a light, minimal design. +// +//nolint:maintidx,unparam // High complexity and error return acceptable for comprehensive HTML formatting +func formatInsightsHTML(report *InsightsReport) ([]byte, error) { + var sb strings.Builder + + // Get time-based greeting + now := time.Now() + hour := now.Hour() + greeting := "Evening" + if hour < 12 { + greeting = "Morning" + } else if hour < 18 { + greeting = "Afternoon" + } + + sb.WriteString(` + + + + + Entire Insights + + + + + + + +
+ +
`) + sb.WriteString(greeting) + sb.WriteString(`, developer
+ + +
+
+
Sessions
+
`) + sb.WriteString(formatNumber(report.TotalSessions)) + sb.WriteString(`
+
+
+
Total Time
+
`) + sb.WriteString(formatDuration(report.TotalTime)) + sb.WriteString(`
+
+
+
Tokens
+
`) + sb.WriteString(formatNumber(report.TotalTokens)) + sb.WriteString(`
+
+
+
Cost
+
$`) + sb.WriteString(fmt.Sprintf("%.2f", report.EstimatedCost)) + sb.WriteString(`
+
+
+ + +
+
Activity Pattern
+
+
+
`) + + // Y-axis labels (hours 0-23) + for hour := 23; hour >= 0; hour-- { + if hour%4 == 0 { + sb.WriteString(fmt.Sprintf(` +
%02d:00
`, hour)) + } else { + sb.WriteString(` +
`) + } + } + + sb.WriteString(` +
+
`) + + // Generate scatter dots based on daily activity and peak hours + // Map dates to X position, hours to Y position + if len(report.DailyActivity) > 0 { + // For each day with activity, place dots for each hour with activity + for dayIdx := range report.DailyActivity { + xPercent := float64(dayIdx) / float64(len(report.DailyActivity)) * 100 + + // For this day, find which hours had activity + // (We'll distribute the day's activity across the peak hours) + for hour := range 24 { + if report.PeakHours[hour] > 0 { + // Y position (inverted - hour 0 at bottom, hour 23 at top) + yPercent := (1.0 - float64(hour)/24.0) * 100 + + // Bubble size based on activity count (not implemented in this simple version) + sb.WriteString(fmt.Sprintf(` +
`, + xPercent, yPercent)) + } + } + } + } + + sb.WriteString(` +
+
+
+
+ +
+ + `) + + if len(report.AgentStats) > 0 { + sb.WriteString(` +
+
Agents
+
+ + + + + + + + + + `) + + for _, stat := range report.AgentStats { + agentName := string(stat.Agent) + badgeClass := "badge" + if strings.Contains(strings.ToLower(agentName), "claude") { + badgeClass = "badge badge-claude" + agentName = "Claude Code" + } else if strings.Contains(strings.ToLower(agentName), "gemini") { + badgeClass = "badge badge-gemini" + agentName = "Gemini CLI" + } + + sb.WriteString(fmt.Sprintf(` + + + + + + `, + badgeClass, + agentName, + stat.Sessions, + formatNumber(stat.Tokens), + stat.Hours)) + } + + sb.WriteString(` + +
AgentSessionsTokensHours
%s%d%s%.1fh
+
+
+ +
`) + } + + // Top Tools Table + if len(report.TopTools) > 0 { + sb.WriteString(` +
+
Top Tools
+
+ + + + + + + + `) + + for _, tool := range report.TopTools { + sb.WriteString(fmt.Sprintf(` + + + + `, + tool.Name, + formatNumber(tool.Count))) + } + + sb.WriteString(` + +
ToolUses
%s%s
+
+
+ +
`) + } + + // Recent Sessions (GitHub-style with diff stats) + if len(report.RecentSessions) > 0 { + sb.WriteString(` +
+
Recent Sessions
+
+ + + + + + + + + + `) + + for _, sess := range report.RecentSessions { + desc := sess.Description + if desc == "" { + desc = sess.ID + } + if len(desc) > 60 { + desc = desc[:57] + "..." + } + + // Estimate diff stats (mock data for visualization) + // In a real implementation, these would come from the session data + addedLines := sess.Tokens / 100 // rough estimate + deletedLines := sess.Tokens / 200 // rough estimate + + sb.WriteString(fmt.Sprintf(` + + + + + + `, + sess.StartTime.Format("Jan 2, 15:04"), + desc, + formatDuration(sess.Duration), + formatNumber(addedLines), + formatNumber(deletedLines))) + } + + sb.WriteString(` + +
DateDescriptionDurationTokens
%s%s%s +
+ +%s + -%s +
+
+
+
`) + } + + sb.WriteString(` +
+ +`) + + return []byte(sb.String()), nil +} + +// formatDuration formats a duration for display. +func formatDuration(d time.Duration) string { + hours := int(d.Hours()) + minutes := int(d.Minutes()) % 60 + + if hours > 0 { + return fmt.Sprintf("%dh %dm", hours, minutes) + } + return fmt.Sprintf("%dm", minutes) +} + +// formatNumber formats a number with thousands separators. +func formatNumber(n int) string { + if n < 1000 { + return strconv.Itoa(n) + } + + s := strconv.Itoa(n) + var result strings.Builder + for i, c := range s { + if i > 0 && (len(s)-i)%3 == 0 { + result.WriteRune(',') + } + result.WriteRune(c) + } + return result.String() +} + +// renderHeatmap renders a 24-hour heatmap. +func renderHeatmap(hours [24]int) string { + if len(hours) == 0 { + return "(no data)\n" + } + + // Find max for scaling + maxCount := 0 + for _, count := range hours { + if count > maxCount { + maxCount = count + } + } + + if maxCount == 0 { + return " No activity\n" + } + + var sb strings.Builder + + // Two rows: 00-11 and 12-23 + sb.WriteString(" ") + for hour := range 12 { + sb.WriteString(fmt.Sprintf("%2d ", hour)) + } + sb.WriteString("\n ") + //nolint:gosec // G602: hours is [24]int, hour range is 0-11, safe array access + for hour := range 12 { + intensity := float64(hours[hour]) / float64(maxCount) + sb.WriteString(barChar(intensity)) + sb.WriteString(" ") + } + sb.WriteString("\n\n ") + for hour := 12; hour < 24; hour++ { + sb.WriteString(fmt.Sprintf("%2d ", hour)) + } + sb.WriteString("\n ") + for hour := 12; hour < 24; hour++ { + intensity := float64(hours[hour]) / float64(maxCount) + sb.WriteString(barChar(intensity)) + sb.WriteString(" ") + } + sb.WriteString("\n") + + return sb.String() +} + +// barChar returns a bar character based on intensity (0.0 to 1.0). +func barChar(intensity float64) string { + switch { + case intensity < 0.1: + return "░" + case intensity < 0.3: + return "▒" + case intensity < 0.6: + return "▓" + default: + return "█" + } +} diff --git a/cmd/entire/cli/insights_test.go b/cmd/entire/cli/insights_test.go new file mode 100644 index 000000000..b917b07d9 --- /dev/null +++ b/cmd/entire/cli/insights_test.go @@ -0,0 +1,320 @@ +package cli + +import ( + "testing" + "time" +) + +func TestApplyPeriodFilter(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + period string + wantDaysDelta int + }{ + { + name: "week period", + period: "week", + wantDaysDelta: 7, + }, + { + name: "month period", + period: "month", + wantDaysDelta: 30, // Approximate + }, + { + name: "year period", + period: "year", + wantDaysDelta: 365, + }, + { + name: "empty period defaults to week", + period: "", + wantDaysDelta: 7, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + start, end := applyPeriodFilter(tt.period) + + // For month/year, we need to be more lenient due to month length variations + tolerance := 2 * 24 * time.Hour + if tt.period == "year" { + tolerance = 5 * 24 * time.Hour + } + + // Check that end is approximately now + if time.Since(end) > 1*time.Minute { + t.Errorf("end time should be approximately now, got %v", end) + } + + // Check that start is approximately the right number of days ago + duration := end.Sub(start) + expectedDuration := time.Duration(tt.wantDaysDelta) * 24 * time.Hour + + diff := duration - expectedDuration + if diff < 0 { + diff = -diff + } + + if diff > tolerance { + t.Errorf("duration = %v, want approximately %v (delta %v)", + duration, expectedDuration, diff) + } + }) + } +} + +func TestApplyPeriodFilter_Unknown(t *testing.T) { + t.Parallel() + + start, end := applyPeriodFilter("unknown") + + if !start.IsZero() || !end.IsZero() { + t.Errorf("unknown period should return zero times, got start=%v end=%v", start, end) + } +} + +func TestFormatDuration(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + duration time.Duration + want string + }{ + { + name: "less than hour", + duration: 45 * time.Minute, + want: "45m", + }, + { + name: "exactly one hour", + duration: 1 * time.Hour, + want: "1h 0m", + }, + { + name: "hours and minutes", + duration: 2*time.Hour + 30*time.Minute, + want: "2h 30m", + }, + { + name: "many hours", + duration: 25*time.Hour + 15*time.Minute, + want: "25h 15m", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := formatDuration(tt.duration) + if got != tt.want { + t.Errorf("formatDuration() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestFormatNumber(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + num int + want string + }{ + { + name: "small number", + num: 42, + want: "42", + }, + { + name: "exactly 1000", + num: 1000, + want: "1,000", + }, + { + name: "thousands", + num: 12345, + want: "12,345", + }, + { + name: "millions", + num: 1234567, + want: "1,234,567", + }, + { + name: "zero", + num: 0, + want: "0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := formatNumber(tt.num) + if got != tt.want { + t.Errorf("formatNumber() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestSimpleHash(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + str string + }{ + { + name: "simple string", + str: "test-repo", + }, + { + name: "repo path", + str: "entireio/cli", + }, + { + name: "empty string", + str: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := simpleHash(tt.str) + + // Check that hash is 8 hex characters + if len(got) != 8 { + t.Errorf("simpleHash() length = %d, want 8", len(got)) + } + + // Check that hash is hexadecimal + for _, c := range got { + if (c < '0' || c > '9') && (c < 'a' || c > 'f') { + t.Errorf("simpleHash() = %v, contains non-hex character %c", got, c) + } + } + + // Check that same input produces same hash + got2 := simpleHash(tt.str) + if got != got2 { + t.Errorf("simpleHash() not deterministic: %v != %v", got, got2) + } + }) + } +} + +func TestExtractRepoName(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + url string + want string + }{ + { + name: "https url", + url: "https://github.com/entireio/cli.git", + want: "cli", + }, + { + name: "https url without .git", + url: "https://github.com/entireio/cli", + want: "cli", + }, + { + name: "ssh url", + url: "git@github.com:entireio/cli.git", + want: "cli", + }, + { + name: "ssh url without .git", + url: "git@github.com:entireio/cli", + want: "cli", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + // Parse repo name from URL directly (simplified test) + url := tt.url + name := url + + // Extract repo name from URL + if idx := lastIndexOf(name, "/"); idx >= 0 { + name = name[idx+1:] + } + if idx := lastIndexOf(name, ":"); idx >= 0 { + name = name[idx+1:] + } + if len(name) > 4 && name[len(name)-4:] == ".git" { + name = name[:len(name)-4] + } + + if name != tt.want { + t.Errorf("extractRepoName() = %v, want %v", name, tt.want) + } + }) + } +} + +func TestBarChar(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + intensity float64 + want string + }{ + { + name: "zero intensity", + intensity: 0.0, + want: "░", + }, + { + name: "low intensity", + intensity: 0.15, + want: "▒", + }, + { + name: "medium intensity", + intensity: 0.4, + want: "▓", + }, + { + name: "high intensity", + intensity: 0.8, + want: "█", + }, + { + name: "max intensity", + intensity: 1.0, + want: "█", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := barChar(tt.intensity) + if got != tt.want { + t.Errorf("barChar() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/cmd/entire/cli/integration_test/explain_export_test.go b/cmd/entire/cli/integration_test/explain_export_test.go new file mode 100644 index 000000000..d2e8d931a --- /dev/null +++ b/cmd/entire/cli/integration_test/explain_export_test.go @@ -0,0 +1,411 @@ +//go:build integration + +package integration + +import ( + "encoding/json" + "strings" + "testing" +) + +func TestExport_Showcase_EndToEnd(t *testing.T) { + t.Parallel() + + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + // Setup repository and branch + env.InitRepo() + env.WriteFile("README.md", "# Test") + env.GitAdd("README.md") + env.GitCommit("Initial commit") + + if strategyName == "manual-commit" { + // Switch to feature branch for manual-commit strategy + env.GitCheckoutNewBranch("feature/test") + } + + // 1. Create a session with a checkpoint + session := env.NewSession() + if err := env.SimulateUserPromptSubmit(session.ID); err != nil { + t.Fatalf("SimulateUserPromptSubmit failed: %v", err) + } + + // 2. Make code changes with sensitive data + apiCode := `package api + +import "github.com/lib/pq" + +// Connect to 10.0.1.5:5432 +const dbConn = "postgres://user:pass@10.0.1.5:5432/db" + +func Handler() { + // Admin email: admin@acme-corp.com +} +` + env.WriteFile("src/api.go", apiCode) + + configCode := `package config + +const Key = "sk-proj-test-secret-key" +const InternalURL = "https://api.internal.company.com" +` + env.WriteFile("src/config.go", configCode) + + // 3. Create checkpoint + session.CreateTranscript( + "implement API handler with database connection", + []FileChange{ + {Path: "src/api.go", Content: apiCode}, + {Path: "src/config.go", Content: configCode}, + }, + ) + if err := env.SimulateStop(session.ID, session.TranscriptPath); err != nil { + t.Fatalf("SimulateStop failed: %v", err) + } + + // 4. Commit to make it a committed checkpoint + env.GitAdd("src/api.go", "src/config.go") + env.GitCommit("feat: add API handler") + + // 5. Extract checkpoint ID from commit + headHash := env.GetHeadHash() + checkpointID := env.GetCheckpointIDFromCommitMessage(headHash) + if checkpointID == "" { + t.Fatal("no checkpoint ID in commit message") + } + + // 6. Export with showcase mode (JSON format) + output, err := env.RunCLIWithError("explain", "-c", checkpointID, "--export", "--showcase", "--format=json") + if err != nil { + t.Fatalf("export failed: %v, output: %s", err, output) + } + + // 7. Verify JSON structure + var exported map[string]any + if err := json.Unmarshal([]byte(output), &exported); err != nil { + t.Fatalf("invalid JSON output: %v\nOutput: %s", err, output) + } + + // Check required fields + requiredFields := []string{"checkpoint_id", "session_id", "transcript", "metadata", "files_touched", "exported_at"} + for _, field := range requiredFields { + if _, ok := exported[field]; !ok { + t.Errorf("missing required field: %s", field) + } + } + + // 8. Verify redaction - sensitive data should be redacted + transcriptStr, ok := exported["transcript"].(string) + if !ok { + t.Fatal("transcript not a string") + } + + // Sensitive data that should be redacted + sensitivePatterns := []string{ + "sk-proj-test-secret-key", // API key (entropy-based) + "10.0.1.5", // Private IP (pattern-based) + "postgres://", // DB connection string (pattern-based) + "admin@acme-corp.com", // Email (pattern-based) + "api.internal.company.com", // Internal URL (pattern-based) + } + + for _, pattern := range sensitivePatterns { + if strings.Contains(transcriptStr, pattern) { + t.Errorf("sensitive data not redacted: %q found in transcript", pattern) + } + } + + // Should contain redaction markers + if !strings.Contains(transcriptStr, "REDACTED") && !strings.Contains(transcriptStr, "[") { + t.Error("no redaction markers found in transcript") + } + + // Transcript should still be valid JSONL + lines := strings.Split(strings.TrimSpace(transcriptStr), "\n") + for i, line := range lines { + if len(line) == 0 { + continue + } + var entry map[string]any + if err := json.Unmarshal([]byte(line), &entry); err != nil { + t.Errorf("line %d not valid JSON: %v\nLine: %s", i, err, line) + } + } + }) +} + +func TestExport_WithoutShowcase_MinimalRedaction(t *testing.T) { + t.Parallel() + + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + // Setup + env.InitRepo() + env.WriteFile("README.md", "# Test") + env.GitAdd("README.md") + env.GitCommit("Initial commit") + + if strategyName == "manual-commit" { + env.GitCheckoutNewBranch("feature/test") + } + + // Create session with checkpoint + session := env.NewSession() + if err := env.SimulateUserPromptSubmit(session.ID); err != nil { + t.Fatalf("SimulateUserPromptSubmit failed: %v", err) + } + + code := `package main + +// API key for testing +const apiKey = "sk-test-very-secret-key" + +// User: john@example.com +// Path: /home/john/project/main.go +func main() {} +` + env.WriteFile("main.go", code) + + session.CreateTranscript( + "implement main function", + []FileChange{{Path: "main.go", Content: code}}, + ) + if err := env.SimulateStop(session.ID, session.TranscriptPath); err != nil { + t.Fatalf("SimulateStop failed: %v", err) + } + + env.GitAdd("main.go") + env.GitCommit("add main") + + checkpointID := env.GetLatestCheckpointIDFromHistory() + + // Export WITHOUT showcase mode + output, err := env.RunCLIWithError("explain", "-c", checkpointID, "--export", "--format=json") + if err != nil { + t.Fatalf("export failed: %v, output: %s", err, output) + } + + var exported map[string]any + if err := json.Unmarshal([]byte(output), &exported); err != nil { + t.Fatalf("invalid JSON output: %v", err) + } + + transcriptStr := exported["transcript"].(string) + + // Without --showcase, only entropy-based redaction applies + // API key should still be redacted by entropy detection + if strings.Contains(transcriptStr, "sk-test-very-secret-key") { + t.Error("API key should be redacted by entropy detection even without showcase mode") + } + + // But patterns like emails and paths should NOT be redacted (no showcase mode) + // Note: Depending on entropy thresholds, emails might still be caught. + // For this test, we just verify the export works without error. + }) +} + +func TestExport_MarkdownFormat(t *testing.T) { + t.Parallel() + + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + // Setup + env.InitRepo() + env.WriteFile("README.md", "# Test") + env.GitAdd("README.md") + env.GitCommit("Initial commit") + + if strategyName == "manual-commit" { + env.GitCheckoutNewBranch("feature/test") + } + + // Create session with checkpoint + session := env.NewSession() + if err := env.SimulateUserPromptSubmit(session.ID); err != nil { + t.Fatalf("SimulateUserPromptSubmit failed: %v", err) + } + + env.WriteFile("main.go", "package main\nfunc main() {}") + + session.CreateTranscript( + "create main function", + []FileChange{{Path: "main.go", Content: "package main\nfunc main() {}"}}, + ) + if err := env.SimulateStop(session.ID, session.TranscriptPath); err != nil { + t.Fatalf("SimulateStop failed: %v", err) + } + + env.GitAdd("main.go") + env.GitCommit("add main") + + checkpointID := env.GetLatestCheckpointIDFromHistory() + + // Export as Markdown with showcase + output, err := env.RunCLIWithError("explain", "-c", checkpointID, "--export", "--showcase", "--format=markdown") + if err != nil { + t.Fatalf("export failed: %v, output: %s", err, output) + } + + // Verify markdown structure + if !strings.Contains(output, "# Session:") { + t.Error("missing session header") + } + if !strings.Contains(output, "**Checkpoint:**") { + t.Error("missing checkpoint field") + } + if !strings.Contains(output, "**Created:**") { + t.Error("missing created field") + } + if !strings.Contains(output, "## Files Modified") { + t.Error("missing files section") + } + if !strings.Contains(output, "## Transcript") { + t.Error("missing transcript section") + } + + // Verify file is listed + if !strings.Contains(output, "`main.go`") { + t.Error("main.go not listed in files") + } + }) +} + +func TestExport_CheckpointNotFound(t *testing.T) { + t.Parallel() + + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + env.InitRepo() + + if strategyName == "manual-commit" { + env.GitCheckoutNewBranch("feature/test") + } + + // Try to export a nonexistent checkpoint + output, err := env.RunCLIWithError("explain", "-c", "nonexistent123", "--export") + if err == nil { + t.Errorf("expected error for nonexistent checkpoint, got output: %s", output) + return + } + + if !strings.Contains(output, "checkpoint not found") { + t.Errorf("expected 'checkpoint not found' error, got: %s", output) + } + }) +} + +func TestExport_RequiresCheckpointFlag(t *testing.T) { + t.Parallel() + + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + env.InitRepo() + + // Try to use --export without --checkpoint + output, err := env.RunCLIWithError("explain", "--export") + if err == nil { + t.Errorf("expected error when --export without --checkpoint, got output: %s", output) + return + } + + if !strings.Contains(output, "--export requires --checkpoint") { + t.Errorf("expected '--export requires --checkpoint' error, got: %s", output) + } + }) +} + +func TestExport_ShowcaseRequiresExport(t *testing.T) { + t.Parallel() + + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + env.InitRepo() + + // Try to use --showcase without --export + output, err := env.RunCLIWithError("explain", "-c", "test123", "--showcase") + if err == nil { + t.Errorf("expected error when --showcase without --export, got output: %s", output) + return + } + + if !strings.Contains(output, "--showcase requires --export") { + t.Errorf("expected '--showcase requires --export' error, got: %s", output) + } + }) +} + +func TestExport_MutualExclusivityWithOtherModes(t *testing.T) { + t.Parallel() + + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + env.InitRepo() + + // --export is mutually exclusive with --raw-transcript + output, err := env.RunCLIWithError("explain", "-c", "test123", "--export", "--raw-transcript") + if err == nil { + t.Errorf("expected error for --export with --raw-transcript, got output: %s", output) + return + } + if !strings.Contains(output, "mutually exclusive") { + t.Errorf("expected 'mutually exclusive' error, got: %s", output) + } + + // --export is mutually exclusive with --short + output, err = env.RunCLIWithError("explain", "-c", "test123", "--export", "--short") + if err == nil { + t.Errorf("expected error for --export with --short, got output: %s", output) + return + } + if !strings.Contains(output, "mutually exclusive") { + t.Errorf("expected 'mutually exclusive' error, got: %s", output) + } + + // --export is mutually exclusive with --full + output, err = env.RunCLIWithError("explain", "-c", "test123", "--export", "--full") + if err == nil { + t.Errorf("expected error for --export with --full, got output: %s", output) + return + } + if !strings.Contains(output, "mutually exclusive") { + t.Errorf("expected 'mutually exclusive' error, got: %s", output) + } + }) +} + +func TestExport_UnsupportedFormat(t *testing.T) { + t.Parallel() + + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + // Setup + env.InitRepo() + env.WriteFile("README.md", "# Test") + env.GitAdd("README.md") + env.GitCommit("Initial commit") + + if strategyName == "manual-commit" { + env.GitCheckoutNewBranch("feature/test") + } + + // Create checkpoint + session := env.NewSession() + if err := env.SimulateUserPromptSubmit(session.ID); err != nil { + t.Fatalf("SimulateUserPromptSubmit failed: %v", err) + } + + env.WriteFile("test.txt", "test") + session.CreateTranscript("test", []FileChange{{Path: "test.txt", Content: "test"}}) + if err := env.SimulateStop(session.ID, session.TranscriptPath); err != nil { + t.Fatalf("SimulateStop failed: %v", err) + } + + env.GitAdd("test.txt") + env.GitCommit("test commit") + + checkpointID := env.GetLatestCheckpointIDFromHistory() + + // Try to export with unsupported format + output, err := env.RunCLIWithError("explain", "-c", checkpointID, "--export", "--format=pdf") + if err == nil { + t.Errorf("expected error for unsupported format, got output: %s", output) + return + } + + if !strings.Contains(output, "unsupported format") { + t.Errorf("expected 'unsupported format' error, got: %s", output) + } + }) +} diff --git a/cmd/entire/cli/integration_test/insights_test.go b/cmd/entire/cli/integration_test/insights_test.go new file mode 100644 index 000000000..7e4396b75 --- /dev/null +++ b/cmd/entire/cli/integration_test/insights_test.go @@ -0,0 +1,236 @@ +//go:build integration + +package integration + +import ( + "encoding/json" + "strings" + "testing" +) + +func TestInsights_NoSessions(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + // Without any sessions, insights should show zero stats + output, err := env.RunCLIWithError("insights") + + if err != nil { + t.Errorf("expected success for empty insights, got error: %v, output: %s", err, output) + return + } + + // Should show header + if !strings.Contains(output, "Entire Insights") { + t.Errorf("expected 'Entire Insights' header in output, got: %s", output) + } + + // Should show zero sessions + if !strings.Contains(output, "Sessions:") { + t.Errorf("expected 'Sessions:' in output, got: %s", output) + } + }) +} + +func TestInsights_JSONOutput(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + // Test JSON output format + output, err := env.RunCLIWithError("insights", "--json") + + if err != nil { + t.Errorf("expected success for JSON output, got error: %v, output: %s", err, output) + return + } + + // Parse as JSON to validate structure + var result map[string]interface{} + if err := json.Unmarshal([]byte(output), &result); err != nil { + t.Errorf("failed to parse JSON output: %v, output: %s", err, output) + return + } + + // Check for expected fields + expectedFields := []string{ + "total_sessions", + "total_checkpoints", + "total_time", + "total_tokens", + "estimated_cost", + } + + for _, field := range expectedFields { + if _, ok := result[field]; !ok { + t.Errorf("expected field %s in JSON output, got: %v", field, result) + } + } + }) +} + +func TestInsights_PeriodFilters(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + tests := []string{"week", "month", "year"} + + for _, period := range tests { + output, err := env.RunCLIWithError("insights", "--period", period) + + if err != nil { + t.Errorf("period %s: expected success, got error: %v, output: %s", period, err, output) + continue + } + + // Should show period in output + if !strings.Contains(output, "Period:") { + t.Errorf("period %s: expected 'Period:' in output, got: %s", period, output) + } + } + }) +} + +func TestInsights_InvalidPeriod(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + output, err := env.RunCLIWithError("insights", "--period", "invalid") + + if err == nil { + t.Errorf("expected error for invalid period, got output: %s", output) + return + } + + if !strings.Contains(output, "invalid period") { + t.Errorf("expected 'invalid period' error, got: %s", output) + } + }) +} + +func TestInsights_ExportJSON(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + output, err := env.RunCLIWithError("insights", "--export", "--format", "json") + + if err != nil { + t.Errorf("expected success for export, got error: %v, output: %s", err, output) + return + } + + // Parse as JSON + var result map[string]interface{} + if err := json.Unmarshal([]byte(output), &result); err != nil { + t.Errorf("failed to parse exported JSON: %v, output: %s", err, output) + } + }) +} + +func TestInsights_ExportMarkdown(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + output, err := env.RunCLIWithError("insights", "--export", "--format", "markdown") + + if err != nil { + t.Errorf("expected success for markdown export, got error: %v, output: %s", err, output) + return + } + + // Should contain markdown headers + if !strings.Contains(output, "# Entire Insights") { + t.Errorf("expected markdown header in output, got: %s", output) + } + + if !strings.Contains(output, "## Summary") { + t.Errorf("expected '## Summary' section in output, got: %s", output) + } + }) +} + +func TestInsights_ExportHTML(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + output, err := env.RunCLIWithError("insights", "--export", "--format", "html") + + if err != nil { + t.Errorf("expected success for HTML export, got error: %v, output: %s", err, output) + return + } + + // Should contain HTML structure + if !strings.Contains(output, "") { + t.Errorf("expected HTML doctype in output, got: %s", output) + } + + // Check for design elements + expectedElements := []string{ + "sidebar", // Left sidebar + "nav-item", // Navigation items + "greeting", // Greeting header + "stat-card", // Stat cards + "chart-scatter", // Scatter chart + "badge-claude", // Claude Code badge + "diff-stat", // Diff stats + } + + for _, elem := range expectedElements { + if !strings.Contains(output, elem) { + t.Errorf("expected '%s' element in HTML output", elem) + } + } + + // Check for greeting variants + hasGreeting := strings.Contains(output, "Morning, developer") || + strings.Contains(output, "Afternoon, developer") || + strings.Contains(output, "Evening, developer") + + if !hasGreeting { + t.Errorf("expected time-based greeting in HTML output") + } + }) +} + +func TestInsights_MutualExclusivity(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + // --json and --export are mutually exclusive + output, err := env.RunCLIWithError("insights", "--json", "--export", "--format", "json") + + if err == nil { + t.Errorf("expected error for --json and --export together, got output: %s", output) + return + } + + if !strings.Contains(output, "mutually exclusive") && !strings.Contains(output, "if any flags in the group") { + t.Errorf("expected 'mutually exclusive' error, got: %s", output) + } + }) +} + +func TestInsights_FormatWithoutExport(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + // --format requires --export + output, err := env.RunCLIWithError("insights", "--format", "json") + + if err == nil { + t.Errorf("expected error for --format without --export, got output: %s", output) + return + } + + if !strings.Contains(output, "require --export") { + t.Errorf("expected 'require --export' error, got: %s", output) + } + }) +} + +func TestInsights_InvalidFormat(t *testing.T) { + t.Parallel() + RunForAllStrategies(t, func(t *testing.T, env *TestEnv, strategyName string) { + output, err := env.RunCLIWithError("insights", "--export", "--format", "invalid") + + if err == nil { + t.Errorf("expected error for invalid format, got output: %s", output) + return + } + + if !strings.Contains(output, "invalid format") { + t.Errorf("expected 'invalid format' error, got: %s", output) + } + }) +} diff --git a/cmd/entire/cli/root.go b/cmd/entire/cli/root.go index 5fedf6ad4..15d7a5eeb 100644 --- a/cmd/entire/cli/root.go +++ b/cmd/entire/cli/root.go @@ -81,6 +81,7 @@ func NewRootCmd() *cobra.Command { cmd.AddCommand(newHooksCmd()) cmd.AddCommand(newVersionCmd()) cmd.AddCommand(newExplainCmd()) + cmd.AddCommand(newInsightsCmd()) cmd.AddCommand(newDebugCmd()) cmd.AddCommand(newDoctorCmd()) cmd.AddCommand(newSendAnalyticsCmd()) diff --git a/cmd/entire/cli/settings/settings.go b/cmd/entire/cli/settings/settings.go index 381c9993a..138bf138b 100644 --- a/cmd/entire/cli/settings/settings.go +++ b/cmd/entire/cli/settings/settings.go @@ -12,6 +12,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/jsonutil" "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/redact" ) // DefaultStrategyName is the default strategy when none is configured. @@ -255,6 +256,69 @@ func (s *EntireSettings) IsPushSessionsDisabled() bool { return false } +// GetShowcaseConfig extracts showcase settings from StrategyOptions. +// Returns nil if showcase settings are not configured. +func (s *EntireSettings) GetShowcaseConfig() *redact.ShowcaseConfig { + if s.StrategyOptions == nil { + return nil + } + showcaseOpts, ok := s.StrategyOptions["showcase"].(map[string]any) + if !ok { + return nil + } + + // Parse showcase options into ShowcaseConfig struct + cfg := redact.DefaultShowcaseConfig() + + if redactPaths, ok := showcaseOpts["redact_paths"].(bool); ok { + cfg.RedactPaths = redactPaths + } + if redactUsernames, ok := showcaseOpts["redact_usernames"].(bool); ok { + cfg.RedactUsernames = redactUsernames + } + if redactProjectInfo, ok := showcaseOpts["redact_project_info"].(bool); ok { + cfg.RedactProjectInfo = redactProjectInfo + } + + // Parse allowed_paths array + if allowedPaths, ok := showcaseOpts["allowed_paths"].([]any); ok { + cfg.AllowedPaths = make([]string, 0, len(allowedPaths)) + for _, v := range allowedPaths { + if s, ok := v.(string); ok { + cfg.AllowedPaths = append(cfg.AllowedPaths, s) + } + } + } + + // Parse allowed_domains array + if allowedDomains, ok := showcaseOpts["allowed_domains"].([]any); ok { + cfg.AllowedDomains = make([]string, 0, len(allowedDomains)) + for _, v := range allowedDomains { + if s, ok := v.(string); ok { + cfg.AllowedDomains = append(cfg.AllowedDomains, s) + } + } + } + + // Parse custom_blocklist array + if customBlocklist, ok := showcaseOpts["custom_blocklist"].([]any); ok { + cfg.CustomBlocklist = make([]string, 0, len(customBlocklist)) + for _, v := range customBlocklist { + if s, ok := v.(string); ok { + cfg.CustomBlocklist = append(cfg.CustomBlocklist, s) + } + } + } + + return &cfg +} + +// IsShowcaseEnabled checks if showcase export is enabled in settings. +// Returns true if the showcase configuration exists (regardless of other settings). +func (s *EntireSettings) IsShowcaseEnabled() bool { + return s.GetShowcaseConfig() != nil +} + // Save saves the settings to .entire/settings.json. func Save(settings *EntireSettings) error { return saveToFile(settings, EntireSettingsFile) diff --git a/cmd/entire/cli/settings/settings_test.go b/cmd/entire/cli/settings/settings_test.go index ad09bc57a..1f38c2d8a 100644 --- a/cmd/entire/cli/settings/settings_test.go +++ b/cmd/entire/cli/settings/settings_test.go @@ -5,6 +5,8 @@ import ( "path/filepath" "strings" "testing" + + "github.com/entireio/cli/redact" ) func TestLoad_RejectsUnknownKeys(t *testing.T) { @@ -140,3 +142,223 @@ func containsUnknownField(msg string) bool { // Go's json package reports unknown fields with this message format return strings.Contains(msg, "unknown field") } + +func TestEntireSettings_GetShowcaseConfig(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + settings *EntireSettings + wantNil bool + validate func(*testing.T, *redact.ShowcaseConfig) + }{ + { + name: "showcase config present with all fields", + settings: &EntireSettings{ + StrategyOptions: map[string]any{ + "showcase": map[string]any{ + "redact_paths": true, + "redact_usernames": true, + "redact_project_info": false, + "allowed_paths": []any{"src/", "lib/"}, + "allowed_domains": []any{"@example.com"}, + "custom_blocklist": []any{"acme-corp", "project-*"}, + }, + }, + }, + wantNil: false, + validate: func(t *testing.T, cfg *redact.ShowcaseConfig) { + if !cfg.RedactPaths { + t.Error("expected RedactPaths to be true") + } + if !cfg.RedactUsernames { + t.Error("expected RedactUsernames to be true") + } + if cfg.RedactProjectInfo { + t.Error("expected RedactProjectInfo to be false") + } + if len(cfg.AllowedPaths) != 2 { + t.Errorf("expected 2 allowed paths, got %d", len(cfg.AllowedPaths)) + } + if len(cfg.AllowedDomains) != 1 { + t.Errorf("expected 1 allowed domain, got %d", len(cfg.AllowedDomains)) + } + if len(cfg.CustomBlocklist) != 2 { + t.Errorf("expected 2 blocklist items, got %d", len(cfg.CustomBlocklist)) + } + }, + }, + { + name: "showcase config with defaults", + settings: &EntireSettings{ + StrategyOptions: map[string]any{ + "showcase": map[string]any{}, + }, + }, + wantNil: false, + validate: func(t *testing.T, cfg *redact.ShowcaseConfig) { + // Should return defaults from DefaultShowcaseConfig() + defaults := redact.DefaultShowcaseConfig() + if cfg.RedactPaths != defaults.RedactPaths { + t.Error("expected default RedactPaths") + } + if cfg.RedactUsernames != defaults.RedactUsernames { + t.Error("expected default RedactUsernames") + } + if cfg.RedactProjectInfo != defaults.RedactProjectInfo { + t.Error("expected default RedactProjectInfo") + } + }, + }, + { + name: "no showcase config", + settings: &EntireSettings{ + StrategyOptions: map[string]any{ + "other": map[string]any{}, + }, + }, + wantNil: true, + }, + { + name: "nil StrategyOptions", + settings: &EntireSettings{}, + wantNil: true, + }, + { + name: "showcase is not a map", + settings: &EntireSettings{ + StrategyOptions: map[string]any{ + "showcase": "invalid", + }, + }, + wantNil: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + cfg := tt.settings.GetShowcaseConfig() + if tt.wantNil { + if cfg != nil { + t.Errorf("expected nil config, got %+v", cfg) + } + } else { + if cfg == nil { + t.Fatal("expected non-nil config, got nil") + } + if tt.validate != nil { + tt.validate(t, cfg) + } + } + }) + } +} + +func TestEntireSettings_IsShowcaseEnabled(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + settings *EntireSettings + want bool + }{ + { + name: "showcase config present", + settings: &EntireSettings{ + StrategyOptions: map[string]any{ + "showcase": map[string]any{ + "redact_paths": true, + }, + }, + }, + want: true, + }, + { + name: "showcase config missing", + settings: &EntireSettings{ + StrategyOptions: map[string]any{ + "other": map[string]any{}, + }, + }, + want: false, + }, + { + name: "nil StrategyOptions", + settings: &EntireSettings{}, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := tt.settings.IsShowcaseEnabled() + if got != tt.want { + t.Errorf("IsShowcaseEnabled() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestGetShowcaseConfig_ArrayParsing(t *testing.T) { + t.Parallel() + + settings := &EntireSettings{ + StrategyOptions: map[string]any{ + "showcase": map[string]any{ + "allowed_paths": []any{"src/", "lib/", "cmd/"}, + "allowed_domains": []any{"@example.com", "@test.org"}, + "custom_blocklist": []any{"term1", "term2", "term3"}, + }, + }, + } + + cfg := settings.GetShowcaseConfig() + if cfg == nil { + t.Fatal("expected non-nil config") + } + + // Check allowed_paths + if len(cfg.AllowedPaths) != 3 { + t.Errorf("expected 3 allowed paths, got %d", len(cfg.AllowedPaths)) + } + expectedPaths := []string{"src/", "lib/", "cmd/"} + for i, expected := range expectedPaths { + if i >= len(cfg.AllowedPaths) || cfg.AllowedPaths[i] != expected { + t.Errorf("allowed_paths[%d] = %q, want %q", i, cfg.AllowedPaths[i], expected) + } + } + + // Check allowed_domains + if len(cfg.AllowedDomains) != 2 { + t.Errorf("expected 2 allowed domains, got %d", len(cfg.AllowedDomains)) + } + + // Check custom_blocklist + if len(cfg.CustomBlocklist) != 3 { + t.Errorf("expected 3 blocklist items, got %d", len(cfg.CustomBlocklist)) + } +} + +func TestGetShowcaseConfig_SkipsNonStringArrayElements(t *testing.T) { + t.Parallel() + + settings := &EntireSettings{ + StrategyOptions: map[string]any{ + "showcase": map[string]any{ + "allowed_paths": []any{"src/", 123, "lib/", nil, "cmd/"}, + }, + }, + } + + cfg := settings.GetShowcaseConfig() + if cfg == nil { + t.Fatal("expected non-nil config") + } + + // Should only extract string values + if len(cfg.AllowedPaths) != 3 { + t.Errorf("expected 3 allowed paths (skipping non-strings), got %d", len(cfg.AllowedPaths)) + } +} diff --git a/cmd/entire/cli/strategy/auto_commit.go b/cmd/entire/cli/strategy/auto_commit.go index 7012e98b8..8f6e8f5ee 100644 --- a/cmd/entire/cli/strategy/auto_commit.go +++ b/cmd/entire/cli/strategy/auto_commit.go @@ -84,7 +84,7 @@ func (s *AutoCommitStrategy) getCheckpointStore() (*checkpoint.GitStore, error) // NewAutoCommitStrategy creates a new AutoCommitStrategy instance // -func NewAutoCommitStrategy() Strategy { //nolint:ireturn // already present in codebase +func NewAutoCommitStrategy() Strategy { return &AutoCommitStrategy{} } diff --git a/cmd/entire/cli/strategy/manual_commit.go b/cmd/entire/cli/strategy/manual_commit.go index 3164df738..5c1a875af 100644 --- a/cmd/entire/cli/strategy/manual_commit.go +++ b/cmd/entire/cli/strategy/manual_commit.go @@ -58,7 +58,7 @@ func (s *ManualCommitStrategy) getCheckpointStore() (*checkpoint.GitStore, error // NewManualCommitStrategy creates a new manual-commit strategy instance. // -func NewManualCommitStrategy() Strategy { //nolint:ireturn // already present in codebase +func NewManualCommitStrategy() Strategy { return &ManualCommitStrategy{} } @@ -66,7 +66,7 @@ func NewManualCommitStrategy() Strategy { //nolint:ireturn // already present in // This legacy constructor delegates to NewManualCommitStrategy. // -func NewShadowStrategy() Strategy { //nolint:ireturn // already present in codebase +func NewShadowStrategy() Strategy { return NewManualCommitStrategy() } diff --git a/cmd/entire/cli/strategy/registry.go b/cmd/entire/cli/strategy/registry.go index a57842427..5d226590d 100644 --- a/cmd/entire/cli/strategy/registry.go +++ b/cmd/entire/cli/strategy/registry.go @@ -24,7 +24,7 @@ func Register(name string, factory Factory) { // Get retrieves a strategy by name. // Returns an error if the strategy is not registered. -func Get(name string) (Strategy, error) { //nolint:ireturn // already present in codebase +func Get(name string) (Strategy, error) { registryMu.RLock() defer registryMu.RUnlock() @@ -61,7 +61,7 @@ const DefaultStrategyName = StrategyNameManualCommit // Default returns the default strategy. // Falls back to returning nil if no strategies are registered. -func Default() Strategy { //nolint:ireturn // already present in codebase +func Default() Strategy { s, err := Get(DefaultStrategyName) if err != nil { // Fallback: return the first registered strategy diff --git a/redact/showcase.go b/redact/showcase.go new file mode 100644 index 000000000..3de5f8e04 --- /dev/null +++ b/redact/showcase.go @@ -0,0 +1,290 @@ +package redact + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "path/filepath" + "regexp" + "strings" +) + +// ShowcaseConfig controls showcase-specific redaction for public sharing. +// This applies additional privacy-focused redaction beyond standard entropy detection. +type ShowcaseConfig struct { + RedactPaths bool // Normalize file paths (e.g., /Users/x/project/src → src/) + RedactUsernames bool // Replace usernames/emails + RedactProjectInfo bool // Replace repo/project names from git remotes + AllowedPaths []string // Paths to preserve (e.g., "src/", "lib/") + AllowedDomains []string // Domains to keep (e.g., "@example.com") + CustomBlocklist []string // Additional terms to redact (glob patterns) +} + +// DefaultShowcaseConfig returns sensible defaults for showcase redaction. +func DefaultShowcaseConfig() ShowcaseConfig { + return ShowcaseConfig{ + RedactPaths: true, + RedactUsernames: true, + RedactProjectInfo: true, + AllowedPaths: []string{"src/", "lib/", "cmd/", "pkg/", "internal/"}, + AllowedDomains: []string{}, + CustomBlocklist: []string{}, + } +} + +// Showcase applies showcase redaction after standard entropy-based redaction. +// Call String() or JSONLBytes() first, then apply Showcase() for layered protection. +func Showcase(s string, cfg ShowcaseConfig) string { + result := s + + // Layer 1: File path redaction (must run first to prevent paths from matching other patterns) + if cfg.RedactPaths { + result = redactFilePaths(result, cfg.AllowedPaths) + } + + // Layer 2: Pattern-based redaction (non-email patterns) + result = redactPatterns(result) + + // Layer 3: Structural redaction (git remotes before emails, then blocklist) + // Redact git remotes before emails to avoid matching git@host as an email + if cfg.RedactProjectInfo { + result = redactProjectInfo(result) + } + if cfg.RedactUsernames { + result = redactUsernames(result, cfg.AllowedDomains) + } + + // Layer 4: Blocklist matching (runs last to catch any remaining sensitive terms) + result = redactBlocklist(result, cfg.CustomBlocklist) + + return result +} + +// ShowcaseJSONL applies showcase redaction to JSONL session data. +// Preserves JSON structure while redacting values. +func ShowcaseJSONL(b []byte, cfg ShowcaseConfig) ([]byte, error) { + var buf bytes.Buffer + scanner := bufio.NewScanner(bytes.NewReader(b)) + + // Increase buffer size for large lines (up to 10MB per line) + maxCapacity := 10 * 1024 * 1024 + scanBuf := make([]byte, maxCapacity) + scanner.Buffer(scanBuf, maxCapacity) + + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + + // Parse JSON to preserve structure + var entry map[string]any + if err := json.Unmarshal(line, &entry); err != nil { + // Fallback: treat as plain string + redacted := Showcase(string(line), cfg) + buf.WriteString(redacted) + buf.WriteByte('\n') + continue + } + + // Recursively redact values (but not keys) + redactJSONValues(entry, cfg) + + // Re-serialize + redactedLine, err := json.Marshal(entry) + if err != nil { + return nil, fmt.Errorf("failed to marshal redacted JSON: %w", err) + } + + buf.Write(redactedLine) + buf.WriteByte('\n') + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("failed to scan JSONL: %w", err) + } + + return buf.Bytes(), nil +} + +// redactJSONValues recursively redacts values in JSON objects/arrays while preserving keys. +func redactJSONValues(v any, cfg ShowcaseConfig) { + switch val := v.(type) { + case map[string]any: + for key, child := range val { + switch childVal := child.(type) { + case string: + // Redact string values (but not keys) + val[key] = Showcase(childVal, cfg) + case map[string]any, []any: + // Recurse into nested structures + redactJSONValues(childVal, cfg) + } + } + case []any: + for i, child := range val { + switch childVal := child.(type) { + case string: + val[i] = Showcase(childVal, cfg) + case map[string]any, []any: + redactJSONValues(childVal, cfg) + } + } + } +} + +// Pattern-based redaction + +var ( + // Internal URLs and private IPs + internalURLPattern = regexp.MustCompile(`(?i)\b[a-z0-9-]+\.(internal|local|corp|lan)\b(:[0-9]+)?`) + privateIPPattern = regexp.MustCompile(`\b(?:10\.\d{1,3}\.\d{1,3}\.\d{1,3}|172\.(?:1[6-9]|2[0-9]|3[01])\.\d{1,3}\.\d{1,3}|192\.168\.\d{1,3}\.\d{1,3})(?::[0-9]+)?\b`) + + // Cloud ARNs + // Account ID is optional for some ARN types (e.g., S3: arn:aws:s3:::bucket/key) + awsARNPattern = regexp.MustCompile(`arn:aws:[a-z0-9-]+:[a-z0-9-]*:(?:[0-9]{12})?:[a-zA-Z0-9/._-]+`) + // GCP resource paths - require known resource types to avoid matching regular file paths + gcpPattern = regexp.MustCompile(`projects/[a-z0-9-]+/(?:locations|zones|regions|instances|datasets|buckets|topics|subscriptions)/[a-zA-Z0-9/._-]+`) + + // Database connection strings + dbConnPattern = regexp.MustCompile(`(?i)(postgres|postgresql|mongodb|mysql|redis)://[^\s\)\"\']+`) + + // Email addresses (basic pattern) + emailPattern = regexp.MustCompile(`\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b`) + + // AWS account IDs (12 digits in AWS context) + // Capture separator to preserve it in replacement + awsAccountPattern = regexp.MustCompile(`(?i)(aws|account|arn)([\s:]\s*)([0-9]{12})\b`) + + // JWT tokens (starts with eyJ) + jwtPattern = regexp.MustCompile(`\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b`) + + // PEM private keys + pemKeyPattern = regexp.MustCompile(`-----BEGIN[A-Z ]+PRIVATE KEY-----[\s\S]*?-----END[A-Z ]+PRIVATE KEY-----`) + + // File paths with user directories + homePathPattern = regexp.MustCompile(`/(?:Users|home)/[^/\s]+`) +) + +func redactPatterns(s string) string { + result := s + + result = internalURLPattern.ReplaceAllString(result, "[INTERNAL_URL]") + result = privateIPPattern.ReplaceAllString(result, "[PRIVATE_IP]") + result = awsARNPattern.ReplaceAllString(result, "[AWS_ARN]") + result = gcpPattern.ReplaceAllString(result, "[GCP_RESOURCE]") + result = dbConnPattern.ReplaceAllString(result, "[DB_CONNECTION_STRING]") + // NOTE: Email redaction is handled by redactUsernames() to allow AllowedDomains filtering + result = jwtPattern.ReplaceAllString(result, "[JWT_TOKEN]") + result = pemKeyPattern.ReplaceAllString(result, "[PEM_PRIVATE_KEY]") + + // AWS account IDs - preserve context and separator, redact number + result = awsAccountPattern.ReplaceAllString(result, "${1}${2}[AWS_ACCOUNT_ID]") + + return result +} + +func redactBlocklist(s string, blocklist []string) string { + result := s + + for _, pattern := range blocklist { + // Convert glob pattern to regex + regexPattern := globToRegex(pattern) + re := regexp.MustCompile("(?i)" + regexPattern) + result = re.ReplaceAllString(result, "[REDACTED]") + } + + return result +} + +func redactFilePaths(s string, allowedPaths []string) string { + result := s + + // Redact absolute paths with user directories + result = homePathPattern.ReplaceAllStringFunc(result, func(match string) string { + // Check if path starts with any allowed prefix + for _, allowed := range allowedPaths { + if strings.Contains(match, "/"+allowed) { + // Extract relative path from allowed prefix + parts := strings.Split(match, "/"+allowed) + if len(parts) > 1 { + return allowed + parts[len(parts)-1] + } + } + } + // Preserve leading slash when redacting + return "/[PATH]" + }) + + return result +} + +func redactUsernames(s string, allowedDomains []string) string { + result := s + + // Redact emails unless domain is in allowed list + result = emailPattern.ReplaceAllStringFunc(result, func(email string) string { + for _, domain := range allowedDomains { + if strings.HasSuffix(email, domain) { + return email // Preserve allowed domains + } + } + return "[EMAIL]" + }) + + return result +} + +func redactProjectInfo(s string) string { + result := s + + // Redact git remote URLs (common patterns) + // Only match known git hosting domains to avoid over-redacting + gitRemotePatterns := []*regexp.Regexp{ + regexp.MustCompile(`git@[^:]+:[^/]+/[^\s.]+\.git`), + regexp.MustCompile(`https://[^/]+/[^/]+/[^\s.]+\.git`), + // Match GitHub/GitLab/Bitbucket URLs without .git + regexp.MustCompile(`https://(?:github\.com|gitlab\.com|bitbucket\.org)/[^/]+/[^\s/]+`), + } + + for _, pattern := range gitRemotePatterns { + result = pattern.ReplaceAllString(result, "[GIT_REMOTE]") + } + + return result +} + +// globToRegex converts a simple glob pattern to regex. +// Supports * (word chars and hyphens) and ? (single character). +func globToRegex(glob string) string { + // Escape regex special chars except * and ? + specialChars := `\.+^$()[]{}|` + result := glob + for _, char := range specialChars { + result = strings.ReplaceAll(result, string(char), `\`+string(char)) + } + + // Convert glob wildcards to regex + // Use [\w-]* to match word characters and hyphens, staying within token boundaries + // This prevents greedy matching across dots and other punctuation + result = strings.ReplaceAll(result, "*", `[\w-]*`) + result = strings.ReplaceAll(result, "?", `[\w]`) + + // Match word boundaries + return `\b` + result + `\b` +} + +// normalizePath converts an absolute path to a project-relative path. +func normalizePath(absPath string, allowedPaths []string) string { + // Try to extract relative path based on allowed prefixes + for _, prefix := range allowedPaths { + if idx := strings.Index(absPath, "/"+prefix); idx != -1 { + relPath := absPath[idx+1:] + return relPath + } + } + + // Fallback: just return basename + return filepath.Base(absPath) +} diff --git a/redact/showcase_test.go b/redact/showcase_test.go new file mode 100644 index 000000000..e1bcb1920 --- /dev/null +++ b/redact/showcase_test.go @@ -0,0 +1,587 @@ +package redact + +import ( + "encoding/json" + "strings" + "testing" +) + +func TestShowcase_PatternRedaction(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + want string + config ShowcaseConfig + }{ + { + name: "private IP - 10.x.x.x", + input: "server at 10.0.3.47 is down", + want: "server at [PRIVATE_IP] is down", + config: DefaultShowcaseConfig(), + }, + { + name: "private IP with port", + input: "connect to 192.168.1.100:8080", + want: "connect to [PRIVATE_IP]", + config: DefaultShowcaseConfig(), + }, + { + name: "internal URL", + input: "api.internal:8080/v1/users", + want: "[INTERNAL_URL]/v1/users", + config: DefaultShowcaseConfig(), + }, + { + name: "internal domain variations", + input: "app.local app.corp app.lan", + want: "[INTERNAL_URL] [INTERNAL_URL] [INTERNAL_URL]", + config: DefaultShowcaseConfig(), + }, + { + name: "AWS ARN", + input: "arn:aws:s3:::my-bucket/object", + want: "[AWS_ARN]", + config: DefaultShowcaseConfig(), + }, + { + name: "GCP resource path", + input: "projects/my-project-123/instances/db-main", + want: "[GCP_RESOURCE]", + config: DefaultShowcaseConfig(), + }, + { + name: "PostgreSQL connection string", + input: "postgres://user:pass@host:5432/db", + want: "[DB_CONNECTION_STRING]", + config: DefaultShowcaseConfig(), + }, + { + name: "MongoDB connection string", + input: "mongodb://admin:secret@cluster0.mongodb.net/test", + want: "[DB_CONNECTION_STRING]", + config: DefaultShowcaseConfig(), + }, + { + name: "email address", + input: "contact john.doe@example.com for help", + want: "contact [EMAIL] for help", + config: DefaultShowcaseConfig(), + }, + { + name: "AWS account ID in context", + input: "AWS account: 123456789012", + want: "AWS account: [AWS_ACCOUNT_ID]", + config: DefaultShowcaseConfig(), + }, + { + name: "JWT token", + input: "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", + want: "Bearer [JWT_TOKEN]", + config: DefaultShowcaseConfig(), + }, + { + name: "PEM private key", + input: "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----", + want: "[PEM_PRIVATE_KEY]", + config: DefaultShowcaseConfig(), + }, + { + name: "file path with user directory", + input: "/Users/john/projects/myapp/src/main.go", + want: "/[PATH]/projects/myapp/src/main.go", + config: DefaultShowcaseConfig(), + }, + { + name: "Linux home path", + input: "/home/alice/workspace/backend/api.go", + want: "/[PATH]/workspace/backend/api.go", + config: DefaultShowcaseConfig(), + }, + { + name: "preserve public IP", + input: "visit 1.1.1.1 for DNS", + want: "visit 1.1.1.1 for DNS", + config: DefaultShowcaseConfig(), + }, + { + name: "preserve public domain", + input: "check https://example.com/api", + want: "check https://example.com/api", + config: DefaultShowcaseConfig(), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := Showcase(tt.input, tt.config) + if got != tt.want { + t.Errorf("Showcase() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestShowcase_BlocklistRedaction(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + blocklist []string + want string + }{ + { + name: "company name exact match", + input: "Working at ACME Corp on project", + blocklist: []string{"ACME Corp"}, + want: "Working at [REDACTED] on project", + }, + { + name: "project codename", + input: "Project Phoenix is launching soon", + blocklist: []string{"Phoenix"}, + want: "Project [REDACTED] is launching soon", + }, + { + name: "wildcard pattern", + input: "internal-api.company.com and internal-web.company.com", + blocklist: []string{"internal-*"}, + want: "[REDACTED].company.com and [REDACTED].company.com", + }, + { + name: "multiple blocklist items", + input: "ACME Corp's Project Phoenix uses secret-api", + blocklist: []string{"ACME Corp", "Phoenix", "secret-*"}, + want: "[REDACTED]'s Project [REDACTED] uses [REDACTED]", + }, + { + name: "case insensitive matching", + input: "acme, ACME, Acme", + blocklist: []string{"acme"}, + want: "[REDACTED], [REDACTED], [REDACTED]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + cfg := DefaultShowcaseConfig() + cfg.CustomBlocklist = tt.blocklist + got := Showcase(tt.input, cfg) + if got != tt.want { + t.Errorf("Showcase() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestShowcase_PathNormalization(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + allowedPaths []string + want string + }{ + { + name: "extract allowed path", + input: "/Users/alice/myproject/src/main.go", + allowedPaths: []string{"src/"}, + want: "/[PATH]/myproject/src/main.go", + }, + { + name: "multiple allowed paths", + input: "/home/bob/app/lib/util.go", + allowedPaths: []string{"src/", "lib/", "cmd/"}, + want: "/[PATH]/app/lib/util.go", + }, + { + name: "no allowed paths - redact fully", + input: "/Users/charlie/secret-project/app.go", + allowedPaths: []string{}, + want: "/[PATH]/secret-project/app.go", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + cfg := DefaultShowcaseConfig() + cfg.AllowedPaths = tt.allowedPaths + got := Showcase(tt.input, cfg) + if got != tt.want { + t.Errorf("Showcase() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestShowcase_UsernameRedaction(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + allowedDomains []string + want string + }{ + { + name: "redact all emails by default", + input: "Contact alice@private.com or bob@company.com", + allowedDomains: []string{}, + want: "Contact [EMAIL] or [EMAIL]", + }, + { + name: "preserve allowed domain", + input: "Email support@example.com or admin@internal.com", + allowedDomains: []string{"@example.com"}, + want: "Email support@example.com or [EMAIL]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + cfg := DefaultShowcaseConfig() + cfg.AllowedDomains = tt.allowedDomains + got := Showcase(tt.input, cfg) + if got != tt.want { + t.Errorf("Showcase() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestShowcase_ProjectInfoRedaction(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + want string + }{ + { + name: "SSH git remote", + input: "git@github.com:acme-corp/secret-project.git", + want: "[GIT_REMOTE]", + }, + { + name: "HTTPS git remote", + input: "https://github.com/my-org/private-repo.git", + want: "[GIT_REMOTE]", + }, + { + name: "GitHub URL without .git", + input: "https://github.com/company/project", + want: "[GIT_REMOTE]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + cfg := DefaultShowcaseConfig() + got := Showcase(tt.input, cfg) + if got != tt.want { + t.Errorf("Showcase() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestShowcaseJSONL_PreservesStructure(t *testing.T) { + t.Parallel() + + input := `{"type":"text","content":"server at 10.0.1.5 is down"} +{"type":"code","language":"go","content":"// connect to api.internal"} +{"metadata":{"email":"admin@company.com","path":"/Users/alice/project/main.go"}}` + + cfg := DefaultShowcaseConfig() + result, err := ShowcaseJSONL([]byte(input), cfg) + if err != nil { + t.Fatalf("ShowcaseJSONL() error = %v", err) + } + + lines := strings.Split(strings.TrimSpace(string(result)), "\n") + if len(lines) != 3 { + t.Fatalf("expected 3 lines, got %d", len(lines)) + } + + // Verify each line is valid JSON + for i, line := range lines { + var entry map[string]any + if err := json.Unmarshal([]byte(line), &entry); err != nil { + t.Errorf("line %d not valid JSON: %v", i, err) + } + } + + // Check specific redactions + var line1 map[string]any + json.Unmarshal([]byte(lines[0]), &line1) + if !strings.Contains(line1["content"].(string), "[PRIVATE_IP]") { + t.Error("line 1: private IP not redacted") + } + + var line2 map[string]any + json.Unmarshal([]byte(lines[1]), &line2) + if !strings.Contains(line2["content"].(string), "[INTERNAL_URL]") { + t.Error("line 2: internal URL not redacted") + } + + var line3 map[string]any + json.Unmarshal([]byte(lines[2]), &line3) + metadata := line3["metadata"].(map[string]any) + if !strings.Contains(metadata["email"].(string), "[EMAIL]") { + t.Error("line 3: email not redacted") + } + if !strings.Contains(metadata["path"].(string), "[PATH]") { + t.Error("line 3: path not redacted") + } +} + +func TestShowcaseJSONL_HandlesNestedStructures(t *testing.T) { + t.Parallel() + + input := `{"user":{"email":"test@company.com","profile":{"url":"https://api.internal/users/123"}}}` + + cfg := DefaultShowcaseConfig() + result, err := ShowcaseJSONL([]byte(input), cfg) + if err != nil { + t.Fatalf("ShowcaseJSONL() error = %v", err) + } + + var entry map[string]any + if err := json.Unmarshal(result, &entry); err != nil { + t.Fatalf("result not valid JSON: %v", err) + } + + user := entry["user"].(map[string]any) + if !strings.Contains(user["email"].(string), "[EMAIL]") { + t.Error("nested email not redacted") + } + + profile := user["profile"].(map[string]any) + if !strings.Contains(profile["url"].(string), "[INTERNAL_URL]") { + t.Error("nested URL not redacted") + } +} + +func TestShowcaseJSONL_MalformedFallback(t *testing.T) { + t.Parallel() + + input := `not valid json but has email: admin@company.com` + + cfg := DefaultShowcaseConfig() + result, err := ShowcaseJSONL([]byte(input), cfg) + if err != nil { + t.Fatalf("ShowcaseJSONL() error = %v", err) + } + + if !strings.Contains(string(result), "[EMAIL]") { + t.Error("malformed line not redacted via fallback") + } +} + +func TestShowcase_BoundaryConditions(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + config ShowcaseConfig + }{ + { + name: "empty string", + input: "", + config: DefaultShowcaseConfig(), + }, + { + name: "whitespace only", + input: " \n\t ", + config: DefaultShowcaseConfig(), + }, + { + name: "unicode content", + input: "用户邮箱:admin@company.com", + config: DefaultShowcaseConfig(), + }, + { + name: "already redacted", + input: "server at [PRIVATE_IP] with [REDACTED]", + config: DefaultShowcaseConfig(), + }, + { + name: "mixed patterns", + input: "Connect to 10.0.1.5:5432 (postgres://user:pass@10.0.1.5/db) or api.internal", + config: DefaultShowcaseConfig(), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + // Should not panic or error + result := Showcase(tt.input, tt.config) + if tt.name == "empty string" && result != "" { + t.Error("empty string should remain empty") + } + }) + } +} + +func TestShowcase_PreservesTechnicalTerms(t *testing.T) { + t.Parallel() + + input := ` +package main + +import ( + "database/sql" + "github.com/lib/pq" +) + +func connectDB() error { + // Connect to database + return nil +} +` + + cfg := DefaultShowcaseConfig() + result := Showcase(input, cfg) + + // Technical terms should be preserved + preservedTerms := []string{ + "package main", + "import", + "database/sql", + "github.com/lib/pq", + "func connectDB", + } + + for _, term := range preservedTerms { + if !strings.Contains(result, term) { + t.Errorf("technical term %q was incorrectly redacted", term) + } + } +} + +func TestShowcase_LayeredRedaction(t *testing.T) { + t.Parallel() + + // Test that layered redaction (entropy + showcase) works correctly + input := "API key: sk-proj-abcd1234, server: 10.0.1.5, email: admin@acme-corp.com" + + cfg := DefaultShowcaseConfig() + cfg.CustomBlocklist = []string{"acme-corp"} + + // First apply entropy-based redaction (simulated by replacing API key) + step1 := strings.Replace(input, "sk-proj-abcd1234", "[REDACTED]", 1) + + // Then apply showcase redaction + result := Showcase(step1, cfg) + + // Verify all redactions applied + if !strings.Contains(result, "[REDACTED]") { + t.Error("API key not redacted (entropy layer)") + } + if !strings.Contains(result, "[PRIVATE_IP]") { + t.Error("private IP not redacted (pattern layer)") + } + if !strings.Contains(result, "[EMAIL]") { + t.Error("email not redacted (pattern layer)") + } + + // Verify blocklist was applied + if strings.Contains(result, "acme-corp") { + t.Error("blocklist term not redacted") + } +} + +func TestGlobToRegex(t *testing.T) { + t.Parallel() + + tests := []struct { + glob string + input string + match bool + }{ + {"test*", "test123", true}, + {"test*", "testing", true}, + {"test*", "mytest", false}, // word boundary + {"*api", "internal-api", true}, + // Note: Go's regexp doesn't support lookahead, so "*api" will match "api-server" + // This is acceptable as it still prevents greedy matching across multiple tokens + {"*api", "api-server", true}, // matches due to word boundary (Go regex limitation) + {"test?", "test1", true}, + {"test?", "test12", false}, + {"test.com", "test.com", true}, + {"test.com", "testXcom", false}, // . should be literal + } + + for _, tt := range tests { + t.Run(tt.glob, func(t *testing.T) { + t.Parallel() + _ = globToRegex(tt.glob) // Test that globToRegex doesn't panic + // Test via Showcase with blocklist + cfg := DefaultShowcaseConfig() + cfg.CustomBlocklist = []string{tt.glob} + result := Showcase(tt.input, cfg) + + matched := strings.Contains(result, "[REDACTED]") + if matched != tt.match { + t.Errorf("glob %q, input %q: expected match=%v, got match=%v", tt.glob, tt.input, tt.match, matched) + } + }) + } +} + +func TestDefaultShowcaseConfig(t *testing.T) { + t.Parallel() + + cfg := DefaultShowcaseConfig() + + if !cfg.RedactPaths { + t.Error("default should redact paths") + } + if !cfg.RedactUsernames { + t.Error("default should redact usernames") + } + if !cfg.RedactProjectInfo { + t.Error("default should redact project info") + } + if len(cfg.AllowedPaths) == 0 { + t.Error("default should have some allowed paths") + } +} + +func BenchmarkShowcase(b *testing.B) { + input := `server at 10.0.1.5 is down, contact admin@company.com + postgres://user:pass@host:5432/db + JWT: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.test.test + path: /Users/alice/project/src/main.go` + + cfg := DefaultShowcaseConfig() + + b.ResetTimer() + for range b.N { + _ = Showcase(input, cfg) + } +} + +func BenchmarkShowcaseJSONL(b *testing.B) { + input := []byte(`{"type":"text","content":"server at 10.0.1.5"} +{"type":"text","content":"email admin@company.com"} +{"type":"code","content":"postgres://user:pass@host/db"} +{"metadata":{"path":"/Users/alice/project/main.go"}}`) + + cfg := DefaultShowcaseConfig() + + b.ResetTimer() + for range b.N { + _, _ = ShowcaseJSONL(input, cfg) + } +}