Commit 9ed91d6

🤖 feat: add model selection to /compact command (#275)
## Overview

Adds support for choosing the compaction model via a `/compact -m <model>` flag. Reuses existing model abbreviation infrastructure for consistency and shows the model name in the streaming barrier during compaction.

## Changes

- **Command Parser**: Added `-m` flag parsing to the `/compact` command with model abbreviation resolution
- **Type System**: Extended `ParsedCommand` to include an optional `model` field for the compact command
- **Message Preparation**: Updated `prepareCompactionMessage()` to use the custom model when specified
- **UI Display**: Updated `StreamingBarrier` to show the model name during compaction (e.g., "claude-haiku-4-5 compacting...")
- **Tests**: Added 7 new tests for the `-m` flag, updated 27 existing tests

## Usage

```bash
# Use haiku for fast compaction
/compact -m haiku

# Use opus for a detailed summary with a token limit
/compact -m opus -t 8000

# Use a full model string
/compact -m anthropic:claude-opus-4-1

# Combine with a continue message
/compact -m sonnet -t 5000
Continue implementing the auth system

# Default behavior (uses the current workspace model)
/compact
```

## Design Notes

- **Reuses model abbreviations**: Leverages the existing `MODEL_ABBREVIATIONS` map for consistency with the `/model` command
- **Optional override**: The model parameter defaults to the workspace model when not specified
- **UI consistency**: The compaction barrier now matches the streaming barrier format with a model name prefix
- **Robust parsing**: Handles edge cases (empty flag, unknown abbreviations, etc.)

## Testing

- ✅ All 34 slash command tests passing
- ✅ All 547 unit tests passing
- ✅ TypeScript compilation successful
- ✅ ESLint checks passing

_Generated with `cmux`_
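The abbreviation-resolution step described under "Command Parser" is not itself visible in this diff. A minimal sketch of how `-m` resolution might work, with an illustrative map (the real `MODEL_ABBREVIATIONS` entries live in the `/model` command infrastructure and may differ):

```typescript
// Hypothetical sketch: entries are illustrative, not the real MODEL_ABBREVIATIONS map.
const MODEL_ABBREVIATIONS: Record<string, string> = {
  haiku: "anthropic:claude-haiku-4-5",
  sonnet: "anthropic:claude-sonnet-4-5",
  opus: "anthropic:claude-opus-4-1",
};

// Resolve a `/compact -m <model>` argument to a full model string.
// Returns undefined for an empty flag or unknown abbreviation, so the
// caller can fall back to the workspace model.
function resolveCompactionModel(arg: string): string | undefined {
  if (arg.length === 0) return undefined;
  if (arg.includes(":")) return arg; // full "provider:model" strings pass through
  return MODEL_ABBREVIATIONS[arg];
}
```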
1 parent e0caa13 commit 9ed91d6

File tree

12 files changed (+307, -29 lines)


.github/workflows/ci.yml

Lines changed: 2 additions & 1 deletion
```diff
@@ -32,7 +32,8 @@ jobs:
       - name: Install and setup shfmt
         run: |
-          if [[ ! -f "$HOME/.local/bin/shfmt" ]]; then
+          # Install shfmt if not cached or if cached binary is broken
+          if [[ ! -f "$HOME/.local/bin/shfmt" ]] || ! "$HOME/.local/bin/shfmt" --version >/dev/null 2>&1; then
             curl -sS https://webinstall.dev/shfmt | bash
           fi
           echo "$HOME/.local/bin" >> $GITHUB_PATH
```

docs/context-management.md

Lines changed: 15 additions & 4 deletions
````diff
@@ -47,13 +47,14 @@ Compress conversation history using AI summarization. Replaces the conversation
 ### Syntax
 
 ```
-/compact [-t <tokens>]
+/compact [-t <tokens>] [-m <model>]
 [continue message on subsequent lines]
 ```
 
 ### Options
 
 - `-t <tokens>` - Maximum output tokens for the summary (default: ~2000 words)
+- `-m <model>` - Model to use for compaction (default: workspace model). Supports abbreviations like `haiku`, `sonnet`, or full model strings
 
 ### Examples
 
@@ -69,6 +70,14 @@ Compress conversation history using AI summarization. Replaces the conversation
 /compact -t 5000
 ```
 
+**Choose compaction model:**
+
+```
+/compact -m haiku
+```
+
+Use Haiku for faster, lower-cost compaction.
+
 **Auto-continue with custom message:**
 
 ```
@@ -88,16 +97,18 @@ Make sure to add tests for the error cases.
 
 Continue messages can span multiple lines for more detailed instructions.
 
-**Combine token limit and auto-continue:**
+**Combine all options:**
 
 ```
-/compact -t 3000
+/compact -m haiku -t 8000
 Keep working on the feature
 ```
 
+Combine custom model, token limit, and auto-continue message.
+
 ### Notes
 
-- Uses the selected LLM to summarize conversation history
+- Uses the specified model (or workspace model by default) to summarize conversation history
 - Preserves actionable context and specific details
 - **Irreversible** - original messages are replaced
 - Continue message is sent once after compaction completes (not persisted)
````

src/components/AIView.tsx

Lines changed: 3 additions & 1 deletion
```diff
@@ -495,7 +495,9 @@ const AIViewInner: React.FC<AIViewProps> = ({
           <StreamingBarrier
             statusText={
               isCompacting
-                ? "compacting..."
+                ? currentModel
+                  ? `${getModelName(currentModel)} compacting...`
+                  : "compacting..."
                 : currentModel
                   ? `${getModelName(currentModel)} streaming...`
                   : "streaming..."
```

src/components/ChatInput.tsx

Lines changed: 13 additions & 12 deletions
```diff
@@ -26,8 +26,9 @@ import { VimTextArea } from "./VimTextArea";
 import { ImageAttachments, type ImageAttachment } from "./ImageAttachments";
 
 import type { ThinkingLevel } from "@/types/thinking";
-import type { CmuxFrontendMetadata } from "@/types/message";
+import type { CmuxFrontendMetadata, CompactionRequestData } from "@/types/message";
 import type { SendMessageOptions } from "@/types/ipc";
+import { applyCompactionOverrides } from "@/utils/messages/compactionOptions";
 
 const InputSection = styled.div`
   position: relative;
@@ -304,22 +305,22 @@ function prepareCompactionMessage(
 
   const messageText = `Summarize this conversation into a compact form for a new Assistant to continue helping the user. Use approximately ${targetWords} words.`;
 
+  // Create compaction metadata (will be stored in user message)
+  const compactData: CompactionRequestData = {
+    model: parsed.model,
+    maxOutputTokens: parsed.maxOutputTokens,
+    continueMessage: parsed.continueMessage,
+  };
+
   const metadata: CmuxFrontendMetadata = {
     type: "compaction-request",
     rawCommand: command,
-    parsed: {
-      maxOutputTokens: parsed.maxOutputTokens,
-      continueMessage: parsed.continueMessage,
-    },
+    parsed: compactData,
   };
 
-  const isAnthropic = sendMessageOptions.model.startsWith("anthropic:");
-  const options: Partial<SendMessageOptions> = {
-    thinkingLevel: isAnthropic ? "off" : sendMessageOptions.thinkingLevel,
-    toolPolicy: [{ regex_match: "compact_summary", action: "require" }],
-    maxOutputTokens: parsed.maxOutputTokens,
-    mode: "compact" as const,
-  };
+  // Apply compaction overrides using shared transformation function
+  // This same function is used by useResumeManager to ensure consistency
+  const options = applyCompactionOverrides(sendMessageOptions, compactData);
 
   return { messageText, metadata, options };
 }
```

src/hooks/useResumeManager.ts

Lines changed: 15 additions & 1 deletion
```diff
@@ -5,6 +5,7 @@ import { getAutoRetryKey, getRetryStateKey } from "@/constants/storage";
 import { getSendOptionsFromStorage } from "@/utils/messages/sendOptions";
 import { readPersistedState } from "./usePersistedState";
 import { hasInterruptedStream } from "@/utils/messages/retryEligibility";
+import { applyCompactionOverrides } from "@/utils/messages/compactionOptions";
 
 interface RetryState {
   attempt: number;
@@ -139,7 +140,20 @@ export function useResumeManager() {
       const { attempt } = retryState;
 
       try {
-        const options = getSendOptionsFromStorage(workspaceId);
+        // Start with workspace defaults
+        let options = getSendOptionsFromStorage(workspaceId);
+
+        // Check if last user message was a compaction request
+        const state = workspaceStatesRef.current.get(workspaceId);
+        if (state) {
+          const lastUserMsg = [...state.messages].reverse().find((msg) => msg.type === "user");
+          if (lastUserMsg?.compactionRequest) {
+            // Apply compaction overrides using shared function (same as ChatInput)
+            // This ensures custom model/tokens are preserved across resume
+            options = applyCompactionOverrides(options, lastUserMsg.compactionRequest.parsed);
+          }
+        }
+
         const result = await window.api.workspace.resumeStream(workspaceId, options);
 
         if (!result.success) {
```

src/services/agentSession.ts

Lines changed: 8 additions & 1 deletion
```diff
@@ -12,6 +12,7 @@ import type { SendMessageError } from "@/types/errors";
 import { createUnknownSendMessageError } from "@/services/utils/sendMessageError";
 import type { Result } from "@/types/result";
 import { Ok, Err } from "@/types/result";
+import { enforceThinkingPolicy } from "@/utils/thinking/policy";
 
 interface ImagePart {
   image: string;
@@ -297,11 +298,17 @@ export class AgentSession {
       return Err(createUnknownSendMessageError(historyResult.error));
     }
 
+    // Enforce thinking policy for the specified model (single source of truth)
+    // This ensures model-specific requirements are met regardless of where the request originates
+    const effectiveThinkingLevel = options?.thinkingLevel
+      ? enforceThinkingPolicy(modelString, options.thinkingLevel)
+      : undefined;
+
     const streamResult = await this.aiService.streamMessage(
       historyResult.data,
       this.workspaceId,
       modelString,
-      options?.thinkingLevel,
+      effectiveThinkingLevel,
       options?.toolPolicy,
       undefined,
       options?.additionalSystemInstructions,
```
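The `enforceThinkingPolicy` implementation in `@/utils/thinking/policy` is not part of this diff. A hypothetical sketch of its shape, assuming the same rule stated in the compaction code ("Anthropic models don't support thinking, always use off"); the real policy may enforce more model-specific requirements:

```typescript
type ThinkingLevel = "off" | "low" | "medium" | "high";

// Hypothetical sketch: the real enforceThinkingPolicy is not shown in this
// commit. The rule assumed here mirrors applyCompactionOverrides: Anthropic
// models run with thinking off, other providers keep the requested level.
function enforceThinkingPolicy(modelString: string, requested: ThinkingLevel): ThinkingLevel {
  return modelString.startsWith("anthropic:") ? "off" : requested;
}
```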

src/types/message.ts

Lines changed: 8 additions & 4 deletions
```diff
@@ -3,16 +3,20 @@ import type { LanguageModelV2Usage } from "@ai-sdk/provider";
 import type { StreamErrorType } from "./errors";
 import type { ToolPolicy } from "@/utils/tools/toolPolicy";
 
+// Parsed compaction request data (shared type for consistency)
+export interface CompactionRequestData {
+  model?: string; // Custom model override for compaction
+  maxOutputTokens?: number;
+  continueMessage?: string;
+}
+
 // Frontend-specific metadata stored in cmuxMetadata field
 // Backend stores this as-is without interpretation (black-box)
 export type CmuxFrontendMetadata =
   | {
       type: "compaction-request";
       rawCommand: string; // The original /compact command as typed by user (for display)
-      parsed: {
-        maxOutputTokens?: number;
-        continueMessage?: string;
-      };
+      parsed: CompactionRequestData;
     }
   | {
       type: "compaction-result";
```
src/utils/messages/compactionOptions.test.ts

Lines changed: 81 additions & 0 deletions
```diff
@@ -0,0 +1,81 @@
+/**
+ * Tests for compaction options transformation
+ */
+
+import { applyCompactionOverrides } from "./compactionOptions";
+import type { SendMessageOptions } from "@/types/ipc";
+import type { CompactionRequestData } from "@/types/message";
+
+describe("applyCompactionOverrides", () => {
+  const baseOptions: SendMessageOptions = {
+    model: "anthropic:claude-sonnet-4-5",
+    thinkingLevel: "medium",
+    toolPolicy: [],
+    mode: "exec",
+  };
+
+  it("uses workspace model when no override specified", () => {
+    const compactData: CompactionRequestData = {};
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.model).toBe("anthropic:claude-sonnet-4-5");
+    expect(result.mode).toBe("compact");
+  });
+
+  it("applies custom model override", () => {
+    const compactData: CompactionRequestData = {
+      model: "anthropic:claude-haiku-4-5",
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.model).toBe("anthropic:claude-haiku-4-5");
+  });
+
+  it("sets thinking to off for Anthropic models", () => {
+    const compactData: CompactionRequestData = {
+      model: "anthropic:claude-haiku-4-5",
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.thinkingLevel).toBe("off");
+  });
+
+  it("preserves workspace thinking level for non-Anthropic models", () => {
+    const compactData: CompactionRequestData = {
+      model: "openai:gpt-5-pro",
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.thinkingLevel).toBe("medium");
+  });
+
+  it("applies maxOutputTokens override", () => {
+    const compactData: CompactionRequestData = {
+      maxOutputTokens: 8000,
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.maxOutputTokens).toBe(8000);
+  });
+
+  it("sets compact mode and tool policy", () => {
+    const compactData: CompactionRequestData = {};
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.mode).toBe("compact");
+    expect(result.toolPolicy).toEqual([{ regex_match: "compact_summary", action: "require" }]);
+  });
+
+  it("applies all overrides together", () => {
+    const compactData: CompactionRequestData = {
+      model: "openai:gpt-5",
+      maxOutputTokens: 5000,
+    };
+    const result = applyCompactionOverrides(baseOptions, compactData);
+
+    expect(result.model).toBe("openai:gpt-5");
+    expect(result.maxOutputTokens).toBe(5000);
+    expect(result.mode).toBe("compact");
+    expect(result.thinkingLevel).toBe("medium"); // Non-Anthropic preserves original
+  });
+});
```
src/utils/messages/compactionOptions.ts

Lines changed: 41 additions & 0 deletions
```diff
@@ -0,0 +1,41 @@
+/**
+ * Compaction options transformation
+ *
+ * Single source of truth for converting compaction metadata into SendMessageOptions.
+ * Used by both ChatInput (initial send) and useResumeManager (resume after interruption).
+ */
+
+import type { SendMessageOptions } from "@/types/ipc";
+import type { CompactionRequestData } from "@/types/message";
+
+/**
+ * Apply compaction-specific option overrides to base options.
+ *
+ * This function is the single source of truth for how compaction metadata
+ * transforms workspace defaults. Both initial sends and stream resumption
+ * use this function to ensure consistent behavior.
+ *
+ * @param baseOptions - Workspace default options (from localStorage or useSendMessageOptions)
+ * @param compactData - Compaction request metadata from /compact command
+ * @returns Final SendMessageOptions with compaction overrides applied
+ */
+export function applyCompactionOverrides(
+  baseOptions: SendMessageOptions,
+  compactData: CompactionRequestData
+): SendMessageOptions {
+  // Use custom model if specified, otherwise use workspace default
+  const compactionModel = compactData.model ?? baseOptions.model;
+
+  // Anthropic models don't support thinking, always use "off"
+  // Non-Anthropic models keep workspace default (backend will enforce policy)
+  const isAnthropic = compactionModel.startsWith("anthropic:");
+
+  return {
+    ...baseOptions,
+    model: compactionModel,
+    thinkingLevel: isAnthropic ? "off" : baseOptions.thinkingLevel,
+    toolPolicy: [{ regex_match: "compact_summary", action: "require" }],
+    maxOutputTokens: compactData.maxOutputTokens,
+    mode: "compact" as const,
+  };
+}
```
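Since `applyCompactionOverrides` is a pure function over plain objects, its behavior can be exercised standalone. A runnable sketch with the types inlined and simplified (the real `SendMessageOptions` in `@/types/ipc` has more fields):

```typescript
// Simplified stand-ins for the real types (illustrative only).
interface SendMessageOptions {
  model: string;
  thinkingLevel?: string;
  toolPolicy: Array<{ regex_match: string; action: string }>;
  maxOutputTokens?: number;
  mode: string;
}
interface CompactionRequestData {
  model?: string;
  maxOutputTokens?: number;
  continueMessage?: string;
}

// Same logic as the diff above: model override, thinking rule, tool policy.
function applyCompactionOverrides(
  baseOptions: SendMessageOptions,
  compactData: CompactionRequestData
): SendMessageOptions {
  const compactionModel = compactData.model ?? baseOptions.model;
  const isAnthropic = compactionModel.startsWith("anthropic:");
  return {
    ...baseOptions,
    model: compactionModel,
    thinkingLevel: isAnthropic ? "off" : baseOptions.thinkingLevel,
    toolPolicy: [{ regex_match: "compact_summary", action: "require" }],
    maxOutputTokens: compactData.maxOutputTokens,
    mode: "compact",
  };
}

// Example: `/compact -m haiku -t 8000` issued on a sonnet workspace
const opts = applyCompactionOverrides(
  { model: "anthropic:claude-sonnet-4-5", thinkingLevel: "medium", toolPolicy: [], mode: "exec" },
  { model: "anthropic:claude-haiku-4-5", maxOutputTokens: 8000 }
);
```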

0 commit comments