From b01ae136de606c6424c5ca0917d60bd1ab29a7e7 Mon Sep 17 00:00:00 2001 From: Christopher Turing Date: Sat, 30 May 2026 11:16:08 -0700 Subject: [PATCH] fix claude modified thinking resume recovery Co-Authored-By: Paperclip --- .../src/server/execute.remote.test.ts | 79 +++++++++++++++++++ .../claude-local/src/server/execute.ts | 5 +- .../adapters/claude-local/src/server/index.ts | 1 + .../claude-local/src/server/parse.test.ts | 27 +++++++ .../adapters/claude-local/src/server/parse.ts | 12 +++ 5 files changed, 122 insertions(+), 2 deletions(-) diff --git a/packages/adapters/claude-local/src/server/execute.remote.test.ts b/packages/adapters/claude-local/src/server/execute.remote.test.ts index ddc3959ad74..7f5cd786e3d 100644 --- a/packages/adapters/claude-local/src/server/execute.remote.test.ts +++ b/packages/adapters/claude-local/src/server/execute.remote.test.ts @@ -328,4 +328,83 @@ describe("claude remote execution", () => { expect(call?.[2]).toContain("session-123"); }); + it("retries modified-thinking resume failures with a fresh Claude session", async () => { + const rootDir = await mkdtemp(path.join(os.tmpdir(), "paperclip-claude-modified-thinking-")); + cleanupDirs.push(rootDir); + const workspaceDir = path.join(rootDir, "workspace"); + await mkdir(workspaceDir, { recursive: true }); + const modifiedThinkingMessage = + "API Error: 400 messages.N.content.M: `thinking` or `redacted_thinking` blocks in the latest assistant message cannot be modified. These blocks must remain as they were in the original response."; + + runChildProcess + .mockResolvedValueOnce({ + exitCode: 1, + signal: null, + timedOut: false, + stdout: JSON.stringify({ + type: "result", + subtype: "error_during_execution", + is_error: true, + result: modifiedThinkingMessage, + errors: [{ message: modifiedThinkingMessage }], + }), + stderr: modifiedThinkingMessage, + pid: 123, + startedAt: new Date().toISOString(), + }) + .mockResolvedValueOnce({ + exitCode: 0, + signal: null, + timedOut: false, + stdout: [ + JSON.stringify({ type: "system", subtype: "init", session_id: "fresh-session", model: "claude-sonnet" }), + JSON.stringify({ + type: "result", + session_id: "fresh-session", + result: "recovered", + usage: { input_tokens: 1, cache_read_input_tokens: 0, output_tokens: 1 }, + }), + ].join("\n"), + stderr: "", + pid: 124, + startedAt: new Date().toISOString(), + }); + + const result = await execute({ + runId: "run-modified-thinking", + agent: { + id: "agent-1", + companyId: "company-1", + name: "Claude Coder", + adapterType: "claude_local", + adapterConfig: {}, + }, + runtime: { + sessionId: "poisoned-session", + sessionParams: null, + sessionDisplayId: "poisoned-session", + taskKey: null, + }, + config: { + command: "claude", + }, + context: { + paperclipWorkspace: { + cwd: workspaceDir, + source: "project_primary", + }, + }, + onLog: async () => {}, + }); + + expect(runChildProcess).toHaveBeenCalledTimes(2); + const initialCall = runChildProcess.mock.calls[0] as unknown as [string, string, string[]] | undefined; + const retryCall = runChildProcess.mock.calls[1] as unknown as [string, string, string[]] | undefined; + expect(initialCall?.[2]).toContain("--resume"); + expect(initialCall?.[2]).toContain("poisoned-session"); + expect(retryCall?.[2]).not.toContain("--resume"); + expect(result.sessionId).toBe("fresh-session"); + expect(result.clearSession).toBe(false); + }); + }); diff --git a/packages/adapters/claude-local/src/server/execute.ts b/packages/adapters/claude-local/src/server/execute.ts index 067f68cdbce..c0732d5e832 100644 --- a/packages/adapters/claude-local/src/server/execute.ts +++ b/packages/adapters/claude-local/src/server/execute.ts @@ -51,6 +51,7 @@ import { describeClaudeFailure, detectClaudeLoginRequired, extractClaudeRetryNotBefore, + isClaudeModifiedThinkingReplayError, isClaudeMaxTurnsResult, isClaudeTransientUpstreamError, isClaudeUnknownSessionError, @@ -948,11 +949,11 @@ export async function execute(ctx: AdapterExecutionContext): Promise { + it("detects Anthropic's backtick-wrapped modified thinking resume failure", () => { + const message = + "API Error: 400 messages.N.content.M: `thinking` or `redacted_thinking` blocks in the latest assistant message cannot be modified. These blocks must remain as they were in the original response."; + + expect( + isClaudeModifiedThinkingReplayError({ + result: message, + }), + ).toBe(true); + expect( + isClaudeModifiedThinkingReplayError({ + errors: [{ message }], + }), + ).toBe(true); + }); + + it("does not classify unrelated validation errors as modified thinking replay failures", () => { + expect( + isClaudeModifiedThinkingReplayError({ + result: "API Error: 400 messages.0.content.0.text is required.", + }), + ).toBe(false); + }); +}); + describe("isClaudeTransientUpstreamError", () => { it("classifies the 'out of extra usage' subscription window failure as transient", () => { expect( diff --git a/packages/adapters/claude-local/src/server/parse.ts b/packages/adapters/claude-local/src/server/parse.ts index f645c4f27aa..203d7124766 100644 --- a/packages/adapters/claude-local/src/server/parse.ts +++ b/packages/adapters/claude-local/src/server/parse.ts @@ -196,6 +196,18 @@ export function isClaudeUnknownSessionError(parsed: Record): bo ); } +const CLAUDE_MODIFIED_THINKING_REPLAY_PATTERN = + /[`'"]?(?:thinking|redacted_thinking)[`'"]?(?:\s+or\s+[`'"]?(?:thinking|redacted_thinking)[`'"]?)?\s+blocks?\s+in\s+the\s+latest\s+assistant\s+message\s+cannot\s+be\s+modified/i; + +export function isClaudeModifiedThinkingReplayError(parsed: Record): boolean { + const resultText = asString(parsed.result, "").trim(); + const allMessages = [resultText, ...extractClaudeErrorMessages(parsed)] + .map((msg) => msg.trim()) + .filter(Boolean); + + return allMessages.some((msg) => CLAUDE_MODIFIED_THINKING_REPLAY_PATTERN.test(msg)); +} + function buildClaudeTransientHaystack(input: { parsed?: Record | null; stdout?: string | null;