
Commit dc478d1

fix: ensure mediaType is present in MCP image transformation
- Add fallback to 'image/png' when MCP mimeType is undefined
- JSON.stringify omits undefined values, which caused mediaType to be missing from tool results, breaking image rendering in Anthropic provider
- Add debug logging to trace MCP image content transformation
- Add integration test verifying Chrome DevTools MCP screenshot handling
1 parent 9924fff commit dc478d1
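
The root cause, in a minimal sketch (illustrative values, not code from this commit): JSON.stringify drops object keys whose value is undefined, so an image part whose mimeType the MCP server never set loses its mediaType field entirely once the tool result is serialized, and the fallback restores it.

// Sketch only: how an undefined mimeType disappears during serialization.
const mimeType: string | undefined = undefined; // hypothetical MCP image item with no mimeType

const before = { type: "media", data: "<base64>", mediaType: mimeType };
JSON.stringify(before); // '{"type":"media","data":"<base64>"}'  (mediaType silently dropped)

const after = { type: "media", data: "<base64>", mediaType: mimeType || "image/png" };
JSON.stringify(after); // '{"type":"media","data":"<base64>","mediaType":"image/png"}'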

File tree: 2 files changed (+151, -1 lines)


src/node/services/mcpServerManager.ts

Lines changed: 13 additions & 1 deletion
@@ -64,13 +64,25 @@ function transformMCPResult(result: MCPCallToolResult): unknown {
     return result;
   }
 
+  // Debug: log what we received from MCP
+  log.debug("[MCP] transformMCPResult input", {
+    contentTypes: result.content.map((c) => c.type),
+    imageItems: result.content
+      .filter((c): c is MCPImageContent => c.type === "image")
+      .map((c) => ({ type: c.type, mimeType: c.mimeType, dataLen: c.data?.length })),
+  });
+
   // Transform to AI SDK content format
   const transformedContent: AISDKContentPart[] = result.content.map((item) => {
     if (item.type === "text") {
       return { type: "text" as const, text: item.text };
     }
     if (item.type === "image") {
-      return { type: "media" as const, data: item.data, mediaType: item.mimeType };
+      const imageItem = item;
+      // Ensure mediaType is present - default to image/png if missing
+      const mediaType = imageItem.mimeType || "image/png";
+      log.debug("[MCP] Transforming image content", { mimeType: imageItem.mimeType, mediaType });
+      return { type: "media" as const, data: imageItem.data, mediaType };
     }
     // For resource type, convert to text representation
     if (item.type === "resource") {
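
For readers skimming the diff, here is a self-contained sketch of the behavior this hunk establishes; the MCPImageContent and media-part types below are simplified assumptions for illustration, not the project's actual definitions.

// Simplified stand-ins for the project's types (assumptions, illustration only).
type MCPImageContent = { type: "image"; data: string; mimeType?: string };
type MediaPart = { type: "media"; data: string; mediaType: string };

function toMediaPart(item: MCPImageContent): MediaPart {
  // Mirrors the fix: default to image/png when the MCP server omits mimeType.
  const mediaType = item.mimeType || "image/png";
  return { type: "media", data: item.data, mediaType };
}

// A screenshot tool that omits mimeType still produces a renderable part:
toMediaPart({ type: "image", data: "<base64 screenshot>" });
// -> { type: "media", data: "<base64 screenshot>", mediaType: "image/png" }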

tests/ipc/mcpConfig.test.ts

Lines changed: 138 additions & 0 deletions
@@ -75,6 +75,144 @@ describeIntegration("MCP project configuration", () => {
 });
 
 describeIntegration("MCP server integration with model", () => {
+  test.concurrent(
+    "MCP image content is correctly transformed to AI SDK format",
+    async () => {
+      console.log("[MCP Image Test] Setting up workspace...");
+      // Setup workspace with Anthropic provider
+      const { env, workspaceId, tempGitRepo, cleanup } = await setupWorkspace(
+        "anthropic",
+        "mcp-chrome"
+      );
+      const client = resolveOrpcClient(env);
+      console.log("[MCP Image Test] Workspace created:", { workspaceId, tempGitRepo });
+
+      try {
+        // Add the Chrome DevTools MCP server to the project
+        // Use --headless and --no-sandbox for CI/root environments
+        console.log("[MCP Image Test] Adding Chrome DevTools MCP server...");
+        const addResult = await client.projects.mcp.add({
+          projectPath: tempGitRepo,
+          name: "chrome",
+          command:
+            "npx -y chrome-devtools-mcp@latest --headless --isolated --chromeArg='--no-sandbox'",
+        });
+        expect(addResult.success).toBe(true);
+        console.log("[MCP Image Test] MCP server added");
+
+        // Create stream collector to capture events
+        console.log("[MCP Image Test] Creating stream collector...");
+        const collector = createStreamCollector(env.orpc, workspaceId);
+        collector.start();
+        await collector.waitForSubscription();
+        console.log("[MCP Image Test] Stream collector ready");
+
+        // Send a message that should trigger screenshot
+        // First navigate to a simple page, then take a screenshot
+        console.log("[MCP Image Test] Sending message...");
+        const result = await sendMessageWithModel(
+          env,
+          workspaceId,
+          "Navigate to https://example.com and take a screenshot. Describe what you see in the screenshot.",
+          HAIKU_MODEL
+        );
+        console.log("[MCP Image Test] Message sent, result:", result.success);
+
+        expect(result.success).toBe(true);
+
+        // Wait for stream to complete (this may take a while with Chrome)
+        console.log("[MCP Image Test] Waiting for stream-end...");
+        await collector.waitForEvent("stream-end", 120000); // 2 minutes for Chrome operations
+        console.log("[MCP Image Test] Stream ended");
+        assertStreamSuccess(collector);
+
+        // Find the screenshot tool call and its result
+        const events = collector.getEvents();
+        const toolCallEnds = events.filter(
+          (e): e is Extract<typeof e, { type: "tool-call-end" }> => e.type === "tool-call-end"
+        );
+        console.log(
+          "[MCP Image Test] Tool call ends:",
+          toolCallEnds.map((e) => ({ toolName: e.toolName, resultType: typeof e.result }))
+        );
+
+        // Find the screenshot tool result
+        const screenshotResult = toolCallEnds.find((e) => e.toolName === "take_screenshot");
+        expect(screenshotResult).toBeDefined();
+
+        // Verify the result has correct AI SDK format with mediaType
+        const result_output = screenshotResult!.result as
+          | { type: string; value: unknown[] }
+          | unknown;
+        // Log media items to verify mediaType presence
+        if (
+          typeof result_output === "object" &&
+          result_output !== null &&
+          "value" in result_output
+        ) {
+          const value = (result_output as { value: unknown[] }).value;
+          const mediaPreview = value
+            .filter(
+              (v): v is object =>
+                typeof v === "object" &&
+                v !== null &&
+                "type" in v &&
+                (v as { type: string }).type === "media"
+            )
+            .map((m) => ({
+              type: (m as { type: string }).type,
+              mediaType: (m as { mediaType?: string }).mediaType,
+              dataLen: ((m as { data?: string }).data || "").length,
+            }));
+          console.log("[MCP Image Test] Media items:", JSON.stringify(mediaPreview));
+        }
+
+        // If it's properly transformed, it should have { type: "content", value: [...] }
+        if (
+          typeof result_output === "object" &&
+          result_output !== null &&
+          "type" in result_output
+        ) {
+          const typedResult = result_output as { type: string; value: unknown[] };
+          expect(typedResult.type).toBe("content");
+          expect(Array.isArray(typedResult.value)).toBe(true);
+
+          // Check for media content with mediaType
+          const mediaItems = typedResult.value.filter(
+            (item): item is { type: "media"; data: string; mediaType: string } =>
+              typeof item === "object" &&
+              item !== null &&
+              "type" in item &&
+              (item as { type: string }).type === "media"
+          );
+
+          expect(mediaItems.length).toBeGreaterThan(0);
+          // Verify mediaType is present and is a valid image type
+          for (const media of mediaItems) {
+            expect(media.mediaType).toBeDefined();
+            expect(media.mediaType).toMatch(/^image\//);
+            expect(media.data).toBeDefined();
+            expect(media.data.length).toBeGreaterThan(100); // Should have actual image data
+          }
+        }
+
+        // Verify model's response mentions seeing something (proves it understood the image)
+        const deltas = collector.getDeltas();
+        const responseText = extractTextFromEvents(deltas).toLowerCase();
+        console.log("[MCP Image Test] Response text preview:", responseText.slice(0, 200));
+        // Model should describe something it sees - domain name, content, or visual elements
+        expect(responseText).toMatch(/example|domain|website|page|text|heading|title/i);
+
+        collector.stop();
+      } finally {
+        console.log("[MCP Image Test] Cleaning up...");
+        await cleanup();
+        console.log("[MCP Image Test] Done");
+      }
+    },
+    180000 // 3 minutes - Chrome operations can be slow
+  );
+
   test.concurrent(
     "MCP tools are available to the model",
     async () => {
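
For reference, a sketch with illustrative values of a transformed take_screenshot tool result that would satisfy the assertions above (the real payload comes from the Chrome DevTools MCP server):

// Illustrative example only; field contents are placeholders, not captured output.
const exampleToolResult = {
  type: "content",
  value: [
    { type: "text", text: "Screenshot captured" }, // text parts may also be present
    { type: "media", mediaType: "image/png", data: "<base64, more than 100 chars>" }, // mediaType must match /^image\//
  ],
};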
