diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 46686947e1fc..057b5eb66abe 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -11,6 +11,7 @@ import { MessageTable, PartTable, SessionTable } from "./session.sql" import { ProviderError } from "@/provider" import { iife } from "@/util/iife" import { errorMessage } from "@/util/error" +import { isMedia } from "@/util/media" import type { SystemError } from "bun" import type { Provider } from "@/provider" import { ModelID, ProviderID } from "@/provider/schema" @@ -25,10 +26,7 @@ interface FetchDecompressionError extends Error { } export const SYNTHETIC_ATTACHMENT_PROMPT = "Attached image(s) from tool result:" - -export function isMedia(mime: string) { - return mime.startsWith("image/") || mime === "application/pdf" -} +export { isMedia } export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({})) export const AbortedError = NamedError.create("MessageAbortedError", z.object({ message: z.string() })) diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts index 18c668ca0701..29d36692c667 100644 --- a/packages/opencode/src/tool/read.ts +++ b/packages/opencode/src/tool/read.ts @@ -1,7 +1,6 @@ import z from "zod" -import { Effect, Scope } from "effect" +import { Effect, Option, Scope } from "effect" import { createReadStream } from "fs" -import { open } from "fs/promises" import * as path from "path" import { createInterface } from "readline" import * as Tool from "./tool" @@ -11,12 +10,14 @@ import DESCRIPTION from "./read.txt" import { Instance } from "../project/instance" import { assertExternalDirectoryEffect } from "./external-directory" import { Instruction } from "../session/instruction" +import { isImageAttachment, isPdfAttachment, sniffAttachmentMime } from "@/util/media" const DEFAULT_READ_LIMIT = 2000 const MAX_LINE_LENGTH = 2000 const 
/** File extensions that are treated as binary without inspecting any content. */
const BINARY_EXTENSIONS: ReadonlySet<string> = new Set([
  ".zip",
  ".tar",
  ".gz",
  ".exe",
  ".dll",
  ".so",
  ".class",
  ".jar",
  ".war",
  ".7z",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".ppt",
  ".pptx",
  ".odt",
  ".ods",
  ".odp",
  ".bin",
  ".dat",
  ".obj",
  ".o",
  ".a",
  ".lib",
  ".wasm",
  ".pyc",
  ".pyo",
])

/**
 * Decides whether a file should be rejected as binary.
 *
 * @param filepath Path of the file; only its extension is examined (case-insensitively).
 * @param bytes Bytes sampled from the file. An empty sample is treated as text.
 * @returns `true` when the extension is a known binary type, when the sample
 *   contains a NUL byte, or when more than 30% of the sample consists of
 *   control characters other than tab, LF, VT, FF and CR.
 */
const isBinaryFile = (filepath: string, bytes: Uint8Array): boolean => {
  if (BINARY_EXTENSIONS.has(path.extname(filepath).toLowerCase())) return true
  if (bytes.length === 0) return false

  let nonPrintableCount = 0
  // Iterate values rather than indexes so every element is a plain `number`
  // even when `noUncheckedIndexedAccess` is enabled.
  for (const byte of bytes) {
    // A single NUL byte is decisive on its own.
    if (byte === 0) return true
    // Bytes 9..13 are ordinary whitespace controls; every other value below 32 counts.
    if (byte < 9 || (byte > 13 && byte < 32)) nonPrintableCount++
  }

  // The threshold is strict: exactly 30% control bytes is still text.
  return nonPrintableCount / bytes.length > 0.3
}
= yield* instruction.resolve(ctx.messages, filepath, ctx.messageID) + const sample = yield* readSample(filepath, Number(stat.size), SAMPLE_BYTES) - const mime = AppFileSystem.mimeType(filepath) - const isImage = mime.startsWith("image/") && mime !== "image/svg+xml" && mime !== "image/vnd.fastbidsheet" - const isPdf = mime === "application/pdf" - if (isImage || isPdf) { - const msg = `${isImage ? "Image" : "PDF"} read successfully` + const mime = sniffAttachmentMime(sample, AppFileSystem.mimeType(filepath)) + if (isImageAttachment(mime) || isPdfAttachment(mime)) { + const bytes = yield* fs.readFile(filepath) + const msg = isPdfAttachment(mime) ? "PDF read successfully" : "Image read successfully" return { title, output: msg, @@ -159,13 +218,13 @@ export const ReadTool = Tool.define( { type: "file" as const, mime, - url: `data:${mime};base64,${Buffer.from(yield* fs.readFile(filepath)).toString("base64")}`, + url: `data:${mime};base64,${Buffer.from(bytes).toString("base64")}`, }, ], } } - if (yield* Effect.promise(() => isBinaryFile(filepath, Number(stat.size)))) { + if (isBinaryFile(filepath, sample)) { return yield* Effect.fail(new Error(`Cannot read binary file: ${filepath}`)) } @@ -261,63 +320,3 @@ async function lines(filepath: string, opts: { limit: number; offset: number }) return { raw, count, cut, more, offset: opts.offset } } - -async function isBinaryFile(filepath: string, fileSize: number): Promise { - const ext = path.extname(filepath).toLowerCase() - // binary check for common non-text extensions - switch (ext) { - case ".zip": - case ".tar": - case ".gz": - case ".exe": - case ".dll": - case ".so": - case ".class": - case ".jar": - case ".war": - case ".7z": - case ".doc": - case ".docx": - case ".xls": - case ".xlsx": - case ".ppt": - case ".pptx": - case ".odt": - case ".ods": - case ".odp": - case ".bin": - case ".dat": - case ".obj": - case ".o": - case ".a": - case ".lib": - case ".wasm": - case ".pyc": - case ".pyo": - return true - default: - 
/** Returns true when `bytes` begins with exactly the byte sequence `prefix`. */
const startsWith = (bytes: Uint8Array, prefix: readonly number[]) =>
  prefix.length <= bytes.length && prefix.every((value, index) => bytes[index] === value)

/**
 * DIB header sizes defined for BMP files (BITMAPCOREHEADER through
 * BITMAPV5HEADER, plus the OS/2 2.x variants).
 */
const BMP_DIB_HEADER_SIZES: ReadonlySet<number> = new Set([12, 16, 40, 52, 56, 64, 108, 124])

/**
 * Recognizes a BMP header. The two-byte "BM" magic alone also matches ordinary
 * text (any file starting with "BM"), so the little-endian DIB header size at
 * offset 14 must additionally be one of the defined values.
 */
const isBmp = (bytes: Uint8Array) => {
  if (bytes.length < 18 || !startsWith(bytes, [0x42, 0x4d])) return false
  // Respect byteOffset so subarray views are read from the right position.
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength)
  return BMP_DIB_HEADER_SIZES.has(view.getUint32(14, true))
}

/** Returns true when the MIME type is exactly `application/pdf`. */
export function isPdfAttachment(mime: string): boolean {
  return mime === "application/pdf"
}

/** Returns true for any `image/*` MIME type (SVG included) and for PDF. */
export function isMedia(mime: string): boolean {
  return mime.startsWith("image/") || isPdfAttachment(mime)
}

/**
 * Returns true for `image/*` MIME types, except `image/svg+xml` and
 * `image/vnd.fastbidsheet`, which are explicitly excluded.
 */
export function isImageAttachment(mime: string): boolean {
  return mime.startsWith("image/") && mime !== "image/svg+xml" && mime !== "image/vnd.fastbidsheet"
}

/**
 * Detects a media MIME type from the leading bytes of a file.
 *
 * @param bytes Leading bytes of the content; may be empty or shorter than any signature.
 * @param fallback MIME type to return when no signature matches.
 * @returns The sniffed MIME type (PNG, JPEG, GIF, BMP, PDF or WebP), otherwise `fallback`.
 */
export function sniffAttachmentMime(bytes: Uint8Array, fallback: string): string {
  if (startsWith(bytes, [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) return "image/png"
  if (startsWith(bytes, [0xff, 0xd8, 0xff])) return "image/jpeg"
  // Require the full "GIF87a" / "GIF89a" signature; "GIF8" alone can begin plain text.
  if (
    startsWith(bytes, [0x47, 0x49, 0x46, 0x38, 0x37, 0x61]) ||
    startsWith(bytes, [0x47, 0x49, 0x46, 0x38, 0x39, 0x61])
  ) {
    return "image/gif"
  }
  if (isBmp(bytes)) return "image/bmp"
  if (startsWith(bytes, [0x25, 0x50, 0x44, 0x46, 0x2d])) return "application/pdf"
  // WebP is a RIFF container whose form type at offset 8 is "WEBP".
  if (startsWith(bytes, [0x52, 0x49, 0x46, 0x46]) && startsWith(bytes.subarray(8), [0x57, 0x45, 0x42, 0x50])) {
    return "image/webp"
  }

  return fallback
}
"https://api.anthropic.com", + npm: "@ai-sdk/anthropic", + }, + capabilities: { + ...model.capabilities, + attachment: true, + input: { + ...model.capabilities.input, + image: true, + pdf: true, + }, + }, + } + const jpeg = Buffer.from([0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01]).toString( + "base64", + ) + const userID = "m-user-anthropic" + const assistantID = "m-assistant-anthropic" + const input: MessageV2.WithParts[] = [ + { + info: userInfo(userID), + parts: [ + { + ...basePart(userID, "u1-anthropic"), + type: "text", + text: "run tool", + }, + ] as MessageV2.Part[], + }, + { + info: assistantInfo(assistantID, userID), + parts: [ + { + ...basePart(assistantID, "a1-anthropic"), + type: "tool", + callID: "call-anthropic-1", + tool: "read", + state: { + status: "completed", + input: { filePath: "/tmp/rails-demo.png" }, + output: "Image read successfully", + title: "Read", + metadata: {}, + time: { start: 0, end: 1 }, + attachments: [ + { + ...basePart(assistantID, "file-anthropic-1"), + type: "file", + mime: "image/jpeg", + filename: "rails-demo.png", + url: `data:image/jpeg;base64,${jpeg}`, + }, + ], + }, + }, + ] as MessageV2.Part[], + }, + ] + + const result = ProviderTransform.message(await MessageV2.toModelMessages(input, anthropicModel), anthropicModel, {}) + expect(result).toHaveLength(3) + expect(result[2].role).toBe("tool") + expect(result[2].content[0]).toMatchObject({ + type: "tool-result", + toolCallId: "call-anthropic-1", + toolName: "read", + output: { + type: "content", + value: [ + { type: "text", text: "Image read successfully" }, + { type: "media", mediaType: "image/jpeg", data: jpeg }, + ], + }, + }) + }) + test("omits provider metadata when assistant model differs", async () => { const userID = "m-user" const assistantID = "m-assistant" diff --git a/packages/opencode/test/tool/read.test.ts b/packages/opencode/test/tool/read.test.ts index 7456990ad0ee..42817d15dfa4 100644 --- 
a/packages/opencode/test/tool/read.test.ts +++ b/packages/opencode/test/tool/read.test.ts @@ -394,6 +394,19 @@ describe("tool.read truncation", () => { }), ) + it.live("detects attachment media from file contents", () => + Effect.gen(function* () { + const dir = yield* tmpdirScoped() + const jpeg = Buffer.from([0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 0x4a, 0x46, 0x49, 0x46, 0x00, 0x01]) + yield* put(path.join(dir, "image.bin"), jpeg) + + const result = yield* exec(dir, { filePath: path.join(dir, "image.bin") }) + expect(result.output).toBe("Image read successfully") + expect(result.attachments?.[0].mime).toBe("image/jpeg") + expect(result.attachments?.[0].url.startsWith("data:image/jpeg;base64,")).toBe(true) + }), + ) + it.live("large image files are properly attached without error", () => Effect.gen(function* () { const result = yield* exec(FIXTURES_DIR, { filePath: path.join(FIXTURES_DIR, "large-image.png") })