diff --git a/packages/opencode/src/provider/models.ts b/packages/opencode/src/provider/models.ts index 514203e9..97310dd1 100644 --- a/packages/opencode/src/provider/models.ts +++ b/packages/opencode/src/provider/models.ts @@ -28,6 +28,12 @@ export namespace ModelsDev { context: z.number(), output: z.number(), }), + modalities: z + .object({ + input: z.array(z.enum(["text", "audio", "image", "video", "pdf"])), + output: z.array(z.enum(["text", "audio", "image", "video", "pdf"])), + }) + .optional(), experimental: z.boolean().optional(), options: z.record(z.string(), z.any()), provider: z.object({ npm: z.string() }).optional(), diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index c18bc489..e0fe4be2 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -279,6 +279,11 @@ export namespace Provider { context: 0, output: 0, }, + modalities: model.modalities ?? + existing?.modalities ?? { + input: ["text"], + output: ["text"], + }, provider: model.provider ?? existing?.provider, } parsed.models[modelID] = parsedModel diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 7704a615..8dc059ca 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -17,71 +17,6 @@ export namespace MessageV2 { }), ) - export const ToolStatePending = z - .object({ - status: z.literal("pending"), - }) - .meta({ - ref: "ToolStatePending", - }) - - export type ToolStatePending = z.infer - - export const ToolStateRunning = z - .object({ - status: z.literal("running"), - input: z.any(), - title: z.string().optional(), - metadata: z.record(z.string(), z.any()).optional(), - time: z.object({ - start: z.number(), - }), - }) - .meta({ - ref: "ToolStateRunning", - }) - export type ToolStateRunning = z.infer - - export const ToolStateCompleted = z - .object({ - status: z.literal("completed"), - input: z.record(z.string(), z.any()), - output: z.string(), - title: z.string(), - metadata: z.record(z.string(), z.any()), - time: z.object({ - start: z.number(), - end: z.number(), - compacted: z.number().optional(), - }), - }) - .meta({ - ref: "ToolStateCompleted", - }) - export type ToolStateCompleted = z.infer - - export const ToolStateError = z - .object({ - status: z.literal("error"), - input: z.record(z.string(), z.any()), - error: z.string(), - metadata: z.record(z.string(), z.any()).optional(), - time: z.object({ - start: z.number(), - end: z.number(), - }), - }) - .meta({ - ref: "ToolStateError", - }) - export type ToolStateError = z.infer - - export const ToolState = z - .discriminatedUnion("status", [ToolStatePending, ToolStateRunning, ToolStateCompleted, ToolStateError]) - .meta({ - ref: "ToolState", - }) - const PartBase = z.object({ id: z.string(), sessionID: z.string(), @@ -134,17 +69,6 @@ export namespace MessageV2 { }) export type ReasoningPart = z.infer - export const ToolPart = PartBase.extend({ - type: z.literal("tool"), - callID: z.string(), - tool: z.string(), - state: ToolState, - metadata: z.record(z.string(), z.any()).optional(), - }).meta({ - ref: "ToolPart", - }) - export type ToolPart = z.infer - const FilePartSourceBase = z.object({ text: z .object({ @@ -228,6 +152,83 @@ export namespace MessageV2 { }) export type StepFinishPart = z.infer + export const ToolStatePending = z + .object({ + status: z.literal("pending"), + }) + .meta({ + ref: "ToolStatePending", + }) + + export type ToolStatePending = z.infer + + export const ToolStateRunning = z + .object({ + status: z.literal("running"), + input: z.any(), + title: z.string().optional(), + metadata: z.record(z.string(), z.any()).optional(), + time: z.object({ + start: z.number(), + }), + }) + .meta({ + ref: "ToolStateRunning", + }) + export type ToolStateRunning = z.infer + + export const ToolStateCompleted = z + .object({ + status: z.literal("completed"), + input: z.record(z.string(), z.any()), + output: z.string(), + title: z.string(), + metadata: z.record(z.string(), z.any()), + time: z.object({ + start: z.number(), + end: z.number(), + compacted: z.number().optional(), + }), + attachments: FilePart.array().optional(), + }) + .meta({ + ref: "ToolStateCompleted", + }) + export type ToolStateCompleted = z.infer + + export const ToolStateError = z + .object({ + status: z.literal("error"), + input: z.record(z.string(), z.any()), + error: z.string(), + metadata: z.record(z.string(), z.any()).optional(), + time: z.object({ + start: z.number(), + end: z.number(), + }), + }) + .meta({ + ref: "ToolStateError", + }) + export type ToolStateError = z.infer + + export const ToolState = z + .discriminatedUnion("status", [ToolStatePending, ToolStateRunning, ToolStateCompleted, ToolStateError]) + .meta({ + ref: "ToolState", + }) + + export const ToolPart = PartBase.extend({ + type: z.literal("tool"), + callID: z.string(), + tool: z.string(), + state: ToolState, + metadata: z.record(z.string(), z.any()).optional(), + }).meta({ + ref: "ToolPart", + }) + export type ToolPart = z.infer + const Base = z.object({ id: z.string(), sessionID: z.string(), @@ -531,7 +532,25 @@ export namespace MessageV2 { }, ] if (part.type === "tool") { - if (part.state.status === "completed") + if (part.state.status === "completed") { + if (part.state.attachments?.length) { + result.push({ + id: Identifier.ascending("message"), + role: "user", + parts: [ + { + type: "text", + text: `Tool ${part.tool} returned an attachment:`, + }, + ...part.state.attachments.map((attachment) => ({ + type: "file" as const, + url: attachment.url, + mediaType: attachment.mime, + filename: attachment.filename, + })), + ], + }) + } return [ { type: ("tool-" + part.tool) as `tool-${string}`, @@ -542,6 +561,7 @@ export namespace MessageV2 { callProviderMetadata: part.metadata, }, ] + } if (part.state.status === "error") return [ { diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index b65309d9..0ccb208c 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -457,6 +457,10 @@ export namespace SessionPrompt { abort: options.abortSignal!, messageID: input.processor.message.id, callID: options.toolCallId, + extra: { + modelID: input.modelID, + providerID: input.providerID, + }, agent: input.agent.name, metadata: async (val) => { const match = input.processor.partFromToolCall(options.toolCallId) @@ -989,6 +993,7 @@ export namespace SessionPrompt { start: match.state.time.start, end: Date.now(), }, + attachments: value.output.attachments, }, }) delete toolcalls[value.toolCallId] diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts index 2ed3accb..5e8cecaf 100644 --- a/packages/opencode/src/tool/read.ts +++ b/packages/opencode/src/tool/read.ts @@ -7,6 +7,8 @@ import { FileTime } from "../file/time" import DESCRIPTION from "./read.txt" import { Filesystem } from "../util/filesystem" import { Instance } from "../project/instance" +import { Provider } from "../provider/provider" +import { Identifier } from "../id/id" const DEFAULT_READ_LIMIT = 2000 const MAX_LINE_LENGTH = 2000 @@ -23,6 +25,8 @@ export const ReadTool = Tool.define("read", { if (!path.isAbsolute(filepath)) { filepath = path.join(process.cwd(), filepath) } + const title = path.relative(Instance.worktree, filepath) + if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) { throw new Error(`File ${filepath} is not in the current working directory`) } @@ -48,12 +52,45 @@ export const ReadTool = Tool.define("read", { throw new Error(`File not found: ${filepath}`) } - const limit = params.limit ?? DEFAULT_READ_LIMIT - const offset = params.offset || 0 const isImage = isImageFile(filepath) - if (isImage) throw new Error(`This is an image file of type: ${isImage}\nUse a different tool to process images`) + const supportsImages = await (async () => { + if (!ctx.extra?.["providerID"] || !ctx.extra?.["modelID"]) return false + const providerID = ctx.extra["providerID"] as string + const modelID = ctx.extra["modelID"] as string + const model = await Provider.getModel(providerID, modelID).catch(() => undefined) + if (!model) return false + return model.info.modalities?.input?.includes("image") ?? false + })() + if (isImage) { + if (!supportsImages) { + throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`) + } + const mime = file.type + const msg = "Image read successfully" + return { + title, + output: msg, + metadata: { + preview: msg, + }, + attachments: [ + { + id: Identifier.ascending("part"), + sessionID: ctx.sessionID, + messageID: ctx.messageID, + type: "file", + mime, + url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`, + }, + ], + } + } + const isBinary = await isBinaryFile(filepath, file) if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`) + + const limit = params.limit ?? DEFAULT_READ_LIMIT + const offset = params.offset || 0 const lines = await file.text().then((text) => text.split("\n")) const raw = lines.slice(offset, offset + limit).map((line) => { return line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line @@ -76,7 +113,7 @@ export const ReadTool = Tool.define("read", { FileTime.read(ctx.sessionID, filepath) return { - title: path.relative(Instance.worktree, filepath), + title, output, metadata: { preview, diff --git a/packages/opencode/src/tool/read.txt b/packages/opencode/src/tool/read.txt index 3904c093..b5bffee2 100644 --- a/packages/opencode/src/tool/read.txt +++ b/packages/opencode/src/tool/read.txt @@ -7,6 +7,6 @@ Usage: - You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters - Any lines longer than 2000 characters will be truncated - Results are returned using cat -n format, with line numbers starting at 1 -- This tool cannot read binary files, including images -- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful. +- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful. - If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents. +- You can read image files using this tool. diff --git a/packages/opencode/src/tool/tool.ts b/packages/opencode/src/tool/tool.ts index a372a69d..2fc72274 100644 --- a/packages/opencode/src/tool/tool.ts +++ b/packages/opencode/src/tool/tool.ts @@ -1,9 +1,11 @@ import z from "zod/v4" +import type { MessageV2 } from "../session/message-v2" export namespace Tool { interface Metadata { [key: string]: any } + export type Context = { sessionID: string messageID: string @@ -25,6 +27,7 @@ export namespace Tool { title: string metadata: M output: string + attachments?: MessageV2.FilePart[] }> }> }