diff --git a/cloud/app/src/routes/zen/v1/chat/completions.ts b/cloud/app/src/routes/zen/v1/chat/completions.ts index 0a3b0e75..b68264cc 100644 --- a/cloud/app/src/routes/zen/v1/chat/completions.ts +++ b/cloud/app/src/routes/zen/v1/chat/completions.ts @@ -2,14 +2,19 @@ import type { APIEvent } from "@solidjs/start/server" import { handler } from "~/util/zen" export function POST(input: APIEvent) { + let usage: any return handler(input, { - transformBody: (body: any) => ({ + modifyBody: (body: any) => ({ ...body, stream_options: { include_usage: true, }, }), - parseUsageChunk: (chunk: string) => { + setAuthHeader: (headers: Headers, apiKey: string) => { + headers.set("authorization", `Bearer ${apiKey}`) + }, + parseApiKey: (headers: Headers) => headers.get("authorization")?.split(" ")[1], + onStreamPart: (chunk: string) => { if (!chunk.startsWith("data: ")) return let json @@ -19,15 +24,15 @@ export function POST(input: APIEvent) { return } - return json.usage + if (!json.usage) return + usage = json.usage }, - buildUsage: (usage: any) => ({ + getStreamUsage: () => usage, + normalizeUsage: (usage: any) => ({ inputTokens: usage.prompt_tokens ?? 0, outputTokens: usage.completion_tokens ?? 0, reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0, cacheReadTokens: usage.prompt_tokens_details?.cached_tokens ?? 0, - //cacheWriteTokens = usage.providerMetadata?.["anthropic"]?.["cacheCreationInputTokens"] ?? 0 - cacheWriteTokens: 0, }), }) } diff --git a/cloud/app/src/routes/zen/v1/messages.ts b/cloud/app/src/routes/zen/v1/messages.ts new file mode 100644 index 00000000..ff399b03 --- /dev/null +++ b/cloud/app/src/routes/zen/v1/messages.ts @@ -0,0 +1,61 @@ +import type { APIEvent } from "@solidjs/start/server" +import { handler } from "~/util/zen" + +type Usage = { + cache_creation?: { + ephemeral_5m_input_tokens?: number + ephemeral_1h_input_tokens?: number + } + cache_creation_input_tokens?: number + cache_read_input_tokens?: number + input_tokens?: number + output_tokens?: number + server_tool_use?: { + web_search_requests?: number + } +} + +export function POST(input: APIEvent) { + let usage: Usage + return handler(input, { + modifyBody: (body: any) => ({ + ...body, + service_tier: "standard_only", + }), + setAuthHeader: (headers: Headers, apiKey: string) => headers.set("x-api-key", apiKey), + parseApiKey: (headers: Headers) => headers.get("x-api-key") ?? undefined, + onStreamPart: (chunk: string) => { + const data = chunk.split("\n")[1] + if (!data.startsWith("data: ")) return + + let json + try { + json = JSON.parse(data.slice(6)) as { usage?: Usage } + } catch (e) { + return + } + + if (!json.usage) return + usage = { + ...usage, + ...json.usage, + cache_creation: { + ...usage?.cache_creation, + ...json.usage.cache_creation, + }, + server_tool_use: { + ...usage?.server_tool_use, + ...json.usage.server_tool_use, + }, + } + }, + getStreamUsage: () => usage, + normalizeUsage: (usage: Usage) => ({ + inputTokens: usage.input_tokens ?? 0, + outputTokens: usage.output_tokens ?? 0, + cacheReadTokens: usage.cache_read_input_tokens ?? 0, + cacheWrite5mTokens: usage.cache_creation?.ephemeral_5m_input_tokens, + cacheWrite1hTokens: usage.cache_creation?.ephemeral_1h_input_tokens, + }), + }) +} diff --git a/cloud/app/src/routes/zen/v1/responses.ts b/cloud/app/src/routes/zen/v1/responses.ts index 844b4fef..518a431a 100644 --- a/cloud/app/src/routes/zen/v1/responses.ts +++ b/cloud/app/src/routes/zen/v1/responses.ts @@ -2,8 +2,13 @@ import type { APIEvent } from "@solidjs/start/server" import { handler } from "~/util/zen" export function POST(input: APIEvent) { + let usage: any return handler(input, { - parseUsageChunk: (chunk: string) => { + setAuthHeader: (headers: Headers, apiKey: string) => { + headers.set("authorization", `Bearer ${apiKey}`) + }, + parseApiKey: (headers: Headers) => headers.get("authorization")?.split(" ")[1], + onStreamPart: (chunk: string) => { const [event, data] = chunk.split("\n") if (event !== "event: response.completed") return if (!data.startsWith("data: ")) return @@ -15,9 +20,11 @@ export function POST(input: APIEvent) { return } - return json.response?.usage + if (!json.response?.usage) return + usage = json.response.usage }, - buildUsage: (usage: any) => { + getStreamUsage: () => usage, + normalizeUsage: (usage: any) => { const inputTokens = usage.input_tokens ?? 0 const outputTokens = usage.output_tokens ?? 0 const reasoningTokens = usage.output_tokens_details?.reasoning_tokens ?? 0 @@ -27,7 +34,6 @@ export function POST(input: APIEvent) { outputTokens: outputTokens - reasoningTokens, reasoningTokens, cacheReadTokens, - cacheWriteTokens: 0, } }, }) diff --git a/cloud/app/src/util/zen.ts b/cloud/app/src/util/zen.ts index edaac144..9ccf9e56 100644 --- a/cloud/app/src/util/zen.ts +++ b/cloud/app/src/util/zen.ts @@ -9,14 +9,18 @@ import { Resource } from "@opencode/cloud-resource" export async function handler( input: APIEvent, opts: { - transformBody?: (body: any) => any - parseUsageChunk: (chunk: string) => string | undefined - buildUsage: (body: any) => { + modifyBody?: (body: any) => any + setAuthHeader: (headers: Headers, apiKey: string) => void + parseApiKey: (headers: Headers) => string | undefined + onStreamPart: (chunk: string) => void + getStreamUsage: () => any + normalizeUsage: (body: any) => { inputTokens: number outputTokens: number - reasoningTokens: number + reasoningTokens?: number cacheReadTokens: number - cacheWriteTokens: number + cacheWrite5mTokens?: number + cacheWrite1hTokens?: number } }, ) { @@ -25,20 +29,63 @@ export async function handler( class ModelError extends Error {} const MODELS = { - // "anthropic/claude-sonnet-4": { - // auth: true, - // api: "https://api.anthropic.com", - // apiKey: Resource.ANTHROPIC_API_KEY.value, - // model: "claude-sonnet-4-20250514", - // cost: { - // input: 0.0000015, - // output: 0.000006, - // reasoning: 0.0000015, - // cacheRead: 0.0000001, - // cacheWrite: 0.0000001, - // }, - // headerMappings: {}, - // }, + "claude-opus-4-1": { + id: "claude-opus-4-1" as const, + auth: true, + api: "https://api.anthropic.com", + apiKey: Resource.ANTHROPIC_API_KEY.value, + model: "claude-opus-4-1-20250805", + cost: { + input: 0.000015, + output: 0.000075, + cacheRead: 0.0000015, + cacheWrite5m: 0.00001875, + cacheWrite1h: 0.00003, + }, + headerMappings: {}, + }, + "claude-sonnet-4": { + id: "claude-sonnet-4" as const, + auth: true, + api: "https://api.anthropic.com", + apiKey: Resource.ANTHROPIC_API_KEY.value, + model: "claude-sonnet-4-20250514", + cost: (usage: any) => { + const totalInputTokens = + usage.inputTokens + usage.cacheReadTokens + usage.cacheWrite5mTokens + usage.cacheWrite1hTokens + return totalInputTokens <= 200_000 + ? { + input: 0.000003, + output: 0.000015, + cacheRead: 0.0000003, + cacheWrite5m: 0.00000375, + cacheWrite1h: 0.000006, + } + : { + input: 0.000006, + output: 0.0000225, + cacheRead: 0.0000006, + cacheWrite5m: 0.0000075, + cacheWrite1h: 0.000012, + } + }, + headerMappings: {}, + }, + "claude-3-5-haiku": { + id: "claude-3-5-haiku" as const, + auth: true, + api: "https://api.anthropic.com", + apiKey: Resource.ANTHROPIC_API_KEY.value, + model: "claude-3-5-haiku-20241022", + cost: { + input: 0.0000008, + output: 0.000004, + cacheRead: 0.00000008, + cacheWrite5m: 0.000001, + cacheWrite1h: 0.0000016, + }, + headerMappings: {}, + }, "gpt-5": { id: "gpt-5" as const, auth: true, @@ -48,9 +95,9 @@ export async function handler( cost: { input: 0.00000125, output: 0.00001, - reasoning: 0.00001, cacheRead: 0.000000125, - cacheWrite: 0, + cacheWrite5m: 0, + cacheWrite1h: 0, }, headerMappings: {}, }, @@ -63,9 +110,9 @@ export async function handler( cost: { input: 0.00000038, output: 0.00000153, - reasoning: 0, cacheRead: 0, - cacheWrite: 0, + cacheWrite5m: 0, + cacheWrite1h: 0, }, headerMappings: {}, }, @@ -78,9 +125,9 @@ export async function handler( cost: { input: 0.0000006, output: 0.0000025, - reasoning: 0, cacheRead: 0, - cacheWrite: 0, + cacheWrite5m: 0, + cacheWrite1h: 0, }, headerMappings: {}, }, @@ -93,9 +140,9 @@ export async function handler( cost: { input: 0, output: 0, - reasoning: 0, cacheRead: 0, - cacheWrite: 0, + cacheWrite5m: 0, + cacheWrite1h: 0, }, headerMappings: { "x-grok-conv-id": "x-opencode-session", @@ -112,9 +159,9 @@ export async function handler( cost: { input: 0.00000038, output: 0.00000153, - reasoning: 0, cacheRead: 0, - cacheWrite: 0, + cacheWrite5m: 0, + cacheWrite1h: 0, }, headerMappings: {}, }, @@ -156,14 +203,14 @@ export async function handler( const headers = input.request.headers headers.delete("host") headers.delete("content-length") - headers.set("authorization", `Bearer ${MODEL.apiKey}`) + opts.setAuthHeader(headers, MODEL.apiKey) Object.entries(MODEL.headerMappings ?? {}).forEach(([k, v]) => { headers.set(k, headers.get(v)!) }) return headers })(), body: JSON.stringify({ - ...(opts.transformBody?.(body) ?? body), + ...(opts.modifyBody?.(body) ?? body), model: MODEL.model, }), }) @@ -199,32 +246,31 @@ export async function handler( let buffer = "" let responseLength = 0 let startTimestamp = Date.now() - let receivedFirstByte = false function pump(): Promise { return ( reader?.read().then(async ({ done, value }) => { if (done) { logger.metric({ response_length: responseLength }) + const usage = opts.getStreamUsage() + if (usage) await trackUsage(usage) c.close() return } - if (!receivedFirstByte) { - receivedFirstByte = true + if (responseLength === 0) { logger.metric({ time_to_first_byte: Date.now() - startTimestamp }) } + responseLength += value.length buffer += decoder.decode(value, { stream: true }) - responseLength += value.length const parts = buffer.split("\n\n") buffer = parts.pop() ?? "" for (const part of parts) { logger.debug(part) - const usage = opts.parseUsageChunk(part.trim()) - if (usage) await trackUsage(usage) + opts.onStreamPart(part.trim()) } c.enqueue(value) @@ -255,10 +301,9 @@ export async function handler( async function authenticate() { try { - const authHeader = input.request.headers.get("authorization") - if (!authHeader || !authHeader.startsWith("Bearer ")) throw new AuthError("Missing API key.") + const apiKey = opts.parseApiKey(input.request.headers) + if (!apiKey) throw new AuthError("Missing API key.") - const apiKey = authHeader.split(" ")[1] const key = await Database.use((tx) => tx .select({ @@ -300,26 +345,38 @@ export async function handler( } async function trackUsage(usage: any) { - const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWriteTokens } = opts.buildUsage(usage) + const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } = + opts.normalizeUsage(usage) - const inputCost = MODEL.cost.input * inputTokens * 100 - const outputCost = MODEL.cost.output * outputTokens * 100 - const reasoningCost = MODEL.cost.reasoning * reasoningTokens * 100 - const cacheReadCost = MODEL.cost.cacheRead * cacheReadTokens * 100 - const cacheWriteCost = MODEL.cost.cacheWrite * cacheWriteTokens * 100 - const totalCostInCent = inputCost + outputCost + reasoningCost + cacheReadCost + cacheWriteCost + const modelCost = typeof MODEL.cost === "function" ? MODEL.cost(usage) : MODEL.cost + + const inputCost = modelCost.input * inputTokens * 100 + const outputCost = modelCost.output * outputTokens * 100 + const reasoningCost = reasoningTokens ? modelCost.output * reasoningTokens * 100 : undefined + const cacheReadCost = modelCost.cacheRead * cacheReadTokens * 100 + const cacheWrite5mCost = cacheWrite5mTokens ? modelCost.cacheWrite5m * cacheWrite5mTokens * 100 : undefined + const cacheWrite1hCost = cacheWrite1hTokens ? modelCost.cacheWrite1h * cacheWrite1hTokens * 100 : undefined + const totalCostInCent = + inputCost + + outputCost + + (reasoningCost ?? 0) + + cacheReadCost + + (cacheWrite5mCost ?? 0) + + (cacheWrite1hCost ?? 0) logger.metric({ "tokens.input": inputTokens, "tokens.output": outputTokens, "tokens.reasoning": reasoningTokens, "tokens.cache_read": cacheReadTokens, - "tokens.cache_write": cacheWriteTokens, + "tokens.cache_write_5m": cacheWrite5mTokens, + "tokens.cache_write_1h": cacheWrite1hTokens, "cost.input": Math.round(inputCost), "cost.output": Math.round(outputCost), - "cost.reasoning": Math.round(reasoningCost), + "cost.reasoning": reasoningCost ? Math.round(reasoningCost) : undefined, "cost.cache_read": Math.round(cacheReadCost), - "cost.cache_write": Math.round(cacheWriteCost), + "cost.cache_write_5m": cacheWrite5mCost ? Math.round(cacheWrite5mCost) : undefined, + "cost.cache_write_1h": cacheWrite1hCost ? Math.round(cacheWrite1hCost) : undefined, "cost.total": Math.round(totalCostInCent), }) @@ -335,7 +392,7 @@ export async function handler( outputTokens, reasoningTokens, cacheReadTokens, - cacheWriteTokens, + cacheWriteTokens: (cacheWrite5mTokens ?? 0) + (cacheWrite1hTokens ?? 0), cost, }) await tx