From dedfa563c2e6fcb902b4a9b865d34422ce23d822 Mon Sep 17 00:00:00 2001 From: Frank Date: Thu, 11 Sep 2025 01:31:49 -0400 Subject: [PATCH] wip: zen --- .../app/src/routes/zen/v1/chat/completions.ts | 4 +- cloud/app/src/util/zen.ts | 170 ++++++++++++++---- infra/cloud.ts | 2 + 3 files changed, 139 insertions(+), 37 deletions(-) diff --git a/cloud/app/src/routes/zen/v1/chat/completions.ts b/cloud/app/src/routes/zen/v1/chat/completions.ts index b68264cc..dc69bb51 100644 --- a/cloud/app/src/routes/zen/v1/chat/completions.ts +++ b/cloud/app/src/routes/zen/v1/chat/completions.ts @@ -6,9 +6,7 @@ export function POST(input: APIEvent) { return handler(input, { modifyBody: (body: any) => ({ ...body, - stream_options: { - include_usage: true, - }, + ...(body.stream ? { stream_options: { include_usage: true } } : {}), }), setAuthHeader: (headers: Headers, apiKey: string) => { headers.set("authorization", `Bearer ${apiKey}`) diff --git a/cloud/app/src/util/zen.ts b/cloud/app/src/util/zen.ts index 9ccf9e56..47cec9f6 100644 --- a/cloud/app/src/util/zen.ts +++ b/cloud/app/src/util/zen.ts @@ -1,4 +1,5 @@ import type { APIEvent } from "@solidjs/start/server" +import path from "node:path" import { Database, eq, sql } from "@opencode/cloud-core/drizzle/index.js" import { KeyTable } from "@opencode/cloud-core/schema/key.sql.js" import { BillingTable, UsageTable } from "@opencode/cloud-core/schema/billing.sql.js" @@ -6,6 +7,29 @@ import { centsToMicroCents } from "@opencode/cloud-core/util/price.js" import { Identifier } from "@opencode/cloud-core/identifier.js" import { Resource } from "@opencode/cloud-resource" +type ModelCost = { + input: number + output: number + cacheRead: number + cacheWrite5m: number + cacheWrite1h: number +} +type Model = { + id: string + auth: boolean + cost: ModelCost | ((usage: any) => ModelCost) + headerMappings: Record + providers: Record< + string, + { + api: string + apiKey: string + model: string + weight?: number + } + > +} + export async function handler( input: APIEvent, opts: { @@ -28,13 +52,10 @@ export async function handler( class CreditsError extends Error {} class ModelError extends Error {} - const MODELS = { + const MODELS: Record = { "claude-opus-4-1": { id: "claude-opus-4-1" as const, auth: true, - api: "https://api.anthropic.com", - apiKey: Resource.ANTHROPIC_API_KEY.value, - model: "claude-opus-4-1-20250805", cost: { input: 0.000015, output: 0.000075, @@ -43,13 +64,17 @@ export async function handler( cacheWrite1h: 0.00003, }, headerMappings: {}, + providers: { + anthropic: { + api: "https://api.anthropic.com", + apiKey: Resource.ANTHROPIC_API_KEY.value, + model: "claude-opus-4-1-20250805", + }, + }, }, "claude-sonnet-4": { id: "claude-sonnet-4" as const, auth: true, - api: "https://api.anthropic.com", - apiKey: Resource.ANTHROPIC_API_KEY.value, - model: "claude-sonnet-4-20250514", cost: (usage: any) => { const totalInputTokens = usage.inputTokens + usage.cacheReadTokens + usage.cacheWrite5mTokens + usage.cacheWrite1hTokens @@ -70,13 +95,17 @@ export async function handler( } }, headerMappings: {}, + providers: { + anthropic: { + api: "https://api.anthropic.com", + apiKey: Resource.ANTHROPIC_API_KEY.value, + model: "claude-sonnet-4-20250514", + }, + }, }, "claude-3-5-haiku": { id: "claude-3-5-haiku" as const, auth: true, - api: "https://api.anthropic.com", - apiKey: Resource.ANTHROPIC_API_KEY.value, - model: "claude-3-5-haiku-20241022", cost: { input: 0.0000008, output: 0.000004, @@ -85,13 +114,17 @@ export async function handler( cacheWrite1h: 0.0000016, }, headerMappings: {}, + providers: { + anthropic: { + api: "https://api.anthropic.com", + apiKey: Resource.ANTHROPIC_API_KEY.value, + model: "claude-3-5-haiku-20241022", + }, + }, }, "gpt-5": { id: "gpt-5" as const, auth: true, - api: "https://api.openai.com", - apiKey: Resource.OPENAI_API_KEY.value, - model: "gpt-5", cost: { input: 0.00000125, output: 0.00001, @@ -100,28 +133,43 @@ export async function handler( cacheWrite1h: 0, }, headerMappings: {}, + providers: { + openai: { + api: "https://api.openai.com", + apiKey: Resource.OPENAI_API_KEY.value, + model: "gpt-5", + }, + }, }, "qwen3-coder": { id: "qwen3-coder" as const, auth: true, - api: "https://inference.baseten.co", - apiKey: Resource.BASETEN_API_KEY.value, - model: "Qwen/Qwen3-Coder-480B-A35B-Instruct", cost: { - input: 0.00000038, - output: 0.00000153, + input: 0.00000045, + output: 0.0000018, cacheRead: 0, cacheWrite5m: 0, cacheWrite1h: 0, }, headerMappings: {}, + providers: { + baseten: { + api: "https://inference.baseten.co", + apiKey: Resource.BASETEN_API_KEY.value, + model: "Qwen/Qwen3-Coder-480B-A35B-Instruct", + weight: 4, + }, + fireworks: { + api: "https://api.fireworks.ai/inference", + apiKey: Resource.FIREWORKS_API_KEY.value, + model: "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct", + weight: 1, + }, + }, }, "kimi-k2": { id: "kimi-k2" as const, auth: true, - api: "https://inference.baseten.co", - apiKey: Resource.BASETEN_API_KEY.value, - model: "moonshotai/Kimi-K2-Instruct-0905", cost: { input: 0.0000006, output: 0.0000025, @@ -130,13 +178,24 @@ export async function handler( cacheWrite1h: 0, }, headerMappings: {}, + providers: { + baseten: { + api: "https://inference.baseten.co", + apiKey: Resource.BASETEN_API_KEY.value, + model: "moonshotai/Kimi-K2-Instruct-0905", + weight: 4, + }, + fireworks: { + api: "https://api.fireworks.ai/inference", + apiKey: Resource.FIREWORKS_API_KEY.value, + model: "accounts/fireworks/models/kimi-k2-instruct-0905", + weight: 1, + }, + }, }, "grok-code": { id: "grok-code" as const, auth: false, - api: "https://api.x.ai", - apiKey: Resource.XAI_API_KEY.value, - model: "grok-code", cost: { input: 0, output: 0, @@ -148,14 +207,18 @@ export async function handler( "x-grok-conv-id": "x-opencode-session", "x-grok-req-id": "x-opencode-request", }, + providers: { + xai: { + api: "https://api.x.ai", + apiKey: Resource.XAI_API_KEY.value, + model: "grok-code", + }, + }, }, // deprecated "qwen/qwen3-coder": { id: "qwen/qwen3-coder" as const, auth: true, - api: "https://inference.baseten.co", - apiKey: Resource.BASETEN_API_KEY.value, - model: "Qwen/Qwen3-Coder-480B-A35B-Instruct", cost: { input: 0.00000038, output: 0.00000153, @@ -164,6 +227,20 @@ export async function handler( cacheWrite1h: 0, }, headerMappings: {}, + providers: { + baseten: { + api: "https://inference.baseten.co", + apiKey: Resource.BASETEN_API_KEY.value, + model: "Qwen/Qwen3-Coder-480B-A35B-Instruct", + weight: 5, + }, + fireworks: { + api: "https://api.fireworks.ai/inference", + apiKey: Resource.FIREWORKS_API_KEY.value, + model: "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct", + weight: 1, + }, + }, }, } @@ -195,15 +272,19 @@ export async function handler( const apiKey = await authenticate() const isFree = FREE_WORKSPACES.includes(apiKey?.workspaceID ?? "") await checkCredits() + const providerName = selectProvider() + const providerData = MODEL.providers[providerName] + logger.metric({ provider: providerName }) // Request to model provider - const res = await fetch(new URL(url.pathname.replace(/^\/zen/, "") + url.search, MODEL.api), { + const startTimestamp = Date.now() + const res = await fetch(path.posix.join(providerData.api, url.pathname.replace(/^\/zen/, "") + url.search), { method: "POST", headers: (() => { const headers = input.request.headers headers.delete("host") headers.delete("content-length") - opts.setAuthHeader(headers, MODEL.apiKey) + opts.setAuthHeader(headers, providerData.apiKey) Object.entries(MODEL.headerMappings ?? {}).forEach(([k, v]) => { headers.set(k, headers.get(v)!) }) @@ -211,7 +292,7 @@ export async function handler( })(), body: JSON.stringify({ ...(opts.modifyBody?.(body) ?? body), - model: MODEL.model, + model: providerData.model, }), }) @@ -245,7 +326,6 @@ export async function handler( const decoder = new TextDecoder() let buffer = "" let responseLength = 0 - let startTimestamp = Date.now() function pump(): Promise { return ( @@ -262,7 +342,6 @@ export async function handler( logger.metric({ time_to_first_byte: Date.now() - startTimestamp }) } responseLength += value.length - buffer += decoder.decode(value, { stream: true }) const parts = buffer.split("\n\n") @@ -344,6 +423,13 @@ export async function handler( if (billing.balance <= 0) throw new CreditsError("Insufficient balance") } + function selectProvider() { + const picks = Object.entries(MODEL.providers).flatMap(([name, provider]) => + Array(provider.weight ?? 1).fill(name), + ) + return picks[Math.floor(Math.random() * picks.length)] + } + async function trackUsage(usage: any) { const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } = opts.normalizeUsage(usage) @@ -416,9 +502,25 @@ export async function handler( "error.message": error.message, }) + // Note: both top level "type" and "error.type" fields are used by the @ai-sdk/anthropic client to render the error message. if (error instanceof AuthError || error instanceof CreditsError || error instanceof ModelError) - return new Response(JSON.stringify({ error: { message: error.message } }), { status: 401 }) + return new Response( + JSON.stringify({ + type: "error", + error: { type: error.constructor.name, message: error.message }, + }), + { status: 401 }, + ) - return new Response(JSON.stringify({ error: { message: error.message } }), { status: 500 }) + return new Response( + JSON.stringify({ + type: "error", + error: { + type: "error", + message: error.message, + }, + }), + { status: 500 }, + ) } } diff --git a/infra/cloud.ts b/infra/cloud.ts index d1d8d293..9c58150c 100644 --- a/infra/cloud.ts +++ b/infra/cloud.ts @@ -103,6 +103,7 @@ const ANTHROPIC_API_KEY = new sst.Secret("ANTHROPIC_API_KEY") const OPENAI_API_KEY = new sst.Secret("OPENAI_API_KEY") const XAI_API_KEY = new sst.Secret("XAI_API_KEY") const BASETEN_API_KEY = new sst.Secret("BASETEN_API_KEY") +const FIREWORKS_API_KEY = new sst.Secret("FIREWORKS_API_KEY") const STRIPE_SECRET_KEY = new sst.Secret("STRIPE_SECRET_KEY") const AUTH_API_URL = new sst.Linkable("AUTH_API_URL", { properties: { value: auth.url.apply((url) => url!) }, @@ -136,6 +137,7 @@ new sst.cloudflare.x.SolidStart("Console", { OPENAI_API_KEY, XAI_API_KEY, BASETEN_API_KEY, + FIREWORKS_API_KEY, ], environment: { //VITE_DOCS_URL: web.url.apply((url) => url!),