wip: zen

2026-01-21 08:44:53 +01:00 · 2025-09-18 10:59:01 -04:00
parent c87480cf93
commit 4ceabdffa0
156 changed files with 427 additions and 429 deletions
--- a/packages/console/app/src/routes/zen/handler.ts
+++ b/packages/console/app/src/routes/zen/handler.ts
@@ -0,0 +1,594 @@
+import type { APIEvent } from "@solidjs/start/server"
+import path from "node:path"
+import { and, Database, eq, isNull, lt, or, sql } from "@opencode/console-core/drizzle/index.js"
+import { KeyTable } from "@opencode/console-core/schema/key.sql.js"
+import { BillingTable, PaymentTable, UsageTable } from "@opencode/console-core/schema/billing.sql.js"
+import { centsToMicroCents } from "@opencode/console-core/util/price.js"
+import { Identifier } from "@opencode/console-core/identifier.js"
+import { Resource } from "@opencode/console-resource"
+import { Billing } from "../../../../core/src/billing"
+import { Actor } from "@opencode/console-core/actor.js"
+
+type ModelCost = {
+  input: number
+  output: number
+  cacheRead?: number
+  cacheWrite5m?: number
+  cacheWrite1h?: number
+}
+
+type Model = {
+  id: string
+  auth: boolean
+  cost: ModelCost | ((usage: any) => ModelCost)
+  headerMappings: Record<string, string>
+  providers: Record<
+    string,
+    {
+      api: string
+      apiKey: string
+      model: string
+      weight?: number
+    }
+  >
+}
+
+export async function handler(
+  input: APIEvent,
+  opts: {
+    modifyBody?: (body: any) => any
+    setAuthHeader: (headers: Headers, apiKey: string) => void
+    parseApiKey: (headers: Headers) => string | undefined
+    onStreamPart: (chunk: string) => void
+    getStreamUsage: () => any
+    normalizeUsage: (body: any) => {
+      inputTokens: number
+      outputTokens: number
+      reasoningTokens?: number
+      cacheReadTokens?: number
+      cacheWrite5mTokens?: number
+      cacheWrite1hTokens?: number
+    }
+  },
+) {
+  class AuthError extends Error {}
+  class CreditsError extends Error {}
+  class MonthlyLimitError extends Error {}
+  class ModelError extends Error {}
+
+  const MODELS: Record<string, Model> = {
+    "claude-opus-4-1": {
+      id: "claude-opus-4-1" as const,
+      auth: true,
+      cost: {
+        input: 0.000015,
+        output: 0.000075,
+        cacheRead: 0.0000015,
+        cacheWrite5m: 0.00001875,
+        cacheWrite1h: 0.00003,
+      },
+      headerMappings: {},
+      providers: {
+        anthropic: {
+          api: "https://api.anthropic.com",
+          apiKey: Resource.ANTHROPIC_API_KEY.value,
+          model: "claude-opus-4-1-20250805",
+        },
+      },
+    },
+    "claude-sonnet-4": {
+      id: "claude-sonnet-4" as const,
+      auth: true,
+      cost: (usage: any) => {
+        const totalInputTokens =
+          usage.inputTokens + usage.cacheReadTokens + usage.cacheWrite5mTokens + usage.cacheWrite1hTokens
+        return totalInputTokens <= 200_000
+          ? {
+              input: 0.000003,
+              output: 0.000015,
+              cacheRead: 0.0000003,
+              cacheWrite5m: 0.00000375,
+              cacheWrite1h: 0.000006,
+            }
+          : {
+              input: 0.000006,
+              output: 0.0000225,
+              cacheRead: 0.0000006,
+              cacheWrite5m: 0.0000075,
+              cacheWrite1h: 0.000012,
+            }
+      },
+      headerMappings: {},
+      providers: {
+        anthropic: {
+          api: "https://api.anthropic.com",
+          apiKey: Resource.ANTHROPIC_API_KEY.value,
+          model: "claude-sonnet-4-20250514",
+        },
+      },
+    },
+    "claude-3-5-haiku": {
+      id: "claude-3-5-haiku" as const,
+      auth: true,
+      cost: {
+        input: 0.0000008,
+        output: 0.000004,
+        cacheRead: 0.00000008,
+        cacheWrite5m: 0.000001,
+        cacheWrite1h: 0.0000016,
+      },
+      headerMappings: {},
+      providers: {
+        anthropic: {
+          api: "https://api.anthropic.com",
+          apiKey: Resource.ANTHROPIC_API_KEY.value,
+          model: "claude-3-5-haiku-20241022",
+        },
+      },
+    },
+    "gpt-5": {
+      id: "gpt-5" as const,
+      auth: true,
+      cost: {
+        input: 0.00000125,
+        output: 0.00001,
+        cacheRead: 0.000000125,
+      },
+      headerMappings: {},
+      providers: {
+        openai: {
+          api: "https://api.openai.com",
+          apiKey: Resource.OPENAI_API_KEY.value,
+          model: "gpt-5",
+        },
+      },
+    },
+    "qwen3-coder": {
+      id: "qwen3-coder" as const,
+      auth: true,
+      cost: {
+        input: 0.00000045,
+        output: 0.0000018,
+      },
+      headerMappings: {},
+      providers: {
+        baseten: {
+          api: "https://inference.baseten.co",
+          apiKey: Resource.BASETEN_API_KEY.value,
+          model: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
+          weight: 4,
+        },
+        fireworks: {
+          api: "https://api.fireworks.ai/inference",
+          apiKey: Resource.FIREWORKS_API_KEY.value,
+          model: "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
+          weight: 1,
+        },
+      },
+    },
+    "kimi-k2": {
+      id: "kimi-k2" as const,
+      auth: true,
+      cost: {
+        input: 0.0000006,
+        output: 0.0000025,
+      },
+      headerMappings: {},
+      providers: {
+        baseten: {
+          api: "https://inference.baseten.co",
+          apiKey: Resource.BASETEN_API_KEY.value,
+          model: "moonshotai/Kimi-K2-Instruct-0905",
+          //weight: 4,
+        },
+        //fireworks: {
+        //  api: "https://api.fireworks.ai/inference",
+        //  apiKey: Resource.FIREWORKS_API_KEY.value,
+        //  model: "accounts/fireworks/models/kimi-k2-instruct-0905",
+        //  weight: 1,
+        //},
+      },
+    },
+    "grok-code": {
+      id: "grok-code" as const,
+      auth: false,
+      cost: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+      },
+      headerMappings: {
+        "x-grok-conv-id": "x-opencode-session",
+        "x-grok-req-id": "x-opencode-request",
+      },
+      providers: {
+        xai: {
+          api: "https://api.x.ai",
+          apiKey: Resource.XAI_API_KEY.value,
+          model: "grok-code",
+        },
+      },
+    },
+    // deprecated
+    "qwen/qwen3-coder": {
+      id: "qwen/qwen3-coder" as const,
+      auth: true,
+      cost: {
+        input: 0.00000038,
+        output: 0.00000153,
+      },
+      headerMappings: {},
+      providers: {
+        baseten: {
+          api: "https://inference.baseten.co",
+          apiKey: Resource.BASETEN_API_KEY.value,
+          model: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
+          weight: 5,
+        },
+        fireworks: {
+          api: "https://api.fireworks.ai/inference",
+          apiKey: Resource.FIREWORKS_API_KEY.value,
+          model: "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
+          weight: 1,
+        },
+      },
+    },
+  }
+
+  const FREE_WORKSPACES = [
+    "wrk_01K46JDFR0E75SG2Q8K172KF3Y", // frank
+  ]
+
+  const logger = {
+    metric: (values: Record<string, any>) => {
+      console.log(`_metric:${JSON.stringify(values)}`)
+    },
+    log: console.log,
+    debug: (message: string) => {
+      if (Resource.App.stage === "production") return
+      console.debug(message)
+    },
+  }
+
+  try {
+    const url = new URL(input.request.url)
+    const body = await input.request.json()
+    logger.debug(JSON.stringify(body))
+    logger.metric({
+      is_tream: !!body.stream,
+      session: input.request.headers.get("x-opencode-session"),
+      request: input.request.headers.get("x-opencode-request"),
+    })
+    const MODEL = validateModel()
+    const apiKey = await authenticate()
+    const isFree = FREE_WORKSPACES.includes(apiKey?.workspaceID ?? "")
+    await checkCreditsAndLimit()
+    const providerName = selectProvider()
+    const providerData = MODEL.providers[providerName]
+    logger.metric({ provider: providerName })
+
+    // Request to model provider
+    const startTimestamp = Date.now()
+    const res = await fetch(path.posix.join(providerData.api, url.pathname.replace(/^\/zen/, "") + url.search), {
+      method: "POST",
+      headers: (() => {
+        const headers = input.request.headers
+        headers.delete("host")
+        headers.delete("content-length")
+        opts.setAuthHeader(headers, providerData.apiKey)
+        Object.entries(MODEL.headerMappings ?? {}).forEach(([k, v]) => {
+          headers.set(k, headers.get(v)!)
+        })
+        return headers
+      })(),
+      body: JSON.stringify({
+        ...(opts.modifyBody?.(body) ?? body),
+        model: providerData.model,
+      }),
+    })
+
+    // Scrub response headers
+    const resHeaders = new Headers()
+    const keepHeaders = ["content-type", "cache-control"]
+    for (const [k, v] of res.headers.entries()) {
+      if (keepHeaders.includes(k.toLowerCase())) {
+        resHeaders.set(k, v)
+      }
+    }
+
+    // Handle non-streaming response
+    if (!body.stream) {
+      const json = await res.json()
+      const body = JSON.stringify(json)
+      logger.metric({ response_length: body.length })
+      logger.debug(body)
+      await trackUsage(json.usage)
+      await reload()
+      return new Response(body, {
+        status: res.status,
+        statusText: res.statusText,
+        headers: resHeaders,
+      })
+    }
+
+    // Handle streaming response
+    const stream = new ReadableStream({
+      start(c) {
+        const reader = res.body?.getReader()
+        const decoder = new TextDecoder()
+        let buffer = ""
+        let responseLength = 0
+
+        function pump(): Promise<void> {
+          return (
+            reader?.read().then(async ({ done, value }) => {
+              if (done) {
+                logger.metric({ response_length: responseLength })
+                const usage = opts.getStreamUsage()
+                if (usage) {
+                  await trackUsage(usage)
+                  await reload()
+                }
+                c.close()
+                return
+              }
+
+              if (responseLength === 0) {
+                logger.metric({ time_to_first_byte: Date.now() - startTimestamp })
+              }
+              responseLength += value.length
+              buffer += decoder.decode(value, { stream: true })
+
+              const parts = buffer.split("\n\n")
+              buffer = parts.pop() ?? ""
+
+              for (const part of parts) {
+                logger.debug(part)
+                opts.onStreamPart(part.trim())
+              }
+
+              c.enqueue(value)
+
+              return pump()
+            }) || Promise.resolve()
+          )
+        }
+
+        return pump()
+      },
+    })
+
+    return new Response(stream, {
+      status: res.status,
+      statusText: res.statusText,
+      headers: resHeaders,
+    })
+
+    function validateModel() {
+      if (!(body.model in MODELS)) {
+        throw new ModelError(`Model ${body.model} not supported`)
+      }
+      const model = MODELS[body.model as keyof typeof MODELS]
+      logger.metric({ model: model.id })
+      return model
+    }
+
+    async function authenticate() {
+      try {
+        const apiKey = opts.parseApiKey(input.request.headers)
+        if (!apiKey) throw new AuthError("Missing API key.")
+
+        const key = await Database.use((tx) =>
+          tx
+            .select({
+              id: KeyTable.id,
+              workspaceID: KeyTable.workspaceID,
+            })
+            .from(KeyTable)
+            .where(and(eq(KeyTable.key, apiKey), isNull(KeyTable.timeDeleted)))
+            .then((rows) => rows[0]),
+        )
+
+        if (!key) throw new AuthError("Invalid API key.")
+        logger.metric({
+          api_key: key.id,
+          workspace: key.workspaceID,
+        })
+        return key
+      } catch (e) {
+        // ignore error if model does not require authentication
+        if (!MODEL.auth) return
+        throw e
+      }
+    }
+
+    async function checkCreditsAndLimit() {
+      if (!apiKey || !MODEL.auth || isFree) return
+
+      const billing = await Database.use((tx) =>
+        tx
+          .select({
+            balance: BillingTable.balance,
+            paymentMethodID: BillingTable.paymentMethodID,
+            monthlyLimit: BillingTable.monthlyLimit,
+            monthlyUsage: BillingTable.monthlyUsage,
+            timeMonthlyUsageUpdated: BillingTable.timeMonthlyUsageUpdated,
+          })
+          .from(BillingTable)
+          .where(eq(BillingTable.workspaceID, apiKey.workspaceID))
+          .then((rows) => rows[0]),
+      )
+
+      if (!billing.paymentMethodID) throw new CreditsError("No payment method")
+      if (billing.balance <= 0) throw new CreditsError("Insufficient balance")
+      if (
+        billing.monthlyLimit &&
+        billing.monthlyUsage &&
+        billing.timeMonthlyUsageUpdated &&
+        billing.monthlyUsage >= centsToMicroCents(billing.monthlyLimit * 100)
+      ) {
+        const now = new Date()
+        const currentYear = now.getUTCFullYear()
+        const currentMonth = now.getUTCMonth()
+        const dateYear = billing.timeMonthlyUsageUpdated.getUTCFullYear()
+        const dateMonth = billing.timeMonthlyUsageUpdated.getUTCMonth()
+        if (currentYear === dateYear && currentMonth === dateMonth)
+          throw new MonthlyLimitError(`You have reached your monthly spending limit of $${billing.monthlyLimit}.`)
+      }
+    }
+
+    function selectProvider() {
+      const picks = Object.entries(MODEL.providers).flatMap(([name, provider]) =>
+        Array<string>(provider.weight ?? 1).fill(name),
+      )
+      return picks[Math.floor(Math.random() * picks.length)]
+    }
+
+    async function trackUsage(usage: any) {
+      const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } =
+        opts.normalizeUsage(usage)
+
+      const modelCost = typeof MODEL.cost === "function" ? MODEL.cost(usage) : MODEL.cost
+
+      const inputCost = modelCost.input * inputTokens * 100
+      const outputCost = modelCost.output * outputTokens * 100
+      const reasoningCost = (() => {
+        if (!reasoningTokens) return undefined
+        return modelCost.output * reasoningTokens * 100
+      })()
+      const cacheReadCost = (() => {
+        if (!cacheReadTokens) return undefined
+        if (!modelCost.cacheRead) return undefined
+        return modelCost.cacheRead * cacheReadTokens * 100
+      })()
+      const cacheWrite5mCost = (() => {
+        if (!cacheWrite5mTokens) return undefined
+        if (!modelCost.cacheWrite5m) return undefined
+        return modelCost.cacheWrite5m * cacheWrite5mTokens * 100
+      })()
+      const cacheWrite1hCost = (() => {
+        if (!cacheWrite1hTokens) return undefined
+        if (!modelCost.cacheWrite1h) return undefined
+        return modelCost.cacheWrite1h * cacheWrite1hTokens * 100
+      })()
+      const totalCostInCent =
+        inputCost +
+        outputCost +
+        (reasoningCost ?? 0) +
+        (cacheReadCost ?? 0) +
+        (cacheWrite5mCost ?? 0) +
+        (cacheWrite1hCost ?? 0)
+
+      logger.metric({
+        "tokens.input": inputTokens,
+        "tokens.output": outputTokens,
+        "tokens.reasoning": reasoningTokens,
+        "tokens.cache_read": cacheReadTokens,
+        "tokens.cache_write_5m": cacheWrite5mTokens,
+        "tokens.cache_write_1h": cacheWrite1hTokens,
+        "cost.input": Math.round(inputCost),
+        "cost.output": Math.round(outputCost),
+        "cost.reasoning": reasoningCost ? Math.round(reasoningCost) : undefined,
+        "cost.cache_read": cacheReadCost ? Math.round(cacheReadCost) : undefined,
+        "cost.cache_write_5m": cacheWrite5mCost ? Math.round(cacheWrite5mCost) : undefined,
+        "cost.cache_write_1h": cacheWrite1hCost ? Math.round(cacheWrite1hCost) : undefined,
+        "cost.total": Math.round(totalCostInCent),
+      })
+
+      if (!apiKey) return
+
+      const cost = isFree ? 0 : centsToMicroCents(totalCostInCent)
+      await Database.transaction(async (tx) => {
+        await tx.insert(UsageTable).values({
+          workspaceID: apiKey.workspaceID,
+          id: Identifier.create("usage"),
+          model: MODEL.id,
+          provider: providerName,
+          inputTokens,
+          outputTokens,
+          reasoningTokens,
+          cacheReadTokens,
+          cacheWrite5mTokens,
+          cacheWrite1hTokens,
+          cost,
+        })
+        await tx
+          .update(BillingTable)
+          .set({
+            balance: sql`${BillingTable.balance} - ${cost}`,
+            monthlyUsage: sql`
+              CASE
+                WHEN MONTH(${BillingTable.timeMonthlyUsageUpdated}) = MONTH(now()) AND YEAR(${BillingTable.timeMonthlyUsageUpdated}) = YEAR(now()) THEN ${BillingTable.monthlyUsage} + ${cost}
+                ELSE ${cost}
+              END
+            `,
+            timeMonthlyUsageUpdated: sql`now()`,
+          })
+          .where(eq(BillingTable.workspaceID, apiKey.workspaceID))
+      })
+
+      await Database.use((tx) =>
+        tx
+          .update(KeyTable)
+          .set({ timeUsed: sql`now()` })
+          .where(eq(KeyTable.id, apiKey.id)),
+      )
+    }
+
+    async function reload() {
+      if (!apiKey) return
+
+      const lock = await Database.use((tx) =>
+        tx
+          .update(BillingTable)
+          .set({
+            timeReloadLockedTill: sql`now() + interval 1 minute`,
+          })
+          .where(
+            and(
+              eq(BillingTable.workspaceID, apiKey.workspaceID),
+              eq(BillingTable.reload, true),
+              lt(BillingTable.balance, centsToMicroCents(Billing.CHARGE_THRESHOLD)),
+              or(isNull(BillingTable.timeReloadLockedTill), lt(BillingTable.timeReloadLockedTill, sql`now()`)),
+            ),
+          ),
+      )
+      if (lock.rowsAffected === 0) return
+
+      await Actor.provide("system", { workspaceID: apiKey.workspaceID }, async () => {
+        await Billing.reload()
+      })
+    }
+  } catch (error: any) {
+    logger.metric({
+      "error.type": error.constructor.name,
+      "error.message": error.message,
+    })
+
+    // Note: both top level "type" and "error.type" fields are used by the @ai-sdk/anthropic client to render the error message.
+    if (
+      error instanceof AuthError ||
+      error instanceof CreditsError ||
+      error instanceof MonthlyLimitError ||
+      error instanceof ModelError
+    )
+      return new Response(
+        JSON.stringify({
+          type: "error",
+          error: { type: error.constructor.name, message: error.message },
+        }),
+        { status: 401 },
+      )
+
+    return new Response(
+      JSON.stringify({
+        type: "error",
+        error: {
+          type: "error",
+          message: error.message,
+        },
+      }),
+      { status: 500 },
+    )
+  }
+}
--- a/packages/console/app/src/routes/zen/v1/chat/completions.ts
+++ b/packages/console/app/src/routes/zen/v1/chat/completions.ts
@@ -0,0 +1,54 @@
+import type { APIEvent } from "@solidjs/start/server"
+import { handler } from "~/routes/zen/handler"
+
+type Usage = {
+  prompt_tokens?: number
+  completion_tokens?: number
+  total_tokens?: number
+  prompt_tokens_details?: {
+    text_tokens?: number
+    audio_tokens?: number
+    image_tokens?: number
+    cached_tokens?: number
+  }
+  completion_tokens_details?: {
+    reasoning_tokens?: number
+    audio_tokens?: number
+    accepted_prediction_tokens?: number
+    rejected_prediction_tokens?: number
+  }
+}
+
+export function POST(input: APIEvent) {
+  let usage: Usage
+  return handler(input, {
+    modifyBody: (body: any) => ({
+      ...body,
+      ...(body.stream ? { stream_options: { include_usage: true } } : {}),
+    }),
+    setAuthHeader: (headers: Headers, apiKey: string) => {
+      headers.set("authorization", `Bearer ${apiKey}`)
+    },
+    parseApiKey: (headers: Headers) => headers.get("authorization")?.split(" ")[1],
+    onStreamPart: (chunk: string) => {
+      if (!chunk.startsWith("data: ")) return
+
+      let json
+      try {
+        json = JSON.parse(chunk.slice(6)) as { usage?: Usage }
+      } catch (e) {
+        return
+      }
+
+      if (!json.usage) return
+      usage = json.usage
+    },
+    getStreamUsage: () => usage,
+    normalizeUsage: (usage: Usage) => ({
+      inputTokens: usage.prompt_tokens ?? 0,
+      outputTokens: usage.completion_tokens ?? 0,
+      reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? undefined,
+      cacheReadTokens: usage.prompt_tokens_details?.cached_tokens ?? undefined,
+    }),
+  })
+}
--- a/packages/console/app/src/routes/zen/v1/messages.ts
+++ b/packages/console/app/src/routes/zen/v1/messages.ts
@@ -0,0 +1,61 @@
+import type { APIEvent } from "@solidjs/start/server"
+import { handler } from "~/routes/zen/handler"
+
+type Usage = {
+  cache_creation?: {
+    ephemeral_5m_input_tokens?: number
+    ephemeral_1h_input_tokens?: number
+  }
+  cache_creation_input_tokens?: number
+  cache_read_input_tokens?: number
+  input_tokens?: number
+  output_tokens?: number
+  server_tool_use?: {
+    web_search_requests?: number
+  }
+}
+
+export function POST(input: APIEvent) {
+  let usage: Usage
+  return handler(input, {
+    modifyBody: (body: any) => ({
+      ...body,
+      service_tier: "standard_only",
+    }),
+    setAuthHeader: (headers: Headers, apiKey: string) => headers.set("x-api-key", apiKey),
+    parseApiKey: (headers: Headers) => headers.get("x-api-key") ?? undefined,
+    onStreamPart: (chunk: string) => {
+      const data = chunk.split("\n")[1]
+      if (!data.startsWith("data: ")) return
+
+      let json
+      try {
+        json = JSON.parse(data.slice(6)) as { usage?: Usage }
+      } catch (e) {
+        return
+      }
+
+      if (!json.usage) return
+      usage = {
+        ...usage,
+        ...json.usage,
+        cache_creation: {
+          ...usage?.cache_creation,
+          ...json.usage.cache_creation,
+        },
+        server_tool_use: {
+          ...usage?.server_tool_use,
+          ...json.usage.server_tool_use,
+        },
+      }
+    },
+    getStreamUsage: () => usage,
+    normalizeUsage: (usage: Usage) => ({
+      inputTokens: usage.input_tokens ?? 0,
+      outputTokens: usage.output_tokens ?? 0,
+      cacheReadTokens: usage.cache_read_input_tokens ?? undefined,
+      cacheWrite5mTokens: usage.cache_creation?.ephemeral_5m_input_tokens ?? undefined,
+      cacheWrite1hTokens: usage.cache_creation?.ephemeral_1h_input_tokens ?? undefined,
+    }),
+  })
+}
--- a/packages/console/app/src/routes/zen/v1/responses.ts
+++ b/packages/console/app/src/routes/zen/v1/responses.ts
@@ -0,0 +1,52 @@
+import type { APIEvent } from "@solidjs/start/server"
+import { handler } from "~/routes/zen/handler"
+
+type Usage = {
+  input_tokens?: number
+  input_tokens_details?: {
+    cached_tokens?: number
+  }
+  output_tokens?: number
+  output_tokens_details?: {
+    reasoning_tokens?: number
+  }
+  total_tokens?: number
+}
+
+export function POST(input: APIEvent) {
+  let usage: Usage
+  return handler(input, {
+    setAuthHeader: (headers: Headers, apiKey: string) => {
+      headers.set("authorization", `Bearer ${apiKey}`)
+    },
+    parseApiKey: (headers: Headers) => headers.get("authorization")?.split(" ")[1],
+    onStreamPart: (chunk: string) => {
+      const [event, data] = chunk.split("\n")
+      if (event !== "event: response.completed") return
+      if (!data.startsWith("data: ")) return
+
+      let json
+      try {
+        json = JSON.parse(data.slice(6)) as { response?: { usage?: Usage } }
+      } catch (e) {
+        return
+      }
+
+      if (!json.response?.usage) return
+      usage = json.response.usage
+    },
+    getStreamUsage: () => usage,
+    normalizeUsage: (usage: Usage) => {
+      const inputTokens = usage.input_tokens ?? 0
+      const outputTokens = usage.output_tokens ?? 0
+      const reasoningTokens = usage.output_tokens_details?.reasoning_tokens ?? undefined
+      const cacheReadTokens = usage.input_tokens_details?.cached_tokens ?? undefined
+      return {
+        inputTokens: inputTokens - (cacheReadTokens ?? 0),
+        outputTokens: outputTokens - (reasoningTokens ?? 0),
+        reasoningTokens,
+        cacheReadTokens,
+      }
+    },
+  })
+}