This commit is contained in:
Frank
2025-09-10 17:39:09 -04:00
parent 5b56848c3d
commit fa3e7bb9b0
4 changed files with 190 additions and 61 deletions

View File

@@ -2,14 +2,19 @@ import type { APIEvent } from "@solidjs/start/server"
import { handler } from "~/util/zen" import { handler } from "~/util/zen"
export function POST(input: APIEvent) { export function POST(input: APIEvent) {
let usage: any
return handler(input, { return handler(input, {
transformBody: (body: any) => ({ modifyBody: (body: any) => ({
...body, ...body,
stream_options: { stream_options: {
include_usage: true, include_usage: true,
}, },
}), }),
parseUsageChunk: (chunk: string) => { setAuthHeader: (headers: Headers, apiKey: string) => {
headers.set("authorization", `Bearer ${apiKey}`)
},
parseApiKey: (headers: Headers) => headers.get("authorization")?.split(" ")[1],
onStreamPart: (chunk: string) => {
if (!chunk.startsWith("data: ")) return if (!chunk.startsWith("data: ")) return
let json let json
@@ -19,15 +24,15 @@ export function POST(input: APIEvent) {
return return
} }
return json.usage if (!json.usage) return
usage = json.usage
}, },
buildUsage: (usage: any) => ({ getStreamUsage: () => usage,
normalizeUsage: (usage: any) => ({
inputTokens: usage.prompt_tokens ?? 0, inputTokens: usage.prompt_tokens ?? 0,
outputTokens: usage.completion_tokens ?? 0, outputTokens: usage.completion_tokens ?? 0,
reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0, reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0,
cacheReadTokens: usage.prompt_tokens_details?.cached_tokens ?? 0, cacheReadTokens: usage.prompt_tokens_details?.cached_tokens ?? 0,
//cacheWriteTokens = usage.providerMetadata?.["anthropic"]?.["cacheCreationInputTokens"] ?? 0
cacheWriteTokens: 0,
}), }),
}) })
} }

View File

@@ -0,0 +1,61 @@
import type { APIEvent } from "@solidjs/start/server"
import { handler } from "~/util/zen"
// Shape of the `usage` object that onStreamPart reads from streamed JSON
// payloads. Every field is optional because a single stream part may carry
// only some of them; parts are merged into one accumulated value.
// NOTE(review): presumably mirrors the Anthropic Messages API usage object — confirm.
type Usage = {
// Cache-write token counts by bucket; normalizeUsage maps these to
// cacheWrite5mTokens / cacheWrite1hTokens.
cache_creation?: {
ephemeral_5m_input_tokens?: number
ephemeral_1h_input_tokens?: number
}
// Accumulated with the rest of the object but not read by normalizeUsage in this file.
cache_creation_input_tokens?: number
// Mapped to cacheReadTokens by normalizeUsage.
cache_read_input_tokens?: number
// Mapped to inputTokens by normalizeUsage.
input_tokens?: number
// Mapped to outputTokens by normalizeUsage.
output_tokens?: number
// Merged across stream parts but not read by normalizeUsage in this file.
server_tool_use?: {
web_search_requests?: number
}
}
/**
 * POST route that proxies a request through the shared `handler`, supplying
 * the provider-specific hooks for this endpoint: `x-api-key` authentication,
 * a pinned `service_tier`, and accumulation of token usage from the streamed
 * response.
 *
 * @param input - The incoming API event, forwarded to `handler` unchanged.
 * @returns Whatever `handler` returns for this request.
 */
export function POST(input: APIEvent) {
  // Accumulated usage across all stream parts; stays undefined until a part
  // carrying usage has been seen, so getStreamUsage can report "no usage".
  let usage: Usage | undefined

  return handler(input, {
    // Forward the caller's body as-is, forcing the service tier.
    modifyBody: (body: any) => ({
      ...body,
      service_tier: "standard_only",
    }),
    // This endpoint authenticates with an `x-api-key` header in both directions.
    setAuthHeader: (headers: Headers, apiKey: string) => headers.set("x-api-key", apiKey),
    parseApiKey: (headers: Headers) => headers.get("x-api-key") ?? undefined,
    onStreamPart: (chunk: string) => {
      // A part is usually "event: <name>\ndata: <json>", but it can also be a
      // single line (a bare data line, a comment, a trailing fragment). Look
      // the data line up instead of assuming it is always the second line,
      // which used to throw on single-line parts.
      const data = chunk.split("\n").find((line) => line.startsWith("data: "))
      if (!data) return

      let json: { usage?: Usage | null; message?: { usage?: Usage | null } } | null
      try {
        json = JSON.parse(data.slice(6))
      } catch {
        // Not JSON (or a partial payload): nothing to account for.
        return
      }

      // Usage is reported at the top level on most events, but nested under
      // `message` on the initial message_start event, which is where the
      // input/cache token counts may first appear.
      const partUsage = json?.usage ?? json?.message?.usage
      if (!partUsage) return

      // Later parts override earlier scalar counts; nested objects are merged
      // field-by-field so a part that omits them does not erase earlier data.
      usage = {
        ...usage,
        ...partUsage,
        cache_creation: {
          ...usage?.cache_creation,
          ...partUsage.cache_creation,
        },
        server_tool_use: {
          ...usage?.server_tool_use,
          ...partUsage.server_tool_use,
        },
      }
    },
    getStreamUsage: () => usage,
    // Translate the provider's field names to the handler's normalized shape.
    // The cache-write buckets are left undefined when the provider did not
    // report them.
    normalizeUsage: (usage: Usage) => ({
      inputTokens: usage.input_tokens ?? 0,
      outputTokens: usage.output_tokens ?? 0,
      cacheReadTokens: usage.cache_read_input_tokens ?? 0,
      cacheWrite5mTokens: usage.cache_creation?.ephemeral_5m_input_tokens,
      cacheWrite1hTokens: usage.cache_creation?.ephemeral_1h_input_tokens,
    }),
  })
}

View File

@@ -2,8 +2,13 @@ import type { APIEvent } from "@solidjs/start/server"
import { handler } from "~/util/zen" import { handler } from "~/util/zen"
export function POST(input: APIEvent) { export function POST(input: APIEvent) {
let usage: any
return handler(input, { return handler(input, {
parseUsageChunk: (chunk: string) => { setAuthHeader: (headers: Headers, apiKey: string) => {
headers.set("authorization", `Bearer ${apiKey}`)
},
parseApiKey: (headers: Headers) => headers.get("authorization")?.split(" ")[1],
onStreamPart: (chunk: string) => {
const [event, data] = chunk.split("\n") const [event, data] = chunk.split("\n")
if (event !== "event: response.completed") return if (event !== "event: response.completed") return
if (!data.startsWith("data: ")) return if (!data.startsWith("data: ")) return
@@ -15,9 +20,11 @@ export function POST(input: APIEvent) {
return return
} }
return json.response?.usage if (!json.response?.usage) return
usage = json.response.usage
}, },
buildUsage: (usage: any) => { getStreamUsage: () => usage,
normalizeUsage: (usage: any) => {
const inputTokens = usage.input_tokens ?? 0 const inputTokens = usage.input_tokens ?? 0
const outputTokens = usage.output_tokens ?? 0 const outputTokens = usage.output_tokens ?? 0
const reasoningTokens = usage.output_tokens_details?.reasoning_tokens ?? 0 const reasoningTokens = usage.output_tokens_details?.reasoning_tokens ?? 0
@@ -27,7 +34,6 @@ export function POST(input: APIEvent) {
outputTokens: outputTokens - reasoningTokens, outputTokens: outputTokens - reasoningTokens,
reasoningTokens, reasoningTokens,
cacheReadTokens, cacheReadTokens,
cacheWriteTokens: 0,
} }
}, },
}) })

View File

@@ -9,14 +9,18 @@ import { Resource } from "@opencode/cloud-resource"
export async function handler( export async function handler(
input: APIEvent, input: APIEvent,
opts: { opts: {
transformBody?: (body: any) => any modifyBody?: (body: any) => any
parseUsageChunk: (chunk: string) => string | undefined setAuthHeader: (headers: Headers, apiKey: string) => void
buildUsage: (body: any) => { parseApiKey: (headers: Headers) => string | undefined
onStreamPart: (chunk: string) => void
getStreamUsage: () => any
normalizeUsage: (body: any) => {
inputTokens: number inputTokens: number
outputTokens: number outputTokens: number
reasoningTokens: number reasoningTokens?: number
cacheReadTokens: number cacheReadTokens: number
cacheWriteTokens: number cacheWrite5mTokens?: number
cacheWrite1hTokens?: number
} }
}, },
) { ) {
@@ -25,20 +29,63 @@ export async function handler(
class ModelError extends Error {} class ModelError extends Error {}
const MODELS = { const MODELS = {
// "anthropic/claude-sonnet-4": { "claude-opus-4-1": {
// auth: true, id: "claude-opus-4-1" as const,
// api: "https://api.anthropic.com", auth: true,
// apiKey: Resource.ANTHROPIC_API_KEY.value, api: "https://api.anthropic.com",
// model: "claude-sonnet-4-20250514", apiKey: Resource.ANTHROPIC_API_KEY.value,
// cost: { model: "claude-opus-4-1-20250805",
// input: 0.0000015, cost: {
// output: 0.000006, input: 0.000015,
// reasoning: 0.0000015, output: 0.000075,
// cacheRead: 0.0000001, cacheRead: 0.0000015,
// cacheWrite: 0.0000001, cacheWrite5m: 0.00001875,
// }, cacheWrite1h: 0.00003,
// headerMappings: {}, },
// }, headerMappings: {},
},
"claude-sonnet-4": {
id: "claude-sonnet-4" as const,
auth: true,
api: "https://api.anthropic.com",
apiKey: Resource.ANTHROPIC_API_KEY.value,
model: "claude-sonnet-4-20250514",
cost: (usage: any) => {
const totalInputTokens =
usage.inputTokens + usage.cacheReadTokens + usage.cacheWrite5mTokens + usage.cacheWrite1hTokens
return totalInputTokens <= 200_000
? {
input: 0.000003,
output: 0.000015,
cacheRead: 0.0000003,
cacheWrite5m: 0.00000375,
cacheWrite1h: 0.000006,
}
: {
input: 0.000006,
output: 0.0000225,
cacheRead: 0.0000006,
cacheWrite5m: 0.0000075,
cacheWrite1h: 0.000012,
}
},
headerMappings: {},
},
"claude-3-5-haiku": {
id: "claude-3-5-haiku" as const,
auth: true,
api: "https://api.anthropic.com",
apiKey: Resource.ANTHROPIC_API_KEY.value,
model: "claude-3-5-haiku-20241022",
cost: {
input: 0.0000008,
output: 0.000004,
cacheRead: 0.00000008,
cacheWrite5m: 0.000001,
cacheWrite1h: 0.0000016,
},
headerMappings: {},
},
"gpt-5": { "gpt-5": {
id: "gpt-5" as const, id: "gpt-5" as const,
auth: true, auth: true,
@@ -48,9 +95,9 @@ export async function handler(
cost: { cost: {
input: 0.00000125, input: 0.00000125,
output: 0.00001, output: 0.00001,
reasoning: 0.00001,
cacheRead: 0.000000125, cacheRead: 0.000000125,
cacheWrite: 0, cacheWrite5m: 0,
cacheWrite1h: 0,
}, },
headerMappings: {}, headerMappings: {},
}, },
@@ -63,9 +110,9 @@ export async function handler(
cost: { cost: {
input: 0.00000038, input: 0.00000038,
output: 0.00000153, output: 0.00000153,
reasoning: 0,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite5m: 0,
cacheWrite1h: 0,
}, },
headerMappings: {}, headerMappings: {},
}, },
@@ -78,9 +125,9 @@ export async function handler(
cost: { cost: {
input: 0.0000006, input: 0.0000006,
output: 0.0000025, output: 0.0000025,
reasoning: 0,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite5m: 0,
cacheWrite1h: 0,
}, },
headerMappings: {}, headerMappings: {},
}, },
@@ -93,9 +140,9 @@ export async function handler(
cost: { cost: {
input: 0, input: 0,
output: 0, output: 0,
reasoning: 0,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite5m: 0,
cacheWrite1h: 0,
}, },
headerMappings: { headerMappings: {
"x-grok-conv-id": "x-opencode-session", "x-grok-conv-id": "x-opencode-session",
@@ -112,9 +159,9 @@ export async function handler(
cost: { cost: {
input: 0.00000038, input: 0.00000038,
output: 0.00000153, output: 0.00000153,
reasoning: 0,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite5m: 0,
cacheWrite1h: 0,
}, },
headerMappings: {}, headerMappings: {},
}, },
@@ -156,14 +203,14 @@ export async function handler(
const headers = input.request.headers const headers = input.request.headers
headers.delete("host") headers.delete("host")
headers.delete("content-length") headers.delete("content-length")
headers.set("authorization", `Bearer ${MODEL.apiKey}`) opts.setAuthHeader(headers, MODEL.apiKey)
Object.entries(MODEL.headerMappings ?? {}).forEach(([k, v]) => { Object.entries(MODEL.headerMappings ?? {}).forEach(([k, v]) => {
headers.set(k, headers.get(v)!) headers.set(k, headers.get(v)!)
}) })
return headers return headers
})(), })(),
body: JSON.stringify({ body: JSON.stringify({
...(opts.transformBody?.(body) ?? body), ...(opts.modifyBody?.(body) ?? body),
model: MODEL.model, model: MODEL.model,
}), }),
}) })
@@ -199,32 +246,31 @@ export async function handler(
let buffer = "" let buffer = ""
let responseLength = 0 let responseLength = 0
let startTimestamp = Date.now() let startTimestamp = Date.now()
let receivedFirstByte = false
function pump(): Promise<void> { function pump(): Promise<void> {
return ( return (
reader?.read().then(async ({ done, value }) => { reader?.read().then(async ({ done, value }) => {
if (done) { if (done) {
logger.metric({ response_length: responseLength }) logger.metric({ response_length: responseLength })
const usage = opts.getStreamUsage()
if (usage) await trackUsage(usage)
c.close() c.close()
return return
} }
if (!receivedFirstByte) { if (responseLength === 0) {
receivedFirstByte = true
logger.metric({ time_to_first_byte: Date.now() - startTimestamp }) logger.metric({ time_to_first_byte: Date.now() - startTimestamp })
} }
responseLength += value.length
buffer += decoder.decode(value, { stream: true }) buffer += decoder.decode(value, { stream: true })
responseLength += value.length
const parts = buffer.split("\n\n") const parts = buffer.split("\n\n")
buffer = parts.pop() ?? "" buffer = parts.pop() ?? ""
for (const part of parts) { for (const part of parts) {
logger.debug(part) logger.debug(part)
const usage = opts.parseUsageChunk(part.trim()) opts.onStreamPart(part.trim())
if (usage) await trackUsage(usage)
} }
c.enqueue(value) c.enqueue(value)
@@ -255,10 +301,9 @@ export async function handler(
async function authenticate() { async function authenticate() {
try { try {
const authHeader = input.request.headers.get("authorization") const apiKey = opts.parseApiKey(input.request.headers)
if (!authHeader || !authHeader.startsWith("Bearer ")) throw new AuthError("Missing API key.") if (!apiKey) throw new AuthError("Missing API key.")
const apiKey = authHeader.split(" ")[1]
const key = await Database.use((tx) => const key = await Database.use((tx) =>
tx tx
.select({ .select({
@@ -300,26 +345,38 @@ export async function handler(
} }
async function trackUsage(usage: any) { async function trackUsage(usage: any) {
const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWriteTokens } = opts.buildUsage(usage) const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } =
opts.normalizeUsage(usage)
const inputCost = MODEL.cost.input * inputTokens * 100 const modelCost = typeof MODEL.cost === "function" ? MODEL.cost(usage) : MODEL.cost
const outputCost = MODEL.cost.output * outputTokens * 100
const reasoningCost = MODEL.cost.reasoning * reasoningTokens * 100 const inputCost = modelCost.input * inputTokens * 100
const cacheReadCost = MODEL.cost.cacheRead * cacheReadTokens * 100 const outputCost = modelCost.output * outputTokens * 100
const cacheWriteCost = MODEL.cost.cacheWrite * cacheWriteTokens * 100 const reasoningCost = reasoningTokens ? modelCost.output * reasoningTokens * 100 : undefined
const totalCostInCent = inputCost + outputCost + reasoningCost + cacheReadCost + cacheWriteCost const cacheReadCost = modelCost.cacheRead * cacheReadTokens * 100
const cacheWrite5mCost = cacheWrite5mTokens ? modelCost.cacheWrite5m * cacheWrite5mTokens * 100 : undefined
const cacheWrite1hCost = cacheWrite1hTokens ? modelCost.cacheWrite1h * cacheWrite1hTokens * 100 : undefined
const totalCostInCent =
inputCost +
outputCost +
(reasoningCost ?? 0) +
cacheReadCost +
(cacheWrite5mCost ?? 0) +
(cacheWrite1hCost ?? 0)
logger.metric({ logger.metric({
"tokens.input": inputTokens, "tokens.input": inputTokens,
"tokens.output": outputTokens, "tokens.output": outputTokens,
"tokens.reasoning": reasoningTokens, "tokens.reasoning": reasoningTokens,
"tokens.cache_read": cacheReadTokens, "tokens.cache_read": cacheReadTokens,
"tokens.cache_write": cacheWriteTokens, "tokens.cache_write_5m": cacheWrite5mTokens,
"tokens.cache_write_1h": cacheWrite1hTokens,
"cost.input": Math.round(inputCost), "cost.input": Math.round(inputCost),
"cost.output": Math.round(outputCost), "cost.output": Math.round(outputCost),
"cost.reasoning": Math.round(reasoningCost), "cost.reasoning": reasoningCost ? Math.round(reasoningCost) : undefined,
"cost.cache_read": Math.round(cacheReadCost), "cost.cache_read": Math.round(cacheReadCost),
"cost.cache_write": Math.round(cacheWriteCost), "cost.cache_write_5m": cacheWrite5mCost ? Math.round(cacheWrite5mCost) : undefined,
"cost.cache_write_1h": cacheWrite1hCost ? Math.round(cacheWrite1hCost) : undefined,
"cost.total": Math.round(totalCostInCent), "cost.total": Math.round(totalCostInCent),
}) })
@@ -335,7 +392,7 @@ export async function handler(
outputTokens, outputTokens,
reasoningTokens, reasoningTokens,
cacheReadTokens, cacheReadTokens,
cacheWriteTokens, cacheWriteTokens: (cacheWrite5mTokens ?? 0) + (cacheWrite1hTokens ?? 0),
cost, cost,
}) })
await tx await tx