fix: max output tokens when using large thinking budget (#2976)

Author: Aiden Cline
Date: 2025-10-04 23:38:41 -05:00
Committed by: GitHub
Parent: 5f7ae6477b
Commit: 71a7e8ef36
2 changed files with 26 additions and 10 deletions


@@ -105,18 +105,29 @@ export namespace ProviderTransform {
     return result
   }
 
-  export function maxOutputTokens(providerID: string, outputLimit: number, options: Record<string, any>): number {
+  export function maxOutputTokens(
+    providerID: string,
+    options: Record<string, any>,
+    modelLimit: number,
+    globalLimit: number,
+  ): number {
+    const modelCap = modelLimit || globalLimit
+    const standardLimit = Math.min(modelCap, globalLimit)
     if (providerID === "anthropic") {
-      const thinking = options["thinking"]
-      if (typeof thinking === "object" && thinking !== null) {
-        const type = thinking["type"]
-        const budgetTokens = thinking["budgetTokens"]
-        if (type === "enabled" && typeof budgetTokens === "number" && budgetTokens > 0) {
-          return outputLimit - budgetTokens
+      const thinking = options?.["thinking"]
+      const budgetTokens = typeof thinking?.["budgetTokens"] === "number" ? thinking["budgetTokens"] : 0
+      const enabled = thinking?.["type"] === "enabled"
+      if (enabled && budgetTokens > 0) {
+        // Return text tokens so that text + thinking <= model cap, preferring 32k text when possible.
+        if (budgetTokens + standardLimit <= modelCap) {
+          return standardLimit
         }
+        return modelCap - budgetTokens
       }
     }
-    return outputLimit
+    return standardLimit
   }
 
   export function schema(_providerID: string, _modelID: string, schema: JSONSchema.BaseSchema) {
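
For context, here is how the new calculation plays out with a large thinking budget. This is an illustrative sketch only: the 32k global cap is inferred from the inline comment, and the 64k model limit and 50k budget are assumed numbers, not values from the repository.

// Illustrative sketch of the new logic with assumed numbers.
const modelLimit = 64_000   // hypothetical model output limit
const globalLimit = 32_000  // assumed global cap (the "32k text" in the comment above)
const budgetTokens = 50_000 // hypothetical thinking budget

const modelCap = modelLimit || globalLimit            // 64_000
const standardLimit = Math.min(modelCap, globalLimit) // 32_000

// Old behavior: outputLimit (32_000) - budgetTokens (50_000) = -18_000, an invalid negative cap.
// New behavior: 50_000 + 32_000 exceeds the 64_000 model cap, so the function returns
// modelCap - budgetTokens = 14_000 text tokens, keeping text + thinking within the model cap.
const maxOutput = budgetTokens + standardLimit <= modelCap ? standardLimit : modelCap - budgetTokens
console.log(maxOutput) // 14_000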


@@ -159,7 +159,7 @@ export namespace SessionPrompt {
       agent,
       model: input.model,
     }).then((x) => Provider.getModel(x.providerID, x.modelID))
-    const outputLimit = Math.min(model.info.limit.output, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX
     using abort = lock(input.sessionID)
     const system = await resolveSystemPrompt({
@@ -266,7 +266,12 @@ export namespace SessionPrompt {
             : undefined,
       maxRetries: 10,
       activeTools: Object.keys(tools).filter((x) => x !== "invalid"),
-      maxOutputTokens: ProviderTransform.maxOutputTokens(model.providerID, outputLimit, params.options),
+      maxOutputTokens: ProviderTransform.maxOutputTokens(
+        model.providerID,
+        params.options,
+        model.info.limit.output,
+        OUTPUT_TOKEN_MAX,
+      ),
       abortSignal: abort.signal,
       providerOptions: {
         [model.npm === "@ai-sdk/openai" ? "openai" : model.providerID]: params.options,
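
The call site no longer pre-clamps the limit before handing it to the transform; it forwards the raw model limit and the global cap so the text/thinking split is decided in one place. A minimal sketch of the difference, assuming OUTPUT_TOKEN_MAX is 32_000 and a 64_000-token model limit (both assumptions, not repository values):

// Before: the limit was clamped up front, so maxOutputTokens never saw the full model cap.
const OUTPUT_TOKEN_MAX = 32_000 // assumed value of the global cap
const modelOutputLimit = 64_000 // hypothetical model.info.limit.output
const outputLimit = Math.min(modelOutputLimit, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX // 32_000

// After: both limits are passed through, so the transform can use the 32_000..64_000 headroom
// for thinking tokens instead of subtracting the budget from an already-clamped 32_000.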