fix: request payload for o1 models (#921)

Co-authored-by: Salman Mohammed <smohammed@squareup.com>
This commit is contained in:
Wendy Tang
2025-01-30 07:24:32 -08:00
committed by GitHub
parent 6e9423b8c4
commit e8ced5a385
4 changed files with 31 additions and 11 deletions

View File

@@ -256,8 +256,16 @@ pub fn create_request(
tools: &[Tool], tools: &[Tool],
image_format: &ImageFormat, image_format: &ImageFormat,
) -> anyhow::Result<Value, Error> { ) -> anyhow::Result<Value, Error> {
if model_config.model_name.starts_with("o1-mini") {
return Err(anyhow!(
"o1-mini model is not currently supported since Goose uses tool calling."
));
}
let is_o1 = model_config.model_name.starts_with("o1");
let system_message = json!({ let system_message = json!({
"role": "system", "role": if is_o1 { "developer" } else { "system" },
"content": system "content": system
}); });
@@ -282,17 +290,27 @@ pub fn create_request(
.unwrap() .unwrap()
.insert("tools".to_string(), json!(tools_spec)); .insert("tools".to_string(), json!(tools_spec));
} }
// o1 models currently don't support temperature
if !is_o1 {
if let Some(temp) = model_config.temperature { if let Some(temp) = model_config.temperature {
payload payload
.as_object_mut() .as_object_mut()
.unwrap() .unwrap()
.insert("temperature".to_string(), json!(temp)); .insert("temperature".to_string(), json!(temp));
} }
}
// o1 models use max_completion_tokens instead of max_tokens
if let Some(tokens) = model_config.max_tokens { if let Some(tokens) = model_config.max_tokens {
let key = if is_o1 {
"max_completion_tokens"
} else {
"max_tokens"
};
payload payload
.as_object_mut() .as_object_mut()
.unwrap() .unwrap()
.insert("max_tokens".to_string(), json!(tokens)); .insert(key.to_string(), json!(tokens));
} }
Ok(payload) Ok(payload)
} }

View File

@@ -19,7 +19,6 @@ pub const OPEN_AI_KNOWN_MODELS: &[&str] = &[
"gpt-4-turbo", "gpt-4-turbo",
"gpt-3.5-turbo", "gpt-3.5-turbo",
"o1", "o1",
"o1-mini",
]; ];
pub const OPEN_AI_DOC_URL: &str = "https://platform.openai.com/docs/models"; pub const OPEN_AI_DOC_URL: &str = "https://platform.openai.com/docs/models";

View File

@@ -20,7 +20,7 @@ Goose is compatible with a wide range of LLM providers, allowing you to choose a
| [Gemini](https://ai.google.dev/gemini-api/docs) | Advanced LLMs by Google with multimodal capabilities (text, images). | `GOOGLE_API_KEY` | | [Gemini](https://ai.google.dev/gemini-api/docs) | Advanced LLMs by Google with multimodal capabilities (text, images). | `GOOGLE_API_KEY` |
| [Groq](https://groq.com/) | High-performance inference hardware and tools for LLMs. | `GROQ_API_KEY` | | [Groq](https://groq.com/) | High-performance inference hardware and tools for LLMs. | `GROQ_API_KEY` |
| [Ollama](https://ollama.com/) | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms-ollama).** | N/A | | [Ollama](https://ollama.com/) | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms-ollama).** | N/A |
| [OpenAI](https://platform.openai.com/api-keys) | Provides gpt-4o, o1, and other advanced language models. | `OPENAI_API_KEY` | | [OpenAI](https://platform.openai.com/api-keys) | Provides gpt-4o, o1, and other advanced language models. **o1-mini and o1-preview are not supported because Goose uses tool calling.** | `OPENAI_API_KEY` |
| [OpenRouter](https://openrouter.ai/) | API gateway for unified access to various models with features like rate-limiting management. | `OPENROUTER_API_KEY` | | [OpenRouter](https://openrouter.ai/) | API gateway for unified access to various models with features like rate-limiting management. | `OPENROUTER_API_KEY` |
:::tip Model Recommendation :::tip Model Recommendation

View File

@@ -64,7 +64,10 @@ To set up Google Gemini with Goose, follow these steps:
## DeepSeek-R1 ## DeepSeek-R1
:::warning :::warning
Depending on the model's size, you'll need a relatively powerful device to smoothly run local LLMs. In our testing, we have found that Google Gemini performs better than DeepSeek models, likely
because Goose relies heavily on tool calling, which DeepSeek does not yet support natively.
When using DeepSeek, we currently recommend the 70B model size, which requires
a powerful device to run smoothly.
::: :::
Ollama provides open source LLMs, such as `DeepSeek-r1`, that you can install and run locally. Ollama provides open source LLMs, such as `DeepSeek-r1`, that you can install and run locally.