fix: request payload for o1 models (#921)
Co-authored-by: Salman Mohammed <smohammed@squareup.com>
@@ -256,8 +256,16 @@ pub fn create_request(
     tools: &[Tool],
     image_format: &ImageFormat,
 ) -> anyhow::Result<Value, Error> {
+    if model_config.model_name.starts_with("o1-mini") {
+        return Err(anyhow!(
+            "o1-mini model is not currently supported since Goose uses tool calling."
+        ));
+    }
+
+    let is_o1 = model_config.model_name.starts_with("o1");
+
     let system_message = json!({
-        "role": "system",
+        "role": if is_o1 { "developer" } else { "system" },
         "content": system
     });
 
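As an illustration only (not part of this commit), here is a minimal standalone sketch of the guard and role selection added above; the plain `&str` model name, the example prompt, and the helper name are assumptions standing in for Goose's `ModelConfig` and the real `create_request`:

use anyhow::{anyhow, Result};
use serde_json::{json, Value};

// Sketch of the new o1 handling: reject o1-mini, then pick the system-message role.
fn system_message_for(model_name: &str, system: &str) -> Result<Value> {
    // o1-mini is rejected outright because Goose relies on tool calling.
    if model_name.starts_with("o1-mini") {
        return Err(anyhow!(
            "o1-mini model is not currently supported since Goose uses tool calling."
        ));
    }
    // Other o1-family models take the system prompt under the "developer" role.
    let is_o1 = model_name.starts_with("o1");
    Ok(json!({
        "role": if is_o1 { "developer" } else { "system" },
        "content": system
    }))
}

fn main() -> Result<()> {
    let msg = system_message_for("o1", "You are a helpful assistant.")?;
    assert_eq!(msg["role"], "developer");
    assert!(system_message_for("o1-mini", "hi").is_err());
    Ok(())
}
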
@@ -282,17 +290,27 @@ pub fn create_request(
             .unwrap()
             .insert("tools".to_string(), json!(tools_spec));
     }
-    if let Some(temp) = model_config.temperature {
-        payload
-            .as_object_mut()
-            .unwrap()
-            .insert("temperature".to_string(), json!(temp));
+    // o1 models currently don't support temperature
+    if !is_o1 {
+        if let Some(temp) = model_config.temperature {
+            payload
+                .as_object_mut()
+                .unwrap()
+                .insert("temperature".to_string(), json!(temp));
+        }
     }
+
+    // o1 models use max_completion_tokens instead of max_tokens
     if let Some(tokens) = model_config.max_tokens {
+        let key = if is_o1 {
+            "max_completion_tokens"
+        } else {
+            "max_tokens"
+        };
         payload
             .as_object_mut()
             .unwrap()
-            .insert("max_tokens".to_string(), json!(tokens));
+            .insert(key.to_string(), json!(tokens));
     }
     Ok(payload)
 }
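Again for illustration only, a consolidated sketch of the parameter handling in this hunk; `apply_model_limits` and its plain parameters are hypothetical stand-ins for the real code, which reads these values from `ModelConfig`:

use serde_json::{json, Value};

// Sketch: temperature is skipped for o1 models, and the token-limit key is renamed.
fn apply_model_limits(payload: &mut Value, is_o1: bool, temperature: Option<f32>, max_tokens: Option<u32>) {
    let obj = payload.as_object_mut().expect("payload must be a JSON object");
    // o1 models currently don't support temperature.
    if !is_o1 {
        if let Some(temp) = temperature {
            obj.insert("temperature".to_string(), json!(temp));
        }
    }
    // o1 models use max_completion_tokens instead of max_tokens.
    if let Some(tokens) = max_tokens {
        let key = if is_o1 { "max_completion_tokens" } else { "max_tokens" };
        obj.insert(key.to_string(), json!(tokens));
    }
}

fn main() {
    let mut payload = json!({ "model": "o1" });
    apply_model_limits(&mut payload, true, Some(0.7), Some(4096));
    assert!(payload.get("temperature").is_none()); // temperature omitted for o1
    assert_eq!(payload["max_completion_tokens"], 4096); // renamed key for o1
}
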
@@ -19,7 +19,6 @@ pub const OPEN_AI_KNOWN_MODELS: &[&str] = &[
     "gpt-4-turbo",
     "gpt-3.5-turbo",
     "o1",
-    "o1-mini",
 ];
 
 pub const OPEN_AI_DOC_URL: &str = "https://platform.openai.com/docs/models";
@@ -17,10 +17,10 @@ Goose is compatible with a wide range of LLM providers, allowing you to choose a
 |-----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------|
 | [Anthropic](https://www.anthropic.com/) | Offers Claude, an advanced AI model for natural language tasks. | `ANTHROPIC_API_KEY` |
 | [Databricks](https://www.databricks.com/) | Unified data analytics and AI platform for building and deploying models. | `DATABRICKS_HOST`, `DATABRICKS_TOKEN` |
 | [Gemini](https://ai.google.dev/gemini-api/docs) | Advanced LLMs by Google with multimodal capabilities (text, images). | `GOOGLE_API_KEY` |
 | [Groq](https://groq.com/) | High-performance inference hardware and tools for LLMs. | `GROQ_API_KEY` |
 | [Ollama](https://ollama.com/) | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms-ollama).** | N/A |
-| [OpenAI](https://platform.openai.com/api-keys) | Provides gpt-4o, o1, and other advanced language models. | `OPENAI_API_KEY` |
+| [OpenAI](https://platform.openai.com/api-keys) | Provides gpt-4o, o1, and other advanced language models. **o1-mini and o1-preview are not supported because Goose uses tool calling.** | `OPENAI_API_KEY` |
 | [OpenRouter](https://openrouter.ai/) | API gateway for unified access to various models with features like rate-limiting management. | `OPENROUTER_API_KEY` |
 
 :::tip Model Recommendation
@@ -64,7 +64,10 @@ To set up Google Gemini with Goose, follow these steps:
 ## DeepSeek-R1
 
 :::warning
-Depending on the model's size, you'll need a relatively powerful device to smoothly run local LLMs.
+In our testing, we have found that Google Gemini performs better than DeepSeek models, likely
+because Goose relies heavily on tool calling and DeepSeek does not support it natively yet.
+When using DeepSeek, we currently recommend the 70B model size, which requires
+a powerful device to run smoothly.
 :::
 
 Ollama provides open source LLMs, such as `DeepSeek-r1`, that you can install and run locally.