mirror of
https://github.com/aljazceru/goose.git
synced 2025-12-17 22:24:21 +01:00
fix: request payload for o1 models (#921)
Co-authored-by: Salman Mohammed <smohammed@squareup.com>
This commit is contained in:
@@ -256,8 +256,16 @@ pub fn create_request(
|
||||
tools: &[Tool],
|
||||
image_format: &ImageFormat,
|
||||
) -> anyhow::Result<Value, Error> {
|
||||
if model_config.model_name.starts_with("o1-mini") {
|
||||
return Err(anyhow!(
|
||||
"o1-mini model is not currently supported since Goose uses tool calling."
|
||||
));
|
||||
}
|
||||
|
||||
let is_o1 = model_config.model_name.starts_with("o1");
|
||||
|
||||
let system_message = json!({
|
||||
"role": "system",
|
||||
"role": if is_o1 { "developer" } else { "system" },
|
||||
"content": system
|
||||
});
|
||||
|
||||
@@ -282,17 +290,27 @@ pub fn create_request(
|
||||
.unwrap()
|
||||
.insert("tools".to_string(), json!(tools_spec));
|
||||
}
|
||||
if let Some(temp) = model_config.temperature {
|
||||
payload
|
||||
.as_object_mut()
|
||||
.unwrap()
|
||||
.insert("temperature".to_string(), json!(temp));
|
||||
// o1 models currently don't support temperature
|
||||
if !is_o1 {
|
||||
if let Some(temp) = model_config.temperature {
|
||||
payload
|
||||
.as_object_mut()
|
||||
.unwrap()
|
||||
.insert("temperature".to_string(), json!(temp));
|
||||
}
|
||||
}
|
||||
|
||||
// o1 models use max_completion_tokens instead of max_tokens
|
||||
if let Some(tokens) = model_config.max_tokens {
|
||||
let key = if is_o1 {
|
||||
"max_completion_tokens"
|
||||
} else {
|
||||
"max_tokens"
|
||||
};
|
||||
payload
|
||||
.as_object_mut()
|
||||
.unwrap()
|
||||
.insert("max_tokens".to_string(), json!(tokens));
|
||||
.insert(key.to_string(), json!(tokens));
|
||||
}
|
||||
Ok(payload)
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ pub const OPEN_AI_KNOWN_MODELS: &[&str] = &[
|
||||
"gpt-4-turbo",
|
||||
"gpt-3.5-turbo",
|
||||
"o1",
|
||||
"o1-mini",
|
||||
];
|
||||
|
||||
pub const OPEN_AI_DOC_URL: &str = "https://platform.openai.com/docs/models";
|
||||
|
||||
@@ -17,10 +17,10 @@ Goose is compatible with a wide range of LLM providers, allowing you to choose a
|
||||
|-----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------|
|
||||
| [Anthropic](https://www.anthropic.com/) | Offers Claude, an advanced AI model for natural language tasks. | `ANTHROPIC_API_KEY` |
|
||||
| [Databricks](https://www.databricks.com/) | Unified data analytics and AI platform for building and deploying models. | `DATABRICKS_HOST`, `DATABRICKS_TOKEN` |
|
||||
| [Gemini](https://ai.google.dev/gemini-api/docs) | Advanced LLMs by Google with multimodal capabilities (text, images). | `GOOGLE_API_KEY` |
|
||||
| [Gemini](https://ai.google.dev/gemini-api/docs) | Advanced LLMs by Google with multimodal capabilities (text, images). | `GOOGLE_API_KEY` |
|
||||
| [Groq](https://groq.com/) | High-performance inference hardware and tools for LLMs. | `GROQ_API_KEY` |
|
||||
| [Ollama](https://ollama.com/) | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms-ollama).** | N/A |
|
||||
| [OpenAI](https://platform.openai.com/api-keys) | Provides gpt-4o, o1, and other advanced language models. | `OPENAI_API_KEY` |
|
||||
| [OpenAI](https://platform.openai.com/api-keys) | Provides gpt-4o, o1, and other advanced language models. **o1-mini and o1-preview are not supported because Goose uses tool calling.** | `OPENAI_API_KEY` |
|
||||
| [OpenRouter](https://openrouter.ai/) | API gateway for unified access to various models with features like rate-limiting management. | `OPENROUTER_API_KEY` |
|
||||
|
||||
:::tip Model Recommendation
|
||||
|
||||
@@ -64,7 +64,10 @@ To set up Google Gemini with Goose, follow these steps:
|
||||
## DeepSeek-R1
|
||||
|
||||
:::warning
|
||||
Depending on the model's size, you'll need a relatively powerful device to smoothly run local LLMs.
|
||||
In our testing, we have found that Google Gemini performs better than DeepSeek models, likely
|
||||
because Goose relies heavily on tool calling and DeepSeek does not support it natively yet.
|
||||
When using DeepSeek, we currently recommend the 70B model size, which requires
|
||||
a powerful device to run smoothly.
|
||||
:::
|
||||
|
||||
Ollama provides open source LLMs, such as `DeepSeek-r1`, that you can install and run locally.
|
||||
|
||||
Reference in New Issue
Block a user