feat(gcpvertexai): retry Anthropic API HTTP 529 (overloaded) responses like HTTP 429 rate limits (#3026)

Author: Uddhav Kambli
Date: 2025-07-16 22:10:48 -04:00
Committed by: GitHub
Co-authored-by: Michael Neale <michael.neale@gmail.com>
Parent: 33c082cd5b
Commit: 21b79ad240

4 changed files with 234 additions and 75 deletions

File 1 of 4: bundled provider configuration (JSON)

@@ -20,7 +20,7 @@
   "gcp_vertex_ai": {
     "name": "GCP Vertex AI",
     "description": "Use Vertex AI platform models",
-    "models": ["claude-3-5-haiku@20241022", "claude-3-5-sonnet@20240620", "claude-3-5-sonnet-v2@20241022", "claude-3-7-sonnet@20250219", "gemini-1.5-pro-002", "gemini-2.0-flash-001", "gemini-2.0-pro-exp-02-05", "gemini-2.5-pro-exp-03-25", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-05-06"],
+    "models": ["claude-3-5-haiku@20241022", "claude-3-5-sonnet@20240620", "claude-3-5-sonnet-v2@20241022", "claude-3-7-sonnet@20250219", "claude-sonnet-4@20250514", "claude-opus-4@20250514", "gemini-1.5-pro-002", "gemini-2.0-flash-001", "gemini-2.0-pro-exp-02-05", "gemini-2.5-pro-exp-03-25", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-05-06", "gemini-2.5-flash", "gemini-2.5-pro"],
     "required_keys": ["GCP_PROJECT_ID", "GCP_LOCATION"]
   },
   "google": {

File 2 of 4: GCP Vertex AI model definitions (Rust)

@@ -83,6 +83,8 @@ pub enum ClaudeVersion {
     Haiku35,
     /// Claude Sonnet 4
     Sonnet4,
+    /// Claude Opus 4
+    Opus4,
     /// Generic Claude model for custom or new versions
     Generic(String),
 }
@@ -102,6 +104,10 @@ pub enum GeminiVersion {
     Flash25Preview,
     /// Gemini 2.5 Pro Preview version
     Pro25Preview,
+    /// Gemini 2.5 Flash version
+    Flash25,
+    /// Gemini 2.5 Pro version
+    Pro25,
     /// Generic Gemini model for custom or new versions
     Generic(String),
 }
@@ -115,6 +121,7 @@ impl fmt::Display for GcpVertexAIModel {
                 ClaudeVersion::Sonnet37 => "claude-3-7-sonnet@20250219",
                 ClaudeVersion::Haiku35 => "claude-3-5-haiku@20241022",
                 ClaudeVersion::Sonnet4 => "claude-sonnet-4@20250514",
+                ClaudeVersion::Opus4 => "claude-opus-4@20250514",
                 ClaudeVersion::Generic(name) => name,
             },
             Self::Gemini(version) => match version {
@@ -124,6 +131,8 @@
                 GeminiVersion::Pro25Exp => "gemini-2.5-pro-exp-03-25",
                 GeminiVersion::Flash25Preview => "gemini-2.5-flash-preview-05-20",
                 GeminiVersion::Pro25Preview => "gemini-2.5-pro-preview-05-06",
+                GeminiVersion::Flash25 => "gemini-2.5-flash",
+                GeminiVersion::Pro25 => "gemini-2.5-pro",
                 GeminiVersion::Generic(name) => name,
             },
         };
@@ -156,12 +165,15 @@ impl TryFrom<&str> for GcpVertexAIModel {
             "claude-3-7-sonnet@20250219" => Ok(Self::Claude(ClaudeVersion::Sonnet37)),
             "claude-3-5-haiku@20241022" => Ok(Self::Claude(ClaudeVersion::Haiku35)),
             "claude-sonnet-4@20250514" => Ok(Self::Claude(ClaudeVersion::Sonnet4)),
+            "claude-opus-4@20250514" => Ok(Self::Claude(ClaudeVersion::Opus4)),
             "gemini-1.5-pro-002" => Ok(Self::Gemini(GeminiVersion::Pro15)),
             "gemini-2.0-flash-001" => Ok(Self::Gemini(GeminiVersion::Flash20)),
             "gemini-2.0-pro-exp-02-05" => Ok(Self::Gemini(GeminiVersion::Pro20Exp)),
             "gemini-2.5-pro-exp-03-25" => Ok(Self::Gemini(GeminiVersion::Pro25Exp)),
             "gemini-2.5-flash-preview-05-20" => Ok(Self::Gemini(GeminiVersion::Flash25Preview)),
             "gemini-2.5-pro-preview-05-06" => Ok(Self::Gemini(GeminiVersion::Pro25Preview)),
+            "gemini-2.5-flash" => Ok(Self::Gemini(GeminiVersion::Flash25)),
+            "gemini-2.5-pro" => Ok(Self::Gemini(GeminiVersion::Pro25)),
             // Generic models based on prefix matching
             _ if s.starts_with("claude-") => {
                 Ok(Self::Claude(ClaudeVersion::Generic(s.to_string())))
@@ -427,7 +439,7 @@ mod tests {
         }

         // Test generic Gemini models
-        let gemini_models = ["gemini-3-pro", "gemini-2.5-flash", "gemini-experimental"];
+        let gemini_models = ["gemini-3-pro", "gemini-2.0-flash", "gemini-experimental"];
         for model_id in gemini_models {
             let model = GcpVertexAIModel::try_from(model_id)?;
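Taken together, the `Display` and `TryFrom<&str>` changes keep the string-to-enum mapping symmetric for the new models. A minimal sketch of that round-trip, assuming the `TryFrom` error converts into `anyhow::Error` (as the tests' use of `?` suggests); `gemini-9.9-ultra` is a made-up ID used only to show the prefix fallback:

```rust
use anyhow::Result;

fn round_trip_examples() -> Result<()> {
    // New dedicated variants parse to themselves and print back the same ID
    let model = GcpVertexAIModel::try_from("claude-opus-4@20250514")?;
    assert!(matches!(model, GcpVertexAIModel::Claude(ClaudeVersion::Opus4)));
    assert_eq!(model.to_string(), "claude-opus-4@20250514");

    let model = GcpVertexAIModel::try_from("gemini-2.5-pro")?;
    assert!(matches!(model, GcpVertexAIModel::Gemini(GeminiVersion::Pro25)));

    // Unrecognized IDs with a known prefix fall through to Generic (hypothetical ID)
    let model = GcpVertexAIModel::try_from("gemini-9.9-ultra")?;
    assert!(matches!(
        model,
        GcpVertexAIModel::Gemini(GeminiVersion::Generic(_))
    ));
    Ok(())
}
```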

File 3 of 4: GCP Vertex AI provider implementation (Rust)

@@ -2,6 +2,7 @@ use std::time::Duration;
 use anyhow::Result;
 use async_trait::async_trait;
+use once_cell::sync::Lazy;
 use reqwest::{Client, StatusCode};
 use serde_json::Value;
 use tokio::time::sleep;
@@ -34,6 +35,9 @@ const DEFAULT_MAX_RETRIES: usize = 6;
 const DEFAULT_BACKOFF_MULTIPLIER: f64 = 2.0;
 /// Default maximum interval for retry (in milliseconds)
 const DEFAULT_MAX_RETRY_INTERVAL_MS: u64 = 320_000;
+/// Status code for Anthropic's API overloaded error (529)
+static STATUS_API_OVERLOADED: Lazy<StatusCode> =
+    Lazy::new(|| StatusCode::from_u16(529).expect("Valid status code 529 for API_OVERLOADED"));

 /// Represents errors specific to GCP Vertex AI operations.
 #[derive(Debug, thiserror::Error)]
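Why `Lazy` plus `expect` here: `reqwest` (via the `http` crate) has no named constant for 529, but `StatusCode::from_u16` accepts any value in the 100..=999 range, so this particular construction cannot fail at runtime. A small sketch of that behavior:

```rust
use reqwest::StatusCode;

fn main() {
    // 529 is non-standard but representable (100..=999 are accepted)
    let overloaded = StatusCode::from_u16(529).expect("529 is in range");
    assert_eq!(overloaded.as_u16(), 529);
    // 5xx codes report as server errors, even non-standard ones
    assert!(overloaded.is_server_error());
    // Values outside 100..=999 are rejected
    assert!(StatusCode::from_u16(1000).is_err());
}
```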
@@ -50,8 +54,10 @@ enum GcpVertexAIError {
 /// Retry configuration for handling rate limit errors
 #[derive(Debug, Clone)]
 struct RetryConfig {
-    /// Maximum number of retry attempts
-    max_retries: usize,
+    /// Maximum number of retry attempts for 429 errors
+    max_rate_limit_retries: usize,
+    /// Maximum number of retry attempts for 529 errors
+    max_overloaded_retries: usize,
     /// Initial interval between retries in milliseconds
     initial_interval_ms: u64,
     /// Multiplier for backoff (exponential)
@@ -63,7 +69,8 @@ struct RetryConfig {
 impl Default for RetryConfig {
     fn default() -> Self {
         Self {
-            max_retries: DEFAULT_MAX_RETRIES,
+            max_rate_limit_retries: DEFAULT_MAX_RETRIES,
+            max_overloaded_retries: DEFAULT_MAX_RETRIES,
             initial_interval_ms: DEFAULT_INITIAL_RETRY_INTERVAL_MS,
             backoff_multiplier: DEFAULT_BACKOFF_MULTIPLIER,
             max_interval_ms: DEFAULT_MAX_RETRY_INTERVAL_MS,
@@ -92,6 +99,19 @@ impl RetryConfig {
         Duration::from_millis(jittered_delay_ms)
     }

+    /// Get max retries for a specific error type
+    #[allow(dead_code)] // Used in tests
+    fn max_retries_for_status(&self, status: StatusCode) -> usize {
+        if status == StatusCode::TOO_MANY_REQUESTS {
+            self.max_rate_limit_retries
+        } else if status == *STATUS_API_OVERLOADED {
+            self.max_overloaded_retries
+        } else {
+            // Default to rate limit retries for any other status code
+            self.max_rate_limit_retries
+        }
+    }
 }

 /// Provider implementation for Google Cloud Platform's Vertex AI service.
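For context, `delay_for_attempt` (unchanged by this commit) produces capped exponential backoff with jitter. Here is a minimal sketch of that shape, inferred from the defaults above and the `max_interval_ms * 1.2` ceiling asserted in the tests; the exact jitter distribution is an assumption:

```rust
use rand::Rng;
use std::time::Duration;

/// Sketch of capped exponential backoff with jitter, not the provider's
/// actual implementation.
fn delay_for_attempt(attempt: usize, initial_ms: u64, multiplier: f64, max_ms: u64) -> Duration {
    // Exponential growth: initial * multiplier^(attempt - 1), capped at max_ms
    let exp = attempt.saturating_sub(1) as i32;
    let base = (initial_ms as f64 * multiplier.powi(exp)).min(max_ms as f64);
    // Jitter of up to +20% (assumed), matching the tests' 1.2x ceiling
    let jitter = rand::thread_rng().gen_range(1.0..=1.2);
    Duration::from_millis((base * jitter) as u64)
}

// With the defaults (5s initial, 2.0 multiplier, 320s cap) successive delays
// run roughly 5s, 10s, 20s, 40s, 80s, 160s, then plateau near the cap.
```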
@@ -172,10 +192,32 @@ impl GcpVertexAIProvider {
     /// Loads retry configuration from environment variables or uses defaults.
     fn load_retry_config(config: &crate::config::Config) -> RetryConfig {
-        let max_retries = config
-            .get_param("GCP_MAX_RETRIES")
-            .ok()
-            .and_then(|v: String| v.parse::<usize>().ok())
-            .unwrap_or(DEFAULT_MAX_RETRIES);
+        // Load max retries for 429 rate limit errors
+        let max_rate_limit_retries = config
+            .get_param("GCP_MAX_RATE_LIMIT_RETRIES")
+            .ok()
+            .and_then(|v: String| v.parse::<usize>().ok())
+            .or_else(|| {
+                // Fall back to generic GCP_MAX_RETRIES if specific one isn't set
+                config
+                    .get_param("GCP_MAX_RETRIES")
+                    .ok()
+                    .and_then(|v: String| v.parse::<usize>().ok())
+            })
+            .unwrap_or(DEFAULT_MAX_RETRIES);
+
+        // Load max retries for 529 API overloaded errors
+        let max_overloaded_retries = config
+            .get_param("GCP_MAX_OVERLOADED_RETRIES")
+            .ok()
+            .and_then(|v: String| v.parse::<usize>().ok())
+            .or_else(|| {
+                // Fall back to generic GCP_MAX_RETRIES if specific one isn't set
+                config
+                    .get_param("GCP_MAX_RETRIES")
+                    .ok()
+                    .and_then(|v: String| v.parse::<usize>().ok())
+            })
+            .unwrap_or(DEFAULT_MAX_RETRIES);

         let initial_interval_ms = config
@@ -197,7 +239,8 @@
             .unwrap_or(DEFAULT_MAX_RETRY_INTERVAL_MS);

         RetryConfig {
-            max_retries,
+            max_rate_limit_retries,
+            max_overloaded_retries,
             initial_interval_ms,
             backoff_multiplier,
             max_interval_ms,
@@ -269,7 +312,7 @@
     }

     /// Makes an authenticated POST request to the Vertex AI API at a specific location.
-    /// Includes retry logic for 429 Too Many Requests errors.
+    /// Includes retry logic for 429 (Too Many Requests) and 529 (API Overloaded) errors.
     ///
     /// # Arguments
     /// * `payload` - The request payload to send
@@ -285,21 +328,12 @@
             .build_request_url(context.provider(), location)
             .map_err(|e| ProviderError::RequestFailed(e.to_string()))?;

-        // Initialize retry counter
-        let mut attempts = 0;
+        // Initialize separate counters for different error types
+        let mut rate_limit_attempts = 0;
+        let mut overloaded_attempts = 0;
         let mut last_error = None;

         loop {
-            // Check if we've exceeded max retries
-            if attempts > 0 && attempts > self.retry_config.max_retries {
-                let error_msg = format!(
-                    "Exceeded maximum retry attempts ({}) for rate limiting (429)",
-                    self.retry_config.max_retries
-                );
-                tracing::error!("{}", error_msg);
-                return Err(last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg)));
-            }
-
             // Get a fresh auth token for each attempt
             let auth_header = self
                 .get_auth_header()
@@ -318,60 +352,116 @@
             let status = response.status();

-            // If not a 429, process normally
-            if status != StatusCode::TOO_MANY_REQUESTS {
-                let response_json = response.json::<Value>().await.map_err(|e| {
-                    ProviderError::RequestFailed(format!("Failed to parse response: {e}"))
-                })?;
-
-                return match status {
-                    StatusCode::OK => Ok(response_json),
-                    StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
-                        tracing::debug!(
-                            "Authentication failed. Status: {status}, Payload: {payload:?}"
-                        );
-                        Err(ProviderError::Authentication(format!(
-                            "Authentication failed: {response_json:?}"
-                        )))
-                    }
-                    _ => {
-                        tracing::debug!(
-                            "Request failed. Status: {status}, Response: {response_json:?}"
-                        );
-                        Err(ProviderError::RequestFailed(format!(
-                            "Request failed with status {status}: {response_json:?}"
-                        )))
-                    }
-                };
-            }
-
-            // Handle 429 Too Many Requests
-            attempts += 1;
-
-            // Try to parse response for more detailed error info
-            let cite_gcp_vertex_429 =
-                "See https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429";
-            let response_text = response.text().await.unwrap_or_default();
-            let quota_error = if response_text.contains("Exceeded the Provisioned Throughput") {
-                format!("Exceeded the Provisioned Throughput: {cite_gcp_vertex_429}.")
-            } else {
-                format!("Pay-as-you-go resource exhausted: {cite_gcp_vertex_429}.")
-            };
-
-            tracing::warn!(
-                "Rate limit exceeded (attempt {}/{}): {}. Retrying after backoff...",
-                attempts,
-                self.retry_config.max_retries,
-                quota_error
-            );
-
-            // Store the error in case we need to return it after max retries
-            last_error = Some(ProviderError::RateLimitExceeded(quota_error));
-
-            // Calculate and apply the backoff delay
-            let delay = self.retry_config.delay_for_attempt(attempts);
-            tracing::info!("Backing off for {:?} before retry", delay);
-            sleep(delay).await;
+            // Handle 429 Too Many Requests and 529 API Overloaded errors
+            match status {
+                status if status == StatusCode::TOO_MANY_REQUESTS => {
+                    rate_limit_attempts += 1;
+
+                    if rate_limit_attempts > self.retry_config.max_rate_limit_retries {
+                        let error_msg = format!(
+                            "Exceeded maximum retry attempts ({}) for rate limiting (429) errors",
+                            self.retry_config.max_rate_limit_retries
+                        );
+                        tracing::error!("{}", error_msg);
+                        return Err(
+                            last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg))
+                        );
+                    }
+
+                    // Try to parse response for more detailed error info
+                    let cite_gcp_vertex_429 =
+                        "See https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429";
+                    let response_text = response.text().await.unwrap_or_default();
+                    let error_message =
+                        if response_text.contains("Exceeded the Provisioned Throughput") {
+                            // Handle 429 rate limit due to throughput limits
+                            format!("Exceeded the Provisioned Throughput: {cite_gcp_vertex_429}")
+                        } else {
+                            // Handle generic 429 rate limit
+                            format!("Pay-as-you-go resource exhausted: {cite_gcp_vertex_429}")
+                        };
+
+                    tracing::warn!(
+                        "Rate limit exceeded error (429) (attempt {}/{}): {}. Retrying after backoff...",
+                        rate_limit_attempts,
+                        self.retry_config.max_rate_limit_retries,
+                        error_message
+                    );
+
+                    // Store the error in case we need to return it after max retries
+                    last_error = Some(ProviderError::RateLimitExceeded(error_message));
+
+                    // Calculate and apply the backoff delay
+                    let delay = self.retry_config.delay_for_attempt(rate_limit_attempts);
+                    tracing::info!("Backing off for {:?} before retry (rate limit 429)", delay);
+                    sleep(delay).await;
+                }
+                status if status == *STATUS_API_OVERLOADED => {
+                    overloaded_attempts += 1;
+
+                    if overloaded_attempts > self.retry_config.max_overloaded_retries {
+                        let error_msg = format!(
+                            "Exceeded maximum retry attempts ({}) for API overloaded (529) errors",
+                            self.retry_config.max_overloaded_retries
+                        );
+                        tracing::error!("{}", error_msg);
+                        return Err(
+                            last_error.unwrap_or(ProviderError::RateLimitExceeded(error_msg))
+                        );
+                    }
+
+                    // Handle 529 Overloaded error (https://docs.anthropic.com/en/api/errors)
+                    let error_message =
+                        "Vertex AI Provider API is temporarily overloaded. This is similar to a rate limit \
+                         error but indicates backend processing capacity issues."
+                            .to_string();
+
+                    tracing::warn!(
+                        "API overloaded error (529) (attempt {}/{}): {}. Retrying after backoff...",
+                        overloaded_attempts,
+                        self.retry_config.max_overloaded_retries,
+                        error_message
+                    );
+
+                    // Store the error in case we need to return it after max retries
+                    last_error = Some(ProviderError::RateLimitExceeded(error_message));
+
+                    // Calculate and apply the backoff delay
+                    let delay = self.retry_config.delay_for_attempt(overloaded_attempts);
+                    tracing::info!(
+                        "Backing off for {:?} before retry (API overloaded 529)",
+                        delay
+                    );
+                    sleep(delay).await;
+                }
+                // For any other status codes, process normally
+                _ => {
+                    let response_json = response.json::<Value>().await.map_err(|e| {
+                        ProviderError::RequestFailed(format!("Failed to parse response: {e}"))
+                    })?;
+
+                    return match status {
+                        StatusCode::OK => Ok(response_json),
+                        StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
+                            tracing::debug!(
+                                "Authentication failed. Status: {status}, Payload: {payload:?}"
+                            );
+                            Err(ProviderError::Authentication(format!(
+                                "Authentication failed: {response_json:?}"
+                            )))
+                        }
+                        _ => {
+                            tracing::debug!(
+                                "Request failed. Status: {status}, Response: {response_json:?}"
+                            );
+                            Err(ProviderError::RequestFailed(format!(
+                                "Request failed with status {status}: {response_json:?}"
+                            )))
+                        }
+                    };
+                }
+            }
         }
     }
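The heart of the change is that 429 and 529 now consume independent retry budgets, so a run of rate-limit errors cannot exhaust the allowance reserved for overload errors (and vice versa). A stripped-down sketch of that control flow; the names and signatures here are illustrative, not the provider's actual API:

```rust
use std::time::Duration;

/// Skeleton of the loop above: one attempt counter and one budget per
/// retryable status. `send` and `backoff` stand in for the real request
/// and `delay_for_attempt` logic.
async fn post_with_retries(
    mut send: impl FnMut() -> u16,       // returns an HTTP status code
    backoff: impl Fn(usize) -> Duration, // delay_for_attempt equivalent
    max_429: usize,
    max_529: usize,
) -> Result<(), String> {
    let (mut attempts_429, mut attempts_529) = (0usize, 0usize);
    loop {
        match send() {
            429 => {
                attempts_429 += 1;
                if attempts_429 > max_429 {
                    return Err("exceeded 429 retry budget".into());
                }
                tokio::time::sleep(backoff(attempts_429)).await;
            }
            529 => {
                attempts_529 += 1;
                if attempts_529 > max_529 {
                    return Err("exceeded 529 retry budget".into());
                }
                tokio::time::sleep(backoff(attempts_529)).await;
            }
            200 => return Ok(()),
            other => return Err(format!("unretryable status {other}")),
        }
    }
}
```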
@@ -431,12 +521,15 @@ impl Provider for GcpVertexAIProvider {
             GcpVertexAIModel::Claude(ClaudeVersion::Sonnet37),
             GcpVertexAIModel::Claude(ClaudeVersion::Haiku35),
             GcpVertexAIModel::Claude(ClaudeVersion::Sonnet4),
+            GcpVertexAIModel::Claude(ClaudeVersion::Opus4),
             GcpVertexAIModel::Gemini(GeminiVersion::Pro15),
             GcpVertexAIModel::Gemini(GeminiVersion::Flash20),
             GcpVertexAIModel::Gemini(GeminiVersion::Pro20Exp),
             GcpVertexAIModel::Gemini(GeminiVersion::Pro25Exp),
             GcpVertexAIModel::Gemini(GeminiVersion::Flash25Preview),
             GcpVertexAIModel::Gemini(GeminiVersion::Pro25Preview),
+            GcpVertexAIModel::Gemini(GeminiVersion::Flash25),
+            GcpVertexAIModel::Gemini(GeminiVersion::Pro25),
         ]
         .iter()
         .map(|model| model.to_string())
@@ -448,7 +541,7 @@
             "gcp_vertex_ai",
             "GCP Vertex AI",
             "Access variety of AI models such as Claude, Gemini through Vertex AI",
-            GcpVertexAIModel::Gemini(GeminiVersion::Flash20)
+            GcpVertexAIModel::Gemini(GeminiVersion::Flash25)
                 .to_string()
                 .as_str(),
             known_models,
@@ -456,6 +549,18 @@
             vec![
                 ConfigKey::new("GCP_PROJECT_ID", true, false, None),
                 ConfigKey::new("GCP_LOCATION", true, false, Some(Iowa.to_string().as_str())),
+                ConfigKey::new(
+                    "GCP_MAX_RATE_LIMIT_RETRIES",
+                    false,
+                    false,
+                    Some(&DEFAULT_MAX_RETRIES.to_string()),
+                ),
+                ConfigKey::new(
+                    "GCP_MAX_OVERLOADED_RETRIES",
+                    false,
+                    false,
+                    Some(&DEFAULT_MAX_RETRIES.to_string()),
+                ),
                 ConfigKey::new(
                     "GCP_MAX_RETRIES",
                     false,
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use reqwest::StatusCode;
#[test] #[test]
fn test_retry_config_delay_calculation() { fn test_retry_config_delay_calculation() {
let config = RetryConfig { let config = RetryConfig {
max_retries: 5, max_rate_limit_retries: 5,
max_overloaded_retries: 5,
initial_interval_ms: 1000, initial_interval_ms: 1000,
backoff_multiplier: 2.0, backoff_multiplier: 2.0,
max_interval_ms: 32000, max_interval_ms: 32000,
@@ -552,6 +659,44 @@ mod tests {
         assert!(delay10.as_millis() <= 38400); // max_interval_ms * 1.2 (max jitter)
     }

+    #[test]
+    fn test_max_retries_for_status() {
+        let config = RetryConfig {
+            max_rate_limit_retries: 5,
+            max_overloaded_retries: 10,
+            initial_interval_ms: 1000,
+            backoff_multiplier: 2.0,
+            max_interval_ms: 32000,
+        };
+
+        // Check that we get the right max retries for each error type
+        assert_eq!(
+            config.max_retries_for_status(StatusCode::TOO_MANY_REQUESTS),
+            5
+        );
+        assert_eq!(config.max_retries_for_status(*STATUS_API_OVERLOADED), 10);
+        // For any other status code, we should get the rate limit retries
+        assert_eq!(config.max_retries_for_status(StatusCode::BAD_REQUEST), 5);
+    }
+
+    #[test]
+    fn test_status_overloaded_code() {
+        // Test that we correctly handle the 529 status code
+        // Verify the custom status code is created correctly
+        assert_eq!(STATUS_API_OVERLOADED.as_u16(), 529);
+        // This is not a standard HTTP status code, so it's classified as server error
+        assert!(STATUS_API_OVERLOADED.is_server_error());
+        // Should be different from TOO_MANY_REQUESTS (429)
+        assert_ne!(*STATUS_API_OVERLOADED, StatusCode::TOO_MANY_REQUESTS);
+        // Should be different from SERVICE_UNAVAILABLE (503)
+        assert_ne!(*STATUS_API_OVERLOADED, StatusCode::SERVICE_UNAVAILABLE);
+    }
+
     #[test]
     fn test_model_provider_conversion() {
         assert_eq!(ModelProvider::Anthropic.as_str(), "anthropic");
@@ -596,7 +741,8 @@ mod tests {
             .collect();
         assert!(model_names.contains(&"claude-3-5-sonnet-v2@20241022".to_string()));
         assert!(model_names.contains(&"gemini-1.5-pro-002".to_string()));
-        // Should contain the original 2 config keys plus 4 new retry-related ones
-        assert_eq!(metadata.config_keys.len(), 6);
+        assert!(model_names.contains(&"gemini-2.5-pro".to_string()));
+        // Should contain the original 2 config keys plus 6 new retry-related ones
+        assert_eq!(metadata.config_keys.len(), 8);
     }
 }

File 4 of 4: provider documentation (Markdown)

@@ -25,7 +25,8 @@ Goose relies heavily on tool calling capabilities and currently works best with
 | [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/) | Access Azure-hosted OpenAI models, including GPT-4 and GPT-3.5. Supports both API key and Azure credential chain authentication. | `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT_NAME`, `AZURE_OPENAI_API_KEY` (optional) |
 | [Databricks](https://www.databricks.com/) | Unified data analytics and AI platform for building and deploying models. | `DATABRICKS_HOST`, `DATABRICKS_TOKEN` |
 | [Gemini](https://ai.google.dev/gemini-api/docs) | Advanced LLMs by Google with multimodal capabilities (text, images). | `GOOGLE_API_KEY` |
-| [GCP Vertex AI](https://cloud.google.com/vertex-ai) | Google Cloud's Vertex AI platform, supporting Gemini and Claude models. **Credentials must be [configured in advance](https://cloud.google.com/vertex-ai/docs/authentication).** | `GCP_PROJECT_ID`, `GCP_LOCATION` and optional `GCP_MAX_RETRIES` (6), `GCP_INITIAL_RETRY_INTERVAL_MS` (5000), `GCP_BACKOFF_MULTIPLIER` (2.0), `GCP_MAX_RETRY_INTERVAL_MS` (320_000). |
+| [GCP Vertex AI](https://cloud.google.com/vertex-ai) | Google Cloud's Vertex AI platform, supporting Gemini and Claude models. **Credentials must be [configured in advance](https://cloud.google.com/vertex-ai/docs/authentication).** | `GCP_PROJECT_ID`, `GCP_LOCATION` and optionally `GCP_MAX_RATE_LIMIT_RETRIES` (5), `GCP_MAX_OVERLOADED_RETRIES` (5), `GCP_INITIAL_RETRY_INTERVAL_MS` (5000), `GCP_BACKOFF_MULTIPLIER` (2.0), `GCP_MAX_RETRY_INTERVAL_MS` (320_000). |
+| [GitHub Copilot](https://docs.github.com/en/copilot/using-github-copilot/ai-models) | Access to GitHub Copilot's chat models including gpt-4o, o1, o3-mini, and Claude models. Uses device code authentication flow for secure access. | Uses GitHub device code authentication flow (no API key needed) |
 | [Groq](https://groq.com/) | High-performance inference hardware and tools for LLMs. | `GROQ_API_KEY` |
 | [Ollama](https://ollama.com/) | Local model runner supporting Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms).** | `OLLAMA_HOST` |
 | [Ramalama](https://ramalama.ai/) | Local model using native [OCI](https://opencontainers.org/) container runtimes, [CNCF](https://www.cncf.io/) tools, and supporting models as OCI artifacts. Ramalama API an compatible alternative to Ollama and can be used with the Goose Ollama provider. Supports Qwen, Llama, DeepSeek, and other open-source models. **Because this provider runs locally, you must first [download and run a model](/docs/getting-started/providers#local-llms).** | `OLLAMA_HOST` |
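Putting the new knobs together: an illustrative (not prescriptive) way to give 529 overload errors a larger retry budget than 429 rate limits, assuming these keys are resolved from the environment as the table above indicates:

```rust
use std::env;

fn configure_gcp_retry_budgets() {
    env::set_var("GCP_MAX_RATE_LIMIT_RETRIES", "4"); // budget for 429 rate limits
    env::set_var("GCP_MAX_OVERLOADED_RETRIES", "8"); // budget for 529 overloads
    // Keys left unset fall back to GCP_MAX_RETRIES, then the built-in default
    env::set_var("GCP_INITIAL_RETRY_INTERVAL_MS", "5000");
    env::set_var("GCP_BACKOFF_MULTIPLIER", "2.0");
    env::set_var("GCP_MAX_RETRY_INTERVAL_MS", "320000");
}
```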