feat: recipes can retry with success criteria (#3474)

This commit is contained in:
Prem Pillai
2025-07-22 10:49:21 +10:00
committed by GitHub
parent 5f3c7d339c
commit 99cc0a9c81
17 changed files with 1078 additions and 82 deletions

View File

@@ -685,6 +685,14 @@ pub struct InputConfig {
pub additional_system_prompt: Option<String>, pub additional_system_prompt: Option<String>,
} }
/// Recipe-derived values that are returned next to an `InputConfig` when a
/// session is started from a recipe.
///
/// The non-recipe input paths (stdin, instructions file, inline text) supply no
/// `RecipeInfo` at all, so consumers read each field through
/// `recipe_info.as_ref().and_then(...)`.
#[derive(Debug)]
pub struct RecipeInfo {
/// Provider / model / temperature overrides built from the recipe's `settings`.
pub session_settings: Option<SessionSettings>,
/// Sub-recipes made available to the session.
pub sub_recipes: Option<Vec<goose::recipe::SubRecipe>>,
/// The recipe's `response` definition, forwarded as the expected final output.
pub final_output_response: Option<goose::recipe::Response>,
/// The recipe's `retry` section, forwarded to the session as its retry configuration.
pub retry_config: Option<goose::agents::types::RetryConfig>,
}
pub async fn cli() -> Result<()> { pub async fn cli() -> Result<()> {
let cli = Cli::parse(); let cli = Cli::parse();
@@ -771,6 +779,7 @@ pub async fn cli() -> Result<()> {
quiet: false, quiet: false,
sub_recipes: None, sub_recipes: None,
final_output_response: None, final_output_response: None,
retry_config: None,
}) })
.await; .await;
setup_logging( setup_logging(
@@ -828,27 +837,19 @@ pub async fn cli() -> Result<()> {
provider, provider,
model, model,
}) => { }) => {
let (input_config, session_settings, sub_recipes, final_output_response) = match ( let (input_config, recipe_info) = match (instructions, input_text, recipe) {
instructions,
input_text,
recipe,
) {
(Some(file), _, _) if file == "-" => { (Some(file), _, _) if file == "-" => {
let mut input = String::new(); let mut input = String::new();
std::io::stdin() std::io::stdin()
.read_to_string(&mut input) .read_to_string(&mut input)
.expect("Failed to read from stdin"); .expect("Failed to read from stdin");
( let input_config = InputConfig {
InputConfig { contents: Some(input),
contents: Some(input), extensions_override: None,
extensions_override: None, additional_system_prompt: system,
additional_system_prompt: system, };
}, (input_config, None)
None,
None,
None,
)
} }
(Some(file), _, _) => { (Some(file), _, _) => {
let contents = std::fs::read_to_string(&file).unwrap_or_else(|err| { let contents = std::fs::read_to_string(&file).unwrap_or_else(|err| {
@@ -858,27 +859,21 @@ pub async fn cli() -> Result<()> {
); );
std::process::exit(1); std::process::exit(1);
}); });
( let input_config = InputConfig {
InputConfig { contents: Some(contents),
contents: Some(contents), extensions_override: None,
extensions_override: None, additional_system_prompt: None,
additional_system_prompt: None, };
}, (input_config, None)
None,
None,
None,
)
} }
(_, Some(text), _) => ( (_, Some(text), _) => {
InputConfig { let input_config = InputConfig {
contents: Some(text), contents: Some(text),
extensions_override: None, extensions_override: None,
additional_system_prompt: system, additional_system_prompt: system,
}, };
None, (input_config, None)
None, }
None,
),
(_, _, Some(recipe_name)) => { (_, _, Some(recipe_name)) => {
if explain { if explain {
explain_recipe(&recipe_name, params)?; explain_recipe(&recipe_name, params)?;
@@ -891,7 +886,9 @@ pub async fn cli() -> Result<()> {
} }
return Ok(()); return Ok(());
} }
extract_recipe_info_from_cli(recipe_name, params, additional_sub_recipes)? let (input_config, recipe_info) =
extract_recipe_info_from_cli(recipe_name, params, additional_sub_recipes)?;
(input_config, Some(recipe_info))
} }
(None, None, None) => { (None, None, None) => {
eprintln!("Error: Must provide either --instructions (-i), --text (-t), or --recipe. Use -i - for stdin."); eprintln!("Error: Must provide either --instructions (-i), --text (-t), or --recipe. Use -i - for stdin.");
@@ -909,7 +906,9 @@ pub async fn cli() -> Result<()> {
builtins, builtins,
extensions_override: input_config.extensions_override, extensions_override: input_config.extensions_override,
additional_system_prompt: input_config.additional_system_prompt, additional_system_prompt: input_config.additional_system_prompt,
settings: session_settings, settings: recipe_info
.as_ref()
.and_then(|r| r.session_settings.clone()),
provider, provider,
model, model,
debug, debug,
@@ -918,8 +917,11 @@ pub async fn cli() -> Result<()> {
scheduled_job_id, scheduled_job_id,
interactive, // Use the interactive flag from the Run command interactive, // Use the interactive flag from the Run command
quiet, quiet,
sub_recipes, sub_recipes: recipe_info.as_ref().and_then(|r| r.sub_recipes.clone()),
final_output_response, final_output_response: recipe_info
.as_ref()
.and_then(|r| r.final_output_response.clone()),
retry_config: recipe_info.as_ref().and_then(|r| r.retry_config.clone()),
}) })
.await; .await;
@@ -1051,6 +1053,7 @@ pub async fn cli() -> Result<()> {
quiet: false, quiet: false,
sub_recipes: None, sub_recipes: None,
final_output_response: None, final_output_response: None,
retry_config: None,
}) })
.await; .await;
setup_logging( setup_logging(

View File

@@ -52,6 +52,7 @@ pub async fn agent_generator(
quiet: false, quiet: false,
sub_recipes: None, sub_recipes: None,
final_output_response: None, final_output_response: None,
retry_config: None,
}) })
.await; .await;

View File

@@ -491,6 +491,7 @@ async fn process_message_streaming(
schedule_id: None, schedule_id: None,
execution_mode: None, execution_mode: None,
max_turns: None, max_turns: None,
retry_config: None,
}; };
// Get response from agent // Get response from agent

View File

@@ -1,24 +1,21 @@
use std::path::PathBuf; use std::path::PathBuf;
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use goose::recipe::{Response, SubRecipe}; use goose::recipe::SubRecipe;
use crate::recipes::print_recipe::print_recipe_info; use crate::recipes::print_recipe::print_recipe_info;
use crate::recipes::recipe::load_recipe; use crate::recipes::recipe::load_recipe;
use crate::recipes::search_recipe::retrieve_recipe_file; use crate::recipes::search_recipe::retrieve_recipe_file;
use crate::{cli::InputConfig, session::SessionSettings}; use crate::{
cli::{InputConfig, RecipeInfo},
session::SessionSettings,
};
#[allow(clippy::type_complexity)]
pub fn extract_recipe_info_from_cli( pub fn extract_recipe_info_from_cli(
recipe_name: String, recipe_name: String,
params: Vec<(String, String)>, params: Vec<(String, String)>,
additional_sub_recipes: Vec<String>, additional_sub_recipes: Vec<String>,
) -> Result<( ) -> Result<(InputConfig, RecipeInfo)> {
InputConfig,
Option<SessionSettings>,
Option<Vec<SubRecipe>>,
Option<Response>,
)> {
let recipe = load_recipe(&recipe_name, params.clone()).unwrap_or_else(|err| { let recipe = load_recipe(&recipe_name, params.clone()).unwrap_or_else(|err| {
eprintln!("{}: {}", console::style("Error").red().bold(), err); eprintln!("{}: {}", console::style("Error").red().bold(), err);
std::process::exit(1); std::process::exit(1);
@@ -49,20 +46,24 @@ pub fn extract_recipe_info_from_cli(
} }
} }
} }
Ok(( let input_config = InputConfig {
InputConfig { contents: recipe.prompt.filter(|s| !s.trim().is_empty()),
contents: recipe.prompt.filter(|s| !s.trim().is_empty()), extensions_override: recipe.extensions,
extensions_override: recipe.extensions, additional_system_prompt: recipe.instructions,
additional_system_prompt: recipe.instructions, };
},
recipe.settings.map(|s| SessionSettings { let recipe_info = RecipeInfo {
session_settings: recipe.settings.map(|s| SessionSettings {
goose_provider: s.goose_provider, goose_provider: s.goose_provider,
goose_model: s.goose_model, goose_model: s.goose_model,
temperature: s.temperature, temperature: s.temperature,
}), }),
Some(all_sub_recipes), sub_recipes: Some(all_sub_recipes),
recipe.response, final_output_response: recipe.response,
)) retry_config: recipe.retry,
};
Ok((input_config, recipe_info))
} }
fn extract_recipe_name(recipe_identifier: &str) -> String { fn extract_recipe_name(recipe_identifier: &str) -> String {
@@ -93,8 +94,11 @@ mod tests {
let params = vec![("name".to_string(), "my_value".to_string())]; let params = vec![("name".to_string(), "my_value".to_string())];
let recipe_name = recipe_path.to_str().unwrap().to_string(); let recipe_name = recipe_path.to_str().unwrap().to_string();
let (input_config, settings, sub_recipes, response) = let (input_config, recipe_info) =
extract_recipe_info_from_cli(recipe_name, params, Vec::new()).unwrap(); extract_recipe_info_from_cli(recipe_name, params, Vec::new()).unwrap();
let settings = recipe_info.session_settings;
let sub_recipes = recipe_info.sub_recipes;
let response = recipe_info.final_output_response;
assert_eq!(input_config.contents, Some("test_prompt".to_string())); assert_eq!(input_config.contents, Some("test_prompt".to_string()));
assert_eq!( assert_eq!(
@@ -149,8 +153,11 @@ mod tests {
sub_recipe2_path.to_string_lossy().to_string(), sub_recipe2_path.to_string_lossy().to_string(),
]; ];
let (input_config, settings, sub_recipes, response) = let (input_config, recipe_info) =
extract_recipe_info_from_cli(recipe_name, params, additional_sub_recipes).unwrap(); extract_recipe_info_from_cli(recipe_name, params, additional_sub_recipes).unwrap();
let settings = recipe_info.session_settings;
let sub_recipes = recipe_info.sub_recipes;
let response = recipe_info.final_output_response;
assert_eq!(input_config.contents, Some("test_prompt".to_string())); assert_eq!(input_config.contents, Some("test_prompt".to_string()));
assert_eq!( assert_eq!(

View File

@@ -1,5 +1,6 @@
use console::style; use console::style;
use goose::agents::extension::ExtensionError; use goose::agents::extension::ExtensionError;
use goose::agents::types::RetryConfig;
use goose::agents::Agent; use goose::agents::Agent;
use goose::config::{Config, ExtensionConfig, ExtensionConfigManager}; use goose::config::{Config, ExtensionConfig, ExtensionConfigManager};
use goose::providers::create; use goose::providers::create;
@@ -60,6 +61,8 @@ pub struct SessionBuilderConfig {
pub sub_recipes: Option<Vec<SubRecipe>>, pub sub_recipes: Option<Vec<SubRecipe>>,
/// Final output expected response /// Final output expected response
pub final_output_response: Option<Response>, pub final_output_response: Option<Response>,
/// Retry configuration for automated validation and recovery
pub retry_config: Option<RetryConfig>,
} }
/// Offers to help debug an extension failure by creating a minimal debugging session /// Offers to help debug an extension failure by creating a minimal debugging session
@@ -138,6 +141,7 @@ async fn offer_extension_debugging_help(
None, None,
None, None,
None, None,
None,
); );
// Process the debugging request // Process the debugging request
@@ -407,6 +411,7 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> Session {
session_config.scheduled_job_id.clone(), session_config.scheduled_job_id.clone(),
session_config.max_turns, session_config.max_turns,
edit_mode, edit_mode,
session_config.retry_config.clone(),
); );
// Add extensions if provided // Add extensions if provided
@@ -602,6 +607,7 @@ mod tests {
quiet: false, quiet: false,
sub_recipes: None, sub_recipes: None,
final_output_response: None, final_output_response: None,
retry_config: None,
}; };
assert_eq!(config.extensions.len(), 1); assert_eq!(config.extensions.len(), 1);

View File

@@ -28,6 +28,7 @@ use anyhow::{Context, Result};
use completion::GooseCompleter; use completion::GooseCompleter;
use etcetera::{choose_app_strategy, AppStrategy}; use etcetera::{choose_app_strategy, AppStrategy};
use goose::agents::extension::{Envs, ExtensionConfig}; use goose::agents::extension::{Envs, ExtensionConfig};
use goose::agents::types::RetryConfig;
use goose::agents::{Agent, SessionConfig}; use goose::agents::{Agent, SessionConfig};
use goose::config::Config; use goose::config::Config;
use goose::message::{Message, MessageContent}; use goose::message::{Message, MessageContent};
@@ -64,6 +65,7 @@ pub struct Session {
scheduled_job_id: Option<String>, // ID of the scheduled job that triggered this session scheduled_job_id: Option<String>, // ID of the scheduled job that triggered this session
max_turns: Option<u32>, max_turns: Option<u32>,
edit_mode: Option<EditMode>, edit_mode: Option<EditMode>,
retry_config: Option<RetryConfig>,
} }
// Cache structure for completion data // Cache structure for completion data
@@ -127,6 +129,7 @@ impl Session {
scheduled_job_id: Option<String>, scheduled_job_id: Option<String>,
max_turns: Option<u32>, max_turns: Option<u32>,
edit_mode: Option<EditMode>, edit_mode: Option<EditMode>,
retry_config: Option<RetryConfig>,
) -> Self { ) -> Self {
let messages = if let Some(session_file) = &session_file { let messages = if let Some(session_file) = &session_file {
match session::read_messages(session_file) { match session::read_messages(session_file) {
@@ -151,6 +154,7 @@ impl Session {
scheduled_job_id, scheduled_job_id,
max_turns, max_turns,
edit_mode, edit_mode,
retry_config,
} }
} }
@@ -879,6 +883,7 @@ impl Session {
schedule_id: self.scheduled_job_id.clone(), schedule_id: self.scheduled_job_id.clone(),
execution_mode: None, execution_mode: None,
max_turns: self.max_turns, max_turns: self.max_turns,
retry_config: self.retry_config.clone(),
} }
}); });
let mut stream = self let mut stream = self

View File

@@ -186,6 +186,7 @@ async fn handler(
schedule_id: request.scheduled_job_id.clone(), schedule_id: request.scheduled_job_id.clone(),
execution_mode: None, execution_mode: None,
max_turns: None, max_turns: None,
retry_config: None,
}), }),
) )
.await .await
@@ -368,6 +369,7 @@ async fn ask_handler(
schedule_id: request.scheduled_job_id.clone(), schedule_id: request.scheduled_job_id.clone(),
execution_mode: None, execution_mode: None,
max_turns: None, max_turns: None,
retry_config: None,
}), }),
) )
.await .await

View File

@@ -29,7 +29,7 @@ use crate::tool_monitor::{ToolCall, ToolMonitor};
use regex::Regex; use regex::Regex;
use serde_json::Value; use serde_json::Value;
use tokio::sync::{mpsc, Mutex, RwLock}; use tokio::sync::{mpsc, Mutex, RwLock};
use tracing::{debug, error, instrument}; use tracing::{debug, error, info, instrument};
use crate::agents::extension::{ExtensionConfig, ExtensionError, ExtensionResult, ToolInfo}; use crate::agents::extension::{ExtensionConfig, ExtensionError, ExtensionResult, ToolInfo};
use crate::agents::extension_manager::{get_parameter_names, ExtensionManager}; use crate::agents::extension_manager::{get_parameter_names, ExtensionManager};
@@ -39,6 +39,7 @@ use crate::agents::platform_tools::{
PLATFORM_SEARCH_AVAILABLE_EXTENSIONS_TOOL_NAME, PLATFORM_SEARCH_AVAILABLE_EXTENSIONS_TOOL_NAME,
}; };
use crate::agents::prompt_manager::PromptManager; use crate::agents::prompt_manager::PromptManager;
use crate::agents::retry::{RetryManager, RetryResult};
use crate::agents::router_tool_selector::{ use crate::agents::router_tool_selector::{
create_tool_selector, RouterToolSelectionStrategy, RouterToolSelector, create_tool_selector, RouterToolSelectionStrategy, RouterToolSelector,
}; };
@@ -64,7 +65,7 @@ pub struct Agent {
pub(super) extension_manager: Arc<RwLock<ExtensionManager>>, pub(super) extension_manager: Arc<RwLock<ExtensionManager>>,
pub(super) sub_recipe_manager: Mutex<SubRecipeManager>, pub(super) sub_recipe_manager: Mutex<SubRecipeManager>,
pub(super) tasks_manager: TasksManager, pub(super) tasks_manager: TasksManager,
pub(super) final_output_tool: Mutex<Option<FinalOutputTool>>, pub(super) final_output_tool: Arc<Mutex<Option<FinalOutputTool>>>,
pub(super) frontend_tools: Mutex<HashMap<String, FrontendTool>>, pub(super) frontend_tools: Mutex<HashMap<String, FrontendTool>>,
pub(super) frontend_instructions: Mutex<Option<String>>, pub(super) frontend_instructions: Mutex<Option<String>>,
pub(super) prompt_manager: Mutex<PromptManager>, pub(super) prompt_manager: Mutex<PromptManager>,
@@ -72,11 +73,12 @@ pub struct Agent {
pub(super) confirmation_rx: Mutex<mpsc::Receiver<(String, PermissionConfirmation)>>, pub(super) confirmation_rx: Mutex<mpsc::Receiver<(String, PermissionConfirmation)>>,
pub(super) tool_result_tx: mpsc::Sender<(String, ToolResult<Vec<Content>>)>, pub(super) tool_result_tx: mpsc::Sender<(String, ToolResult<Vec<Content>>)>,
pub(super) tool_result_rx: ToolResultReceiver, pub(super) tool_result_rx: ToolResultReceiver,
pub(super) tool_monitor: Mutex<Option<ToolMonitor>>, pub(super) tool_monitor: Arc<Mutex<Option<ToolMonitor>>>,
pub(super) router_tool_selector: Mutex<Option<Arc<Box<dyn RouterToolSelector>>>>, pub(super) router_tool_selector: Mutex<Option<Arc<Box<dyn RouterToolSelector>>>>,
pub(super) scheduler_service: Mutex<Option<Arc<dyn SchedulerTrait>>>, pub(super) scheduler_service: Mutex<Option<Arc<dyn SchedulerTrait>>>,
pub(super) mcp_tx: Mutex<mpsc::Sender<JsonRpcMessage>>, pub(super) mcp_tx: Mutex<mpsc::Sender<JsonRpcMessage>>,
pub(super) mcp_notification_rx: Arc<Mutex<mpsc::Receiver<JsonRpcMessage>>>, pub(super) mcp_notification_rx: Arc<Mutex<mpsc::Receiver<JsonRpcMessage>>>,
pub(super) retry_manager: RetryManager,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@@ -134,12 +136,15 @@ impl Agent {
// Add MCP notification channel // Add MCP notification channel
let (mcp_tx, mcp_rx) = mpsc::channel(100); let (mcp_tx, mcp_rx) = mpsc::channel(100);
let tool_monitor = Arc::new(Mutex::new(None));
let retry_manager = RetryManager::with_tool_monitor(tool_monitor.clone());
Self { Self {
provider: Mutex::new(None), provider: Mutex::new(None),
extension_manager: Arc::new(RwLock::new(ExtensionManager::new())), extension_manager: Arc::new(RwLock::new(ExtensionManager::new())),
sub_recipe_manager: Mutex::new(SubRecipeManager::new()), sub_recipe_manager: Mutex::new(SubRecipeManager::new()),
tasks_manager: TasksManager::new(), tasks_manager: TasksManager::new(),
final_output_tool: Mutex::new(None), final_output_tool: Arc::new(Mutex::new(None)),
frontend_tools: Mutex::new(HashMap::new()), frontend_tools: Mutex::new(HashMap::new()),
frontend_instructions: Mutex::new(None), frontend_instructions: Mutex::new(None),
prompt_manager: Mutex::new(PromptManager::new()), prompt_manager: Mutex::new(PromptManager::new()),
@@ -147,12 +152,13 @@ impl Agent {
confirmation_rx: Mutex::new(confirm_rx), confirmation_rx: Mutex::new(confirm_rx),
tool_result_tx: tool_tx, tool_result_tx: tool_tx,
tool_result_rx: Arc::new(Mutex::new(tool_rx)), tool_result_rx: Arc::new(Mutex::new(tool_rx)),
tool_monitor: Mutex::new(None), tool_monitor,
router_tool_selector: Mutex::new(None), router_tool_selector: Mutex::new(None),
scheduler_service: Mutex::new(None), scheduler_service: Mutex::new(None),
// Initialize with MCP notification support // Initialize with MCP notification support
mcp_tx: Mutex::new(mcp_tx), mcp_tx: Mutex::new(mcp_tx),
mcp_notification_rx: Arc::new(Mutex::new(mcp_rx)), mcp_notification_rx: Arc::new(Mutex::new(mcp_rx)),
retry_manager,
} }
} }
@@ -172,6 +178,41 @@ impl Agent {
} }
} }
/// Reset the retry attempts counter to 0.
///
/// Delegates to `RetryManager::reset_attempts`, which also calls `reset()` on the
/// tool monitor when the manager holds one that is currently set.
pub async fn reset_retry_attempts(&self) {
self.retry_manager.reset_attempts().await;
}
/// Increment the retry attempts counter and return the new value.
///
/// Thin wrapper over `RetryManager::increment_attempts`; the returned number is
/// the counter value after adding one.
pub async fn increment_retry_attempts(&self) -> u32 {
self.retry_manager.increment_attempts().await
}
/// Get the current retry attempts count.
///
/// Thin wrapper over `RetryManager::get_attempts`; reads the counter without
/// modifying it.
pub async fn get_retry_attempts(&self) -> u32 {
self.retry_manager.get_attempts().await
}
/// Run the retry manager's evaluation for the reply loop and reduce its outcome
/// to a single "restart the loop?" flag.
///
/// Returns `Ok(true)` only when the manager reports `RetryResult::Retried`;
/// every other outcome (skipped, max attempts reached, success checks passed)
/// yields `Ok(false)`. Errors from the manager are propagated unchanged.
async fn handle_retry_logic(
    &self,
    messages: &mut Vec<Message>,
    session: &Option<SessionConfig>,
    initial_messages: &[Message],
) -> Result<bool> {
    let outcome = self
        .retry_manager
        .handle_retry_logic(messages, session, initial_messages, &self.final_output_tool)
        .await?;

    // Kept exhaustive on purpose: a new `RetryResult` variant must be classified here.
    let should_retry = match outcome {
        RetryResult::Retried => true,
        RetryResult::Skipped
        | RetryResult::MaxAttemptsReached
        | RetryResult::SuccessChecksPassed => false,
    };
    Ok(should_retry)
}
/// Set the scheduler service for this agent /// Set the scheduler service for this agent
pub async fn set_scheduler(&self, scheduler: Arc<dyn SchedulerTrait>) { pub async fn set_scheduler(&self, scheduler: Arc<dyn SchedulerTrait>) {
let mut scheduler_service = self.scheduler_service.lock().await; let mut scheduler_service = self.scheduler_service.lock().await;
@@ -680,8 +721,11 @@ impl Agent {
session: Option<SessionConfig>, session: Option<SessionConfig>,
) -> anyhow::Result<BoxStream<'_, anyhow::Result<AgentEvent>>> { ) -> anyhow::Result<BoxStream<'_, anyhow::Result<AgentEvent>>> {
let mut messages = messages.to_vec(); let mut messages = messages.to_vec();
let initial_messages = messages.clone();
let reply_span = tracing::Span::current(); let reply_span = tracing::Span::current();
self.reset_retry_attempts().await;
// Load settings from config // Load settings from config
let config = Config::global(); let config = Config::global();
@@ -1040,6 +1084,22 @@ impl Agent {
yield AgentEvent::Message(message); yield AgentEvent::Message(message);
} }
} }
match self.handle_retry_logic(&mut messages, &session, &initial_messages).await {
Ok(should_retry) => {
if should_retry {
info!("Retry logic triggered, restarting agent loop");
continue;
}
}
Err(e) => {
error!("Retry logic failed: {}", e);
yield AgentEvent::Message(Message::assistant().with_text(
format!("Retry logic encountered an error: {}", e)
));
}
}
break; break;
} }

View File

@@ -8,6 +8,7 @@ pub mod platform_tools;
pub mod prompt_manager; pub mod prompt_manager;
mod recipe_tools; mod recipe_tools;
mod reply_parts; mod reply_parts;
pub mod retry;
mod router_tool_selector; mod router_tool_selector;
mod router_tools; mod router_tools;
mod schedule_tool; mod schedule_tool;
@@ -19,7 +20,7 @@ mod subagent_task_config;
mod tool_execution; mod tool_execution;
mod tool_router_index_manager; mod tool_router_index_manager;
pub(crate) mod tool_vectordb; pub(crate) mod tool_vectordb;
mod types; pub mod types;
pub use agent::{Agent, AgentEvent}; pub use agent::{Agent, AgentEvent};
pub use extension::ExtensionConfig; pub use extension::ExtensionConfig;
@@ -27,4 +28,4 @@ pub use extension_manager::ExtensionManager;
pub use prompt_manager::PromptManager; pub use prompt_manager::PromptManager;
pub use subagent::{SubAgent, SubAgentProgress, SubAgentStatus}; pub use subagent::{SubAgent, SubAgentProgress, SubAgentStatus};
pub use subagent_task_config::TaskConfig; pub use subagent_task_config::TaskConfig;
pub use types::{FrontendTool, SessionConfig}; pub use types::{FrontendTool, RetryConfig, SessionConfig, SuccessCheck};

View File

@@ -0,0 +1,498 @@
use anyhow::Result;
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;
use tokio::process::Command;
use tokio::sync::Mutex;
use tracing::{debug, info, warn};
use crate::agents::types::SessionConfig;
use crate::agents::types::{
RetryConfig, SuccessCheck, DEFAULT_ON_FAILURE_TIMEOUT_SECONDS, DEFAULT_RETRY_TIMEOUT_SECONDS,
};
use crate::config::Config;
use crate::message::Message;
use crate::tool_monitor::ToolMonitor;
/// Outcome of one evaluation of the retry logic.
///
/// Returned by `RetryManager::handle_retry_logic`; only [`RetryResult::Retried`]
/// means the caller should run the agent loop again.
///
/// The enum has no payload, so equality is total and `Eq` is derived alongside
/// `PartialEq` (this allows use as a hash-map key or in `Eq`-bounded generics).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RetryResult {
    /// No retry configuration or session available, retry logic skipped
    Skipped,
    /// Maximum retry attempts reached, cannot retry further
    MaxAttemptsReached,
    /// Success checks passed, no retry needed
    SuccessChecksPassed,
    /// Retry is needed and will be performed
    Retried,
}
/// Environment variable for configuring retry timeout globally.
///
/// Used as the key passed to `Config::global().get_param(..)`; it is consulted
/// only when the recipe's retry config leaves `timeout_seconds` unset, and
/// `DEFAULT_RETRY_TIMEOUT_SECONDS` applies when the lookup fails.
const GOOSE_RECIPE_RETRY_TIMEOUT_SECONDS: &str = "GOOSE_RECIPE_RETRY_TIMEOUT_SECONDS";
/// Environment variable for configuring on_failure timeout globally.
///
/// Used as the key passed to `Config::global().get_param(..)`; it is consulted
/// only when the recipe's retry config leaves `on_failure_timeout_seconds` unset,
/// and `DEFAULT_ON_FAILURE_TIMEOUT_SECONDS` applies when the lookup fails.
const GOOSE_RECIPE_ON_FAILURE_TIMEOUT_SECONDS: &str = "GOOSE_RECIPE_ON_FAILURE_TIMEOUT_SECONDS";
/// Manages retry state and operations for agent execution
#[derive(Debug)]
pub struct RetryManager {
/// Current number of retry attempts
attempts: Arc<Mutex<u32>>,
/// Optional tool monitor for reset operations
tool_monitor: Option<Arc<Mutex<Option<ToolMonitor>>>>,
}
impl Default for RetryManager {
fn default() -> Self {
Self::new()
}
}
impl RetryManager {
/// Create a new retry manager
pub fn new() -> Self {
Self {
attempts: Arc::new(Mutex::new(0)),
tool_monitor: None,
}
}
/// Create a new retry manager with tool monitor
pub fn with_tool_monitor(tool_monitor: Arc<Mutex<Option<ToolMonitor>>>) -> Self {
Self {
attempts: Arc::new(Mutex::new(0)),
tool_monitor: Some(tool_monitor),
}
}
/// Reset the retry attempts counter to 0
pub async fn reset_attempts(&self) {
let mut attempts = self.attempts.lock().await;
*attempts = 0;
// Reset tool monitor if available
if let Some(monitor) = &self.tool_monitor {
if let Some(monitor) = monitor.lock().await.as_mut() {
monitor.reset();
}
}
}
/// Increment the retry attempts counter and return the new value
pub async fn increment_attempts(&self) -> u32 {
let mut attempts = self.attempts.lock().await;
*attempts += 1;
*attempts
}
/// Get the current retry attempts count
pub async fn get_attempts(&self) -> u32 {
*self.attempts.lock().await
}
/// Reset status for retry: clear message history and final output tool state
async fn reset_status_for_retry(
messages: &mut Vec<Message>,
initial_messages: &[Message],
final_output_tool: &Arc<Mutex<Option<crate::agents::final_output_tool::FinalOutputTool>>>,
) {
messages.clear();
messages.extend_from_slice(initial_messages);
info!("Reset message history to initial state for retry");
if let Some(final_output_tool) = final_output_tool.lock().await.as_mut() {
final_output_tool.final_output = None;
info!("Cleared final output tool state for retry");
}
}
/// Handle retry logic for the agent reply loop
pub async fn handle_retry_logic(
&self,
messages: &mut Vec<Message>,
session: &Option<SessionConfig>,
initial_messages: &[Message],
final_output_tool: &Arc<Mutex<Option<crate::agents::final_output_tool::FinalOutputTool>>>,
) -> Result<RetryResult> {
let Some(session_config) = session else {
return Ok(RetryResult::Skipped);
};
let Some(retry_config) = &session_config.retry_config else {
return Ok(RetryResult::Skipped);
};
let success = execute_success_checks(&retry_config.checks, retry_config).await?;
if success {
info!("All success checks passed, no retry needed");
return Ok(RetryResult::SuccessChecksPassed);
}
let current_attempts = self.get_attempts().await;
if current_attempts >= retry_config.max_retries {
let error_msg = Message::assistant().with_text(format!(
"Maximum retry attempts ({}) exceeded. Unable to complete the task successfully.",
retry_config.max_retries
));
messages.push(error_msg);
warn!(
"Maximum retry attempts ({}) exceeded",
retry_config.max_retries
);
return Ok(RetryResult::MaxAttemptsReached);
}
if let Some(on_failure_cmd) = &retry_config.on_failure {
info!("Executing on_failure command: {}", on_failure_cmd);
execute_on_failure_command(on_failure_cmd, retry_config).await?;
}
Self::reset_status_for_retry(messages, initial_messages, final_output_tool).await;
let new_attempts = self.increment_attempts().await;
info!("Incrementing retry attempts to {}", new_attempts);
Ok(RetryResult::Retried)
}
}
/// Resolve the timeout applied to each success-check command.
///
/// Precedence: `retry_config.timeout_seconds`, then the global config value
/// stored under `GOOSE_RECIPE_RETRY_TIMEOUT_SECONDS`, then
/// `DEFAULT_RETRY_TIMEOUT_SECONDS`. The global config is only consulted when
/// the recipe does not set a value.
fn get_retry_timeout(retry_config: &RetryConfig) -> Duration {
    let seconds = match retry_config.timeout_seconds {
        Some(from_recipe) => from_recipe,
        None => Config::global()
            .get_param(GOOSE_RECIPE_RETRY_TIMEOUT_SECONDS)
            .unwrap_or(DEFAULT_RETRY_TIMEOUT_SECONDS),
    };
    Duration::from_secs(seconds)
}
/// Resolve the timeout applied to the `on_failure` command.
///
/// Precedence: `retry_config.on_failure_timeout_seconds`, then the global config
/// value stored under `GOOSE_RECIPE_ON_FAILURE_TIMEOUT_SECONDS`, then
/// `DEFAULT_ON_FAILURE_TIMEOUT_SECONDS`. The global config is only consulted
/// when the recipe does not set a value.
fn get_on_failure_timeout(retry_config: &RetryConfig) -> Duration {
    let seconds = match retry_config.on_failure_timeout_seconds {
        Some(from_recipe) => from_recipe,
        None => Config::global()
            .get_param(GOOSE_RECIPE_ON_FAILURE_TIMEOUT_SECONDS)
            .unwrap_or(DEFAULT_ON_FAILURE_TIMEOUT_SECONDS),
    };
    Duration::from_secs(seconds)
}
/// Run the success checks in order and report whether every one of them passed.
///
/// Evaluation stops at the first failing check, which yields `Ok(false)`; an
/// empty list yields `Ok(true)`. Each shell check is run with the timeout from
/// `get_retry_timeout`.
///
/// # Errors
/// Returns an error if a check command cannot be executed or times out.
pub async fn execute_success_checks(
    checks: &[SuccessCheck],
    retry_config: &RetryConfig,
) -> Result<bool> {
    let per_check_timeout = get_retry_timeout(retry_config);

    for check in checks {
        match check {
            SuccessCheck::Shell { command } => {
                let output = execute_shell_command(command, per_check_timeout).await?;
                if output.status.success() {
                    info!(
                        "Success check passed: command '{}' completed successfully",
                        command
                    );
                    continue;
                }

                warn!(
                    "Success check failed: command '{}' exited with status {}, stderr: {}",
                    command,
                    output.status,
                    String::from_utf8_lossy(&output.stderr)
                );
                return Ok(false);
            }
        }
    }

    Ok(true)
}
/// Run `command` through the platform shell and capture its output, giving up
/// after `timeout`.
///
/// On Windows the command is passed to `cmd /C`, elsewhere to `sh -c`. stdin is
/// closed, stdout and stderr are piped, and the child is configured with
/// `kill_on_drop(true)`.
///
/// A non-zero exit status is NOT an error here: the caller receives the
/// `Output` and inspects `status` itself.
///
/// # Errors
/// Returns an error if the process cannot be spawned or awaited, or if it does
/// not finish within `timeout` (message contains "timed out").
pub async fn execute_shell_command(
    command: &str,
    timeout: std::time::Duration,
) -> Result<std::process::Output> {
    debug!(
        "Executing shell command with timeout {:?}: {}",
        timeout, command
    );

    // Shell binary plus the flag that makes it execute a command string.
    let (shell, run_flag) = if cfg!(target_os = "windows") {
        ("cmd", "/C")
    } else {
        ("sh", "-c")
    };

    let run = async {
        let output = Command::new(shell)
            .args([run_flag, command])
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .stdin(Stdio::null())
            .kill_on_drop(true)
            .output()
            .await?;

        debug!(
            "Shell command completed with status: {}, stdout: {}, stderr: {}",
            output.status,
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr)
        );

        Ok(output)
    };

    match tokio::time::timeout(timeout, run).await {
        Ok(outcome) => outcome,
        Err(_) => {
            let error_msg = format!("Shell command timed out after {:?}: {}", timeout, command);
            warn!("{}", error_msg);
            Err(anyhow::anyhow!("{}", error_msg))
        }
    }
}
/// Run the recipe's `on_failure` command and turn any failure into an error.
///
/// The command is executed via `execute_shell_command` with the timeout from
/// `get_on_failure_timeout`.
///
/// # Errors
/// - The command timed out. This is recognised by the substring "timed out" in
///   the underlying error text and is re-reported with an on_failure-specific message.
/// - The command could not be executed (underlying error returned as is).
/// - The command exited with a non-zero status (message includes its stderr).
pub async fn execute_on_failure_command(command: &str, retry_config: &RetryConfig) -> Result<()> {
    let timeout = get_on_failure_timeout(retry_config);
    info!(
        "Executing on_failure command with timeout {:?}: {}",
        timeout, command
    );

    let output = execute_shell_command(command, timeout)
        .await
        .map_err(|e| {
            if e.to_string().contains("timed out") {
                let error_msg = format!(
                    "On_failure command timed out after {:?}: {}",
                    timeout, command
                );
                warn!("{}", error_msg);
                anyhow::anyhow!(error_msg)
            } else {
                warn!("On_failure command execution error: {}", e);
                e
            }
        })?;

    if output.status.success() {
        info!("On_failure command completed successfully: {}", command);
        return Ok(());
    }

    let error_msg = format!(
        "On_failure command failed: command '{}' exited with status {}, stderr: {}",
        command,
        output.status,
        String::from_utf8_lossy(&output.stderr)
    );
    warn!("{}", error_msg);
    Err(anyhow::anyhow!(error_msg))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agents::types::SuccessCheck;

    /// Baseline configuration shared by the command-execution tests.
    fn create_test_retry_config() -> RetryConfig {
        RetryConfig {
            max_retries: 3,
            checks: Vec::new(),
            on_failure: None,
            timeout_seconds: Some(60),
            on_failure_timeout_seconds: Some(120),
        }
    }

    /// Single-retry configuration that varies only in its two timeout fields.
    fn config_with_timeouts(
        timeout_seconds: Option<u64>,
        on_failure_timeout_seconds: Option<u64>,
    ) -> RetryConfig {
        RetryConfig {
            max_retries: 1,
            checks: Vec::new(),
            on_failure: None,
            timeout_seconds,
            on_failure_timeout_seconds,
        }
    }

    /// Wraps a shell command line in a `SuccessCheck::Shell`.
    fn shell_check(command: &str) -> SuccessCheck {
        SuccessCheck::Shell {
            command: command.to_string(),
        }
    }

    /// Every pair of distinct variants compares unequal; `Clone` and `Debug` behave as expected.
    #[test]
    fn test_retry_result_enum() {
        let variants = [
            RetryResult::Skipped,
            RetryResult::MaxAttemptsReached,
            RetryResult::SuccessChecksPassed,
            RetryResult::Retried,
        ];
        for (index, lhs) in variants.iter().enumerate() {
            for rhs in &variants[index + 1..] {
                assert_ne!(lhs, rhs);
            }
        }

        let original = RetryResult::Retried;
        let duplicate = original.clone();
        assert_eq!(original, duplicate);

        let rendered = format!("{:?}", RetryResult::MaxAttemptsReached);
        assert!(rendered.contains("MaxAttemptsReached"));
    }

    /// All checks exiting with status 0 yields `Ok(true)`.
    #[tokio::test]
    async fn test_execute_success_checks_all_pass() {
        let checks = vec![shell_check("echo 'test'"), shell_check("true")];
        let retry_config = create_test_retry_config();

        let outcome = execute_success_checks(&checks, &retry_config).await;

        assert!(matches!(outcome, Ok(true)));
    }

    /// A single failing check yields `Ok(false)` rather than an error.
    #[tokio::test]
    async fn test_execute_success_checks_one_fails() {
        let checks = vec![shell_check("echo 'test'"), shell_check("false")];
        let retry_config = create_test_retry_config();

        let outcome = execute_success_checks(&checks, &retry_config).await;

        assert!(matches!(outcome, Ok(false)));
    }

    /// A successful command reports a success status and its captured stdout.
    #[tokio::test]
    async fn test_execute_shell_command_success() {
        let output = execute_shell_command("echo 'hello world'", Duration::from_secs(30))
            .await
            .expect("shell command should run to completion");

        assert!(output.status.success());
        let stdout = String::from_utf8_lossy(&output.stdout);
        assert!(stdout.contains("hello world"));
    }

    /// A command with a non-zero exit still returns `Ok`, carrying a failed status.
    #[tokio::test]
    async fn test_execute_shell_command_failure() {
        let output = execute_shell_command("false", Duration::from_secs(30))
            .await
            .expect("shell command should run to completion");

        assert!(!output.status.success());
    }

    /// A succeeding on_failure command is reported as `Ok`.
    #[tokio::test]
    async fn test_execute_on_failure_command_success() {
        let retry_config = create_test_retry_config();

        let outcome = execute_on_failure_command("echo 'cleanup'", &retry_config).await;

        assert!(outcome.is_ok());
    }

    /// An on_failure command with a non-zero exit is surfaced as `Err`.
    #[tokio::test]
    async fn test_execute_on_failure_command_failure() {
        let retry_config = create_test_retry_config();

        let outcome = execute_on_failure_command("false", &retry_config).await;

        assert!(outcome.is_err());
    }

    /// A command that outlives its timeout produces an error.
    #[tokio::test]
    async fn test_shell_command_timeout() {
        let timeout = std::time::Duration::from_millis(100);
        // Pick a platform-appropriate command that sleeps for about one second.
        let slow_command = if cfg!(target_os = "windows") {
            "timeout /t 1"
        } else {
            "sleep 1"
        };

        let outcome = execute_shell_command(slow_command, timeout).await;

        assert!(outcome.is_err());
    }

    /// With no `timeout_seconds` set, the default retry timeout applies.
    #[tokio::test]
    async fn test_get_retry_timeout_uses_config_default() {
        let retry_config = config_with_timeouts(None, None);

        assert_eq!(
            get_retry_timeout(&retry_config),
            Duration::from_secs(DEFAULT_RETRY_TIMEOUT_SECONDS)
        );
    }

    /// An explicit `timeout_seconds` is used as the retry timeout.
    #[tokio::test]
    async fn test_get_retry_timeout_uses_retry_config() {
        let retry_config = config_with_timeouts(Some(120), None);

        assert_eq!(get_retry_timeout(&retry_config), Duration::from_secs(120));
    }

    /// With no `on_failure_timeout_seconds` set, the default on_failure timeout applies.
    #[tokio::test]
    async fn test_get_on_failure_timeout_uses_config_default() {
        let retry_config = config_with_timeouts(None, None);

        assert_eq!(
            get_on_failure_timeout(&retry_config),
            Duration::from_secs(DEFAULT_ON_FAILURE_TIMEOUT_SECONDS)
        );
    }

    /// An explicit `on_failure_timeout_seconds` is used as the on_failure timeout.
    #[tokio::test]
    async fn test_get_on_failure_timeout_uses_retry_config() {
        let retry_config = config_with_timeouts(None, Some(900));

        assert_eq!(
            get_on_failure_timeout(&retry_config),
            Duration::from_secs(900)
        );
    }

    /// The two timeouts are resolved independently from their own fields.
    #[tokio::test]
    async fn test_on_failure_timeout_different_from_retry_timeout() {
        let retry_config = config_with_timeouts(Some(60), Some(300));

        let check_timeout = get_retry_timeout(&retry_config);
        let cleanup_timeout = get_on_failure_timeout(&retry_config);

        assert_eq!(check_timeout, Duration::from_secs(60));
        assert_eq!(cleanup_timeout, Duration::from_secs(300));
        assert_ne!(check_timeout, cleanup_timeout);
    }
}

View File

@@ -9,6 +9,67 @@ use tokio::sync::{mpsc, Mutex};
/// Type alias for the tool result channel receiver /// Type alias for the tool result channel receiver
pub type ToolResultReceiver = Arc<Mutex<mpsc::Receiver<(String, ToolResult<Vec<Content>>)>>>; pub type ToolResultReceiver = Arc<Mutex<mpsc::Receiver<(String, ToolResult<Vec<Content>>)>>>;
/// Default timeout, in seconds, for retry operations (300 s = 5 minutes).
///
/// Intended as the fallback when `RetryConfig::timeout_seconds` is `None`.
pub const DEFAULT_RETRY_TIMEOUT_SECONDS: u64 = 300;
/// Default timeout, in seconds, for on_failure operations (600 s = 10 minutes).
///
/// Deliberately longer than the retry default because on_failure tasks may take longer.
/// Intended as the fallback when `RetryConfig::on_failure_timeout_seconds` is `None`.
pub const DEFAULT_ON_FAILURE_TIMEOUT_SECONDS: u64 = 600;
/// Configuration for retry logic in recipe execution.
///
/// The optional fields are omitted from serialized output when they are `None`.
/// Use [`RetryConfig::validate`] to reject zero-valued limits before relying on a config.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RetryConfig {
/// Maximum number of retry attempts before giving up.
/// `validate` rejects a value of 0.
pub max_retries: u32,
/// List of success checks to validate recipe completion.
pub checks: Vec<SuccessCheck>,
/// Optional shell command to run on failure for cleanup.
#[serde(skip_serializing_if = "Option::is_none")]
pub on_failure: Option<String>,
/// Timeout in seconds for individual shell commands (default: 300 seconds).
/// `validate` rejects `Some(0)`.
#[serde(skip_serializing_if = "Option::is_none")]
pub timeout_seconds: Option<u64>,
/// Timeout in seconds for on_failure commands (default: 600 seconds).
/// `validate` rejects `Some(0)`.
#[serde(skip_serializing_if = "Option::is_none")]
pub on_failure_timeout_seconds: Option<u64>,
}
impl RetryConfig {
    /// Checks that the numeric limits in this configuration are usable.
    ///
    /// # Errors
    ///
    /// Returns a description of the first violated rule, checked in this order:
    /// `max_retries` is 0, `timeout_seconds` is `Some(0)`,
    /// `on_failure_timeout_seconds` is `Some(0)`.
    pub fn validate(&self) -> Result<(), String> {
        // Each entry pairs "is this rule violated?" with the message reported for it.
        // Order matters: only the first violation is returned.
        let rules = [
            (self.max_retries == 0, "max_retries must be greater than 0"),
            (
                self.timeout_seconds == Some(0),
                "timeout_seconds must be greater than 0 if specified",
            ),
            (
                self.on_failure_timeout_seconds == Some(0),
                "on_failure_timeout_seconds must be greater than 0 if specified",
            ),
        ];

        match rules.iter().find(|(violated, _)| *violated) {
            Some((_, message)) => Err((*message).to_string()),
            None => Ok(()),
        }
    }
}
/// A single success check to validate recipe completion.
///
/// Serialized with a `type` field naming the variant alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum SuccessCheck {
/// Execute a shell command and check its exit status.
/// The lowercase tag `shell` is accepted as an alias when deserializing.
#[serde(alias = "shell")]
Shell {
/// The shell command to execute
command: String,
},
}
/// A frontend tool that will be executed by the frontend rather than an extension /// A frontend tool that will be executed by the frontend rather than an extension
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FrontendTool { pub struct FrontendTool {
@@ -29,4 +90,7 @@ pub struct SessionConfig {
pub execution_mode: Option<String>, pub execution_mode: Option<String>,
/// Maximum number of turns (iterations) allowed without user input /// Maximum number of turns (iterations) allowed without user input
pub max_turns: Option<u32>, pub max_turns: Option<u32>,
/// Retry configuration for automated validation and recovery
#[serde(skip_serializing_if = "Option::is_none")]
pub retry_config: Option<RetryConfig>,
} }

View File

@@ -231,12 +231,16 @@ impl Default for PricingCache {
// Global cache instance // Global cache instance
lazy_static::lazy_static! { lazy_static::lazy_static! {
static ref PRICING_CACHE: PricingCache = PricingCache::new(); static ref PRICING_CACHE: PricingCache = PricingCache::new();
static ref HTTP_CLIENT: Client = Client::builder() }
/// Create a properly configured HTTP client for the current runtime
fn create_http_client() -> Client {
Client::builder()
.timeout(Duration::from_secs(30)) .timeout(Duration::from_secs(30))
.pool_idle_timeout(Duration::from_secs(90)) .pool_idle_timeout(Duration::from_secs(90))
.pool_max_idle_per_host(10) .pool_max_idle_per_host(10)
.build() .build()
.unwrap(); .expect("Failed to create HTTP client")
} }
/// OpenRouter model pricing information /// OpenRouter model pricing information
@@ -270,7 +274,8 @@ pub struct OpenRouterModelsResponse {
/// Internal function to fetch pricing data /// Internal function to fetch pricing data
async fn fetch_openrouter_pricing_internal() -> Result<HashMap<String, OpenRouterModel>> { async fn fetch_openrouter_pricing_internal() -> Result<HashMap<String, OpenRouterModel>> {
let response = HTTP_CLIENT let client = create_http_client();
let response = client
.get("https://openrouter.ai/api/v1/models") .get("https://openrouter.ai/api/v1/models")
.send() .send()
.await?; .await?;

View File

@@ -4,6 +4,7 @@ use std::collections::HashMap;
use std::fmt; use std::fmt;
use crate::agents::extension::ExtensionConfig; use crate::agents::extension::ExtensionConfig;
use crate::agents::types::RetryConfig;
use serde::de::Deserializer; use serde::de::Deserializer;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema; use utoipa::ToSchema;
@@ -37,7 +38,7 @@ fn default_version() -> String {
/// * `author` - Information about the Recipe's creator and metadata /// * `author` - Information about the Recipe's creator and metadata
/// * `parameters` - Additional parameters for the Recipe /// * `parameters` - Additional parameters for the Recipe
/// * `response` - Response configuration including JSON schema validation /// * `response` - Response configuration including JSON schema validation
/// /// * `retry` - Retry configuration for automated validation and recovery
/// # Example /// # Example
/// ///
/// ///
@@ -66,6 +67,7 @@ fn default_version() -> String {
/// parameters: None, /// parameters: None,
/// response: None, /// response: None,
/// sub_recipes: None, /// sub_recipes: None,
/// retry: None,
/// }; /// };
/// ///
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema)] #[derive(Serialize, Deserialize, Debug, Clone, ToSchema)]
@@ -109,6 +111,9 @@ pub struct Recipe {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub sub_recipes: Option<Vec<SubRecipe>>, // sub-recipes for the recipe pub sub_recipes: Option<Vec<SubRecipe>>, // sub-recipes for the recipe
#[serde(skip_serializing_if = "Option::is_none")]
pub retry: Option<RetryConfig>,
} }
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema)] #[derive(Serialize, Deserialize, Debug, Clone, ToSchema)]
@@ -239,6 +244,7 @@ pub struct RecipeBuilder {
parameters: Option<Vec<RecipeParameter>>, parameters: Option<Vec<RecipeParameter>>,
response: Option<Response>, response: Option<Response>,
sub_recipes: Option<Vec<SubRecipe>>, sub_recipes: Option<Vec<SubRecipe>>,
retry: Option<RetryConfig>,
} }
impl Recipe { impl Recipe {
@@ -271,26 +277,39 @@ impl Recipe {
parameters: None, parameters: None,
response: None, response: None,
sub_recipes: None, sub_recipes: None,
retry: None,
} }
} }
pub fn from_content(content: &str) -> Result<Self> { pub fn from_content(content: &str) -> Result<Self> {
if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(content) { let recipe: Recipe =
if let Some(nested_recipe) = json_value.get("recipe") { if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(content) {
Ok(serde_json::from_value(nested_recipe.clone())?) if let Some(nested_recipe) = json_value.get("recipe") {
serde_json::from_value(nested_recipe.clone())?
} else {
serde_json::from_str(content)?
}
} else if let Ok(yaml_value) = serde_yaml::from_str::<serde_yaml::Value>(content) {
if let Some(nested_recipe) = yaml_value.get("recipe") {
serde_yaml::from_value(nested_recipe.clone())?
} else {
serde_yaml::from_str(content)?
}
} else { } else {
Ok(serde_json::from_str(content)?) return Err(anyhow::anyhow!(
"Unsupported format. Expected JSON or YAML."
));
};
if let Some(ref retry_config) = recipe.retry {
if let Err(validation_error) = retry_config.validate() {
return Err(anyhow::anyhow!(
"Invalid retry configuration: {}",
validation_error
));
} }
} else if let Ok(yaml_value) = serde_yaml::from_str::<serde_yaml::Value>(content) {
if let Some(nested_recipe) = yaml_value.get("recipe") {
Ok(serde_yaml::from_value(nested_recipe.clone())?)
} else {
Ok(serde_yaml::from_str(content)?)
}
} else {
Err(anyhow::anyhow!(
"Unsupported format. Expected JSON or YAML."
))
} }
Ok(recipe)
} }
} }
@@ -369,6 +388,12 @@ impl RecipeBuilder {
self self
} }
/// Attaches a retry configuration to the Recipe being built.
///
/// Consumes the builder and returns it so calls can be chained; a retry
/// configuration set by an earlier call is replaced.
pub fn retry(self, retry: RetryConfig) -> Self {
    let mut builder = self;
    builder.retry = Some(retry);
    builder
}
/// Builds the Recipe instance /// Builds the Recipe instance
/// ///
/// Returns an error if any required fields are missing /// Returns an error if any required fields are missing
@@ -394,6 +419,7 @@ impl RecipeBuilder {
parameters: self.parameters, parameters: self.parameters,
response: self.response, response: self.response,
sub_recipes: self.sub_recipes, sub_recipes: self.sub_recipes,
retry: self.retry,
}) })
} }
} }

View File

@@ -1204,6 +1204,7 @@ async fn run_scheduled_job_internal(
schedule_id: Some(job.id.clone()), schedule_id: Some(job.id.clone()),
execution_mode: job.execution_mode.clone(), execution_mode: job.execution_mode.clone(),
max_turns: None, max_turns: None,
retry_config: None,
}; };
match agent match agent
@@ -1424,6 +1425,7 @@ mod tests {
settings: None, settings: None,
response: None, response: None,
sub_recipes: None, sub_recipes: None,
retry: None,
}; };
let mut recipe_file = File::create(&recipe_filename)?; let mut recipe_file = File::create(&recipe_filename)?;
writeln!( writeln!(

View File

@@ -761,6 +761,184 @@ mod final_output_tool_tests {
} }
} }
// Integration-style tests for the recipe retry feature: config validation,
// success-check execution, and the agent's retry-attempt counter.
#[cfg(test)]
mod retry_tests {
use super::*;
use async_trait::async_trait;
use goose::agents::types::{RetryConfig, SessionConfig, SuccessCheck};
use goose::model::ModelConfig;
use goose::providers::base::{Provider, ProviderUsage, Usage};
use goose::providers::errors::ProviderError;
use mcp_core::tool::Tool;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
/// Mock provider that answers with a "failed" message for the first
/// `fail_until` calls to `complete` and a "completed" message afterwards.
#[derive(Clone)]
struct MockRetryProvider {
model_config: ModelConfig,
// Number of `complete` calls made so far; shared across clones via `Arc`.
call_count: Arc<AtomicUsize>,
// Calls with a zero-based index below this value report failure text.
fail_until: usize,
}
#[async_trait]
impl Provider for MockRetryProvider {
fn metadata() -> goose::providers::base::ProviderMetadata {
goose::providers::base::ProviderMetadata::empty()
}
fn get_model_config(&self) -> ModelConfig {
self.model_config.clone()
}
// Always returns `Ok`; only the message text differs between the
// "failing" and "succeeding" phases.
async fn complete(
&self,
_system: &str,
_messages: &[Message],
_tools: &[Tool],
) -> anyhow::Result<(Message, ProviderUsage), ProviderError> {
// `fetch_add` returns the previous value, so `count` is the zero-based call index.
let count = self.call_count.fetch_add(1, Ordering::SeqCst);
if count < self.fail_until {
Ok((
Message::assistant().with_text("Task failed - will retry."),
ProviderUsage::new("mock".to_string(), Usage::default()),
))
} else {
Ok((
Message::assistant().with_text("Task completed successfully."),
ProviderUsage::new("mock".to_string(), Usage::default()),
))
}
}
}
/// A valid retry config passes `validate`, and an agent reply run with that
/// config in the session produces at least one message.
#[tokio::test]
async fn test_retry_config_validation_integration() -> Result<()> {
let agent = Agent::new();
let model_config = ModelConfig::new("test-model".to_string());
// `fail_until: 0` means the mock returns the "completed" text on the very first call.
let mock_provider = Arc::new(MockRetryProvider {
model_config,
call_count: Arc::new(AtomicUsize::new(0)),
fail_until: 0,
});
agent.update_provider(mock_provider.clone()).await?;
let retry_config = RetryConfig {
max_retries: 3,
checks: vec![SuccessCheck::Shell {
command: "echo 'success check'".to_string(),
}],
on_failure: Some("echo 'cleanup executed'".to_string()),
timeout_seconds: Some(30),
on_failure_timeout_seconds: Some(60),
};
assert!(
retry_config.validate().is_ok(),
"Valid config should pass validation"
);
let session_config = SessionConfig {
id: goose::session::Identifier::Name("test-retry".to_string()),
working_dir: std::env::current_dir()?,
schedule_id: None,
execution_mode: None,
max_turns: None,
retry_config: Some(retry_config),
};
let initial_messages = vec![Message::user().with_text("Complete this task")];
let reply_stream = agent.reply(&initial_messages, Some(session_config)).await?;
tokio::pin!(reply_stream);
// Collect only message events; other event kinds are ignored and the
// first stream error aborts the test.
let mut responses = Vec::new();
while let Some(response_result) = reply_stream.next().await {
match response_result {
Ok(AgentEvent::Message(response)) => responses.push(response),
Ok(_) => {}
Err(e) => return Err(e),
}
}
assert!(!responses.is_empty(), "Should have received responses");
Ok(())
}
/// `execute_success_checks` returns `Ok(true)` for a passing command and
/// `Ok(false)` (not an error) for a failing one.
#[tokio::test]
async fn test_retry_success_check_execution() -> Result<()> {
use goose::agents::retry::execute_success_checks;
// The checks under test are passed explicitly below; `retry_config.checks` stays empty.
let retry_config = RetryConfig {
max_retries: 3,
checks: vec![],
on_failure: None,
timeout_seconds: Some(30),
on_failure_timeout_seconds: Some(60),
};
let success_checks = vec![SuccessCheck::Shell {
command: "echo 'test'".to_string(),
}];
let result = execute_success_checks(&success_checks, &retry_config).await;
assert!(result.is_ok(), "Success check should pass");
assert!(result.unwrap(), "Command should succeed");
let fail_checks = vec![SuccessCheck::Shell {
command: "false".to_string(),
}];
let result = execute_success_checks(&fail_checks, &retry_config).await;
assert!(result.is_ok(), "Success check execution should not error");
assert!(!result.unwrap(), "Command should fail");
Ok(())
}
/// `validate` rejects a config with `max_retries` of 0.
#[tokio::test]
async fn test_retry_logic_with_validation_errors() -> Result<()> {
// Both `max_retries` and `timeout_seconds` are zero here, but only the
// `max_retries` message is asserted below.
let invalid_retry_config = RetryConfig {
max_retries: 0,
checks: vec![],
on_failure: None,
timeout_seconds: Some(0),
on_failure_timeout_seconds: None,
};
let validation_result = invalid_retry_config.validate();
assert!(
validation_result.is_err(),
"Should validate max_retries > 0"
);
assert!(validation_result
.unwrap_err()
.contains("max_retries must be greater than 0"));
Ok(())
}
/// The agent's retry-attempt counter reads 0 after a reset, 1 after one
/// increment, and 0 again after a second reset.
#[tokio::test]
async fn test_retry_attempts_counter_reset() -> Result<()> {
let agent = Agent::new();
agent.reset_retry_attempts().await;
let initial_attempts = agent.get_retry_attempts().await;
assert_eq!(initial_attempts, 0);
let new_attempts = agent.increment_retry_attempts().await;
assert_eq!(new_attempts, 1);
agent.reset_retry_attempts().await;
let reset_attempts = agent.get_retry_attempts().await;
assert_eq!(reset_attempts, 0);
Ok(())
}
}
#[cfg(test)] #[cfg(test)]
mod max_turns_tests { mod max_turns_tests {
use super::*; use super::*;
@@ -831,6 +1009,7 @@ mod max_turns_tests {
schedule_id: None, schedule_id: None,
execution_mode: None, execution_mode: None,
max_turns: Some(1), max_turns: Some(1),
retry_config: None,
}; };
let messages = vec![Message::user().with_text("Hello")]; let messages = vec![Message::user().with_text("Hello")];

View File

@@ -38,6 +38,7 @@ After creating recipe files, you can use [`goose` CLI commands](/docs/guides/goo
| `extensions` | Array | List of extension configurations | | `extensions` | Array | List of extension configurations |
| `sub_recipes` | Array | List of sub-recipes | | `sub_recipes` | Array | List of sub-recipes |
| `response` | Object | Configuration for structured output validation | | `response` | Object | Configuration for structured output validation |
| `retry` | Object | Configuration for automated retry logic with success validation |
## Parameters ## Parameters
@@ -136,6 +137,87 @@ sub_recipes:
path: "./sub-recipes/quality-analysis.yaml" path: "./sub-recipes/quality-analysis.yaml"
``` ```
## Automated Retry with Success Validation
The `retry` field enables recipes to automatically retry execution if success criteria are not met. This is useful for recipes that might need multiple attempts to achieve their goal, or for implementing automated validation and recovery workflows.
### Retry Configuration Fields
| Field | Type | Description |
|-------|------|-------------|
| `max_retries` | Number | Maximum number of retry attempts (required; must be greater than 0) |
| `timeout_seconds` | Number | (Optional) Timeout for success check commands (default: 300 seconds; must be greater than 0 if specified) |
| `on_failure_timeout_seconds` | Number | (Optional) Timeout for on_failure commands (default: 600 seconds; must be greater than 0 if specified) |
| `checks` | Array | List of success check configurations (required) |
| `on_failure` | String | (Optional) Shell command to run when a retry attempt fails |
### Success Check Configuration
Each success check in the `checks` array has the following structure:
| Field | Type | Description |
|-------|------|-------------|
| `type` | String | Type of check - currently only "shell" is supported |
| `command` | String | Shell command to execute for validation (must exit with code 0 for success) |
### How Retry Logic Works
1. **Recipe Execution**: The recipe runs normally with the provided instructions
2. **Success Validation**: After completion, all success checks are executed in order
3. **Retry Decision**: If any success check fails and retry attempts remain:
- Execute the on_failure command (if configured)
- Reset the agent's message history to initial state
- Increment retry counter and restart execution
4. **Completion**: Process stops when either:
- All success checks pass (success)
- Maximum retry attempts are reached (failure)
### Basic Retry Example
```yaml
version: "1.0.0"
title: "Counter Increment Task"
description: "Increment a counter until it reaches target value"
prompt: "Increment the counter value in /tmp/counter.txt by 1."
retry:
max_retries: 5
timeout_seconds: 10
checks:
- type: shell
command: "test $(cat /tmp/counter.txt 2>/dev/null || echo 0) -ge 3"
on_failure: "echo 'Counter is at:' $(cat /tmp/counter.txt 2>/dev/null || echo 0) '(need 3 to succeed)'"
```
### Advanced Retry Example
```yaml
version: "1.0.0"
title: "Service Health Check"
description: "Start service and verify it's running properly"
prompt: "Start the web service and verify it responds to health checks"
retry:
max_retries: 3
timeout_seconds: 30
on_failure_timeout_seconds: 60
checks:
- type: shell
command: "curl -f http://localhost:8080/health"
- type: shell
command: "pgrep -f 'web-service' > /dev/null"
on_failure: "systemctl stop web-service || killall web-service"
```
### Environment Variables
You can configure retry behavior globally using environment variables:
- `GOOSE_RECIPE_RETRY_TIMEOUT_SECONDS`: Global timeout for success check commands
- `GOOSE_RECIPE_ON_FAILURE_TIMEOUT_SECONDS`: Global timeout for on_failure commands
These environment variables are overridden by recipe-specific timeout configurations.
## Structured Output with `response` ## Structured Output with `response`
The `response` field enables recipes to enforce a final structured JSON output from Goose. When you specify a `json_schema`, Goose will: The `response` field enables recipes to enforce a final structured JSON output from Goose. When you specify a `json_schema`, Goose will:
@@ -243,6 +325,14 @@ extensions:
bundled: true bundled: true
description: "Query codesearch directly from goose" description: "Query codesearch directly from goose"
retry:
max_retries: 3
timeout_seconds: 30
checks:
- type: shell
command: "echo 'Task validation check passed'"
on_failure: "echo 'Retry attempt failed, cleaning up...'"
response: response:
json_schema: json_schema:
type: object type: object
@@ -313,8 +403,16 @@ Common errors to watch for:
- Invalid YAML/JSON syntax - Invalid YAML/JSON syntax
- Missing required fields - Missing required fields
- Invalid extension configurations - Invalid extension configurations
- Invalid retry configuration (missing required fields, invalid shell commands)
When these occur, Goose will provide helpful error messages indicating what needs to be fixed. When these occur, Goose will provide helpful error messages indicating what needs to be fixed.
### Retry-Specific Errors
- **Invalid success checks**: Shell commands that cannot be executed or have syntax errors
- **Timeout errors**: Success checks or on_failure commands that exceed their timeout limits
- **Max retries exceeded**: When all retry attempts are exhausted without success
- **Missing required retry fields**: When `max_retries` or `checks` are not specified
## Learn More ## Learn More
Check out the [Goose Recipes](/docs/guides/recipes) guide for more docs, tools, and resources to help you master Goose recipes. Check out the [Goose Recipes](/docs/guides/recipes) guide for more docs, tools, and resources to help you master Goose recipes.

View File

@@ -87,6 +87,12 @@ You can turn your current Goose session into a reusable recipe that includes the
goose_provider: $provider # Provider to use for this recipe goose_provider: $provider # Provider to use for this recipe
goose_model: $model # Specific model to use for this recipe goose_model: $model # Specific model to use for this recipe
temperature: $temperature # Model temperature setting for this recipe (0.0 to 1.0) temperature: $temperature # Model temperature setting for this recipe (0.0 to 1.0)
retry: # Automated retry logic with success validation
max_retries: $max_retries # Maximum number of retry attempts
checks: # Success validation checks
- type: shell
command: $validation_command
on_failure: $cleanup_command # Optional cleanup command on failure
``` ```
</details> </details>
@@ -529,6 +535,37 @@ When scheduling Goose recipes with the CLI, you can use Goose's built-in cron sc
- Help users understand what the recipe can do - Help users understand what the recipe can do
- Make it easy to get started - Make it easy to get started
## Advanced Features
### Automated Retry Logic
Recipes can include retry logic to automatically attempt task completion multiple times until success criteria are met. This is particularly useful for:
- **Automation workflows** that need to ensure successful completion
- **Development tasks** like running tests that may need multiple attempts
- **System operations** that require validation and cleanup
**Basic retry configuration:**
```yaml
retry:
max_retries: 3
checks:
- type: shell
command: "test -f output.txt" # Check if output file exists
on_failure: "rm -f temp_files*" # Cleanup on failure
```
**How it works:**
1. Recipe executes normally with provided instructions
2. After completion, success checks validate the results
3. If validation fails and retries remain:
- Optional cleanup command runs
- Agent state resets to initial conditions
- Recipe execution starts over
4. Process continues until either success or max retries reached
See the [Recipe Reference Guide](/docs/guides/recipes/recipe-reference#automated-retry-with-success-validation) for complete retry configuration options and examples.
## What's Included ## What's Included
A recipe captures: A recipe captures:
@@ -539,6 +576,7 @@ A recipe captures:
- Project folder or file context - Project folder or file context
- Initial setup (but not full conversation history) - Initial setup (but not full conversation history)
- The model and provider to use when running the recipe (optional) - The model and provider to use when running the recipe (optional)
- Retry logic and success validation configuration (if configured)
To protect your privacy and system integrity, Goose excludes: To protect your privacy and system integrity, Goose excludes: