diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py
index 16e5f163..9c1a7d73 100644
--- a/autogpt/agents/agent.py
+++ b/autogpt/agents/agent.py
@@ -24,6 +24,7 @@ from autogpt.logs.log_cycle import (
     LogCycleHandler,
 )
 from autogpt.models.agent_actions import (
+    Action,
     ActionErrorResult,
     ActionInterruptedByHuman,
     ActionResult,
@@ -111,8 +112,8 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             kwargs["append_messages"] = []
         kwargs["append_messages"].append(budget_msg)
 
-        # Include message history in base prompt
-        kwargs["with_message_history"] = True
+        # # Include message history in base prompt
+        # kwargs["with_message_history"] = True
 
         return super().construct_base_prompt(*args, **kwargs)
 
@@ -124,7 +125,7 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             self.ai_config.ai_name,
             self.created_at,
             self.cycle_count,
-            self.history.raw(),
+            self.message_history.raw(),
             FULL_MESSAGE_HISTORY_FILE_NAME,
         )
         self.log_cycle_handler.log_cycle(
@@ -146,7 +147,7 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
 
         if command_name == "human_feedback":
             result = ActionInterruptedByHuman(user_input)
-            self.history.add(
+            self.message_history.add(
                 "user",
                 "I interrupted the execution of the command you proposed "
                 f"to give you some feedback: {user_input}",
@@ -187,13 +188,11 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             except AgentException as e:
                 result = ActionErrorResult(e.message, e)
 
-        logger.debug(f"Command result: {result}")
-
         result_tlength = count_string_tokens(str(result), self.llm.name)
-        memory_tlength = count_string_tokens(
-            str(self.history.summary_message()), self.llm.name
+        history_tlength = count_string_tokens(
+            self.event_history.fmt_paragraph(), self.llm.name
         )
-        if result_tlength + memory_tlength > self.send_token_limit:
+        if result_tlength + history_tlength > self.send_token_limit:
             result = ActionErrorResult(
                 reason=f"Command {command_name} returned too much output. "
                 "Do not execute this command again with the same arguments."
@@ -203,15 +202,15 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             if not plugin.can_handle_post_command():
                 continue
             if result.status == "success":
-                result.results = plugin.post_command(command_name, result.results)
+                result.outputs = plugin.post_command(command_name, result.outputs)
             elif result.status == "error":
                 result.reason = plugin.post_command(command_name, result.reason)
 
         # Check if there's a result from the command append it to the message
         if result.status == "success":
-            self.history.add(
+            self.message_history.add(
                 "system",
-                f"Command {command_name} returned: {result.results}",
+                f"Command {command_name} returned: {result.outputs}",
                 "action_result",
             )
         elif result.status == "error":
@@ -225,7 +224,10 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             ):
                 message = message.rstrip(".") + f". {result.error.hint}"
-            self.history.add("system", message, "action_result")
+            self.message_history.add("system", message, "action_result")
+
+        # Update action history
+        self.event_history.register_result(result)
 
         return result
 
@@ -264,6 +266,15 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             assistant_reply_dict,
             NEXT_ACTION_FILE_NAME,
         )
+
+        self.event_history.register_action(
+            Action(
+                name=command_name,
+                args=arguments,
+                reasoning=assistant_reply_dict["thoughts"]["reasoning"],
+            )
+        )
+
         return response
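Taken together, the `agent.py` changes split the agent's memory of its own run in two: the renamed `message_history` keeps the raw chat transcript, while the new `event_history` records one `Action` per think-act cycle and pairs it with an `ActionResult` once the command has run. A minimal sketch of that pairing, using only the classes touched by this diff (the command name, arguments, and output values are made up for illustration):

```python
from autogpt.models.agent_actions import Action, ActionHistory, ActionSuccessResult

event_history = ActionHistory()

# In propose_action(): record what the agent decided to do, and why.
event_history.register_action(
    Action(
        name="write_to_file",  # hypothetical command, for illustration only
        args={"filename": "plan.md", "text": "1. Research the topic"},
        reasoning="A plan file keeps the task on track.",
    )
)

# In execute(): record how it went; this also advances the history's cursor.
event_history.register_result(
    ActionSuccessResult(outputs="File plan.md has been written successfully.")
)

# The rendered history is what gets token-counted and prepended to the prompt.
print(event_history.fmt_paragraph())
```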
diff --git a/autogpt/agents/base.py b/autogpt/agents/base.py
index 4b682ff7..28f69602 100644
--- a/autogpt/agents/base.py
+++ b/autogpt/agents/base.py
@@ -15,7 +15,7 @@ from autogpt.llm.base import ChatModelResponse, ChatSequence, Message
 from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS, get_openai_command_specs
 from autogpt.llm.utils import count_message_tokens, create_chat_completion
 from autogpt.memory.message_history import MessageHistory
-from autogpt.models.agent_actions import ActionResult
+from autogpt.models.agent_actions import ActionHistory, ActionResult
 from autogpt.prompts.generator import PromptGenerator
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 
@@ -93,7 +93,9 @@ class BaseAgent(metaclass=ABCMeta):
                 defaults to 75% of `llm.max_tokens`.
         """
 
-        self.history = MessageHistory(
+        self.event_history = ActionHistory()
+
+        self.message_history = MessageHistory(
             self.llm,
             max_summary_tlength=summary_max_tlength or self.send_token_limit // 6,
         )
@@ -177,6 +179,15 @@ class BaseAgent(metaclass=ABCMeta):
             reserve_tokens: Number of tokens to reserve for content that is added later
         """
 
+        if self.event_history:
+            prepend_messages.insert(
+                0,
+                Message(
+                    "system",
+                    "## Progress\n\n" f"{self.event_history.fmt_paragraph()}",
+                ),
+            )
+
         prompt = ChatSequence.for_model(
             self.llm.name,
             [Message("system", self.system_prompt)] + prepend_messages,
@@ -184,7 +195,7 @@ class BaseAgent(metaclass=ABCMeta):
 
         if with_message_history:
             # Reserve tokens for messages to be appended later, if any
-            reserve_tokens += self.history.max_summary_tlength
+            reserve_tokens += self.message_history.max_summary_tlength
             if append_messages:
                 reserve_tokens += count_message_tokens(append_messages, self.llm.name)
 
@@ -192,10 +203,10 @@
             # Trim remaining historical messages and add them to the running summary.
             history_start_index = len(prompt)
             trimmed_history = add_history_upto_token_limit(
-                prompt, self.history, self.send_token_limit - reserve_tokens
+                prompt, self.message_history, self.send_token_limit - reserve_tokens
             )
             if trimmed_history:
-                new_summary_msg, _ = self.history.trim_messages(
+                new_summary_msg, _ = self.message_history.trim_messages(
                     list(prompt), self.config
                 )
                 prompt.insert(history_start_index, new_summary_msg)
@@ -359,8 +370,8 @@ class BaseAgent(metaclass=ABCMeta):
         """
 
         # Save assistant reply to message history
-        self.history.append(prompt[-1])
-        self.history.add(
+        self.message_history.append(prompt[-1])
+        self.message_history.add(
             "assistant", llm_response.content, "ai_response"
         )  # FIXME: support function calls
 
@@ -370,7 +381,7 @@ class BaseAgent(metaclass=ABCMeta):
             )
         except InvalidAgentResponseError as e:
             # TODO: tune this message
-            self.history.add(
+            self.message_history.add(
                 "system",
                 f"Your response could not be parsed: {e}"
                 "\n\nRemember to only respond using the specified format above!",
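With a non-empty event history, `construct_base_prompt` now leads with a `## Progress` system message rendered by `fmt_paragraph()`. For the single cycle sketched above, that message would look roughly like this (illustrative only: the exact call rendering comes from `Action.format_call()`, which this diff does not show):

```
## Progress

### Step 1: Executed `write_to_file(...)`
- **Reasoning:** "A plan file keeps the task on track."
- **Status:** `success`
- **Output:** File plan.md has been written successfully.
```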
diff --git a/autogpt/agents/planning_agent.py b/autogpt/agents/planning_agent.py
index bdc064fc..f4b6fa4c 100644
--- a/autogpt/agents/planning_agent.py
+++ b/autogpt/agents/planning_agent.py
@@ -285,7 +285,7 @@ class PlanningAgent(ContextMixin, WorkspaceMixin, BaseAgent):
 
         result_tlength = count_string_tokens(str(result), self.llm.name)
         memory_tlength = count_string_tokens(
-            str(self.history.summary_message()), self.llm.name
+            str(self.message_history.summary_message()), self.llm.name
         )
         if result_tlength + memory_tlength > self.send_token_limit:
             result = ActionErrorResult(
@@ -297,15 +297,15 @@ class PlanningAgent(ContextMixin, WorkspaceMixin, BaseAgent):
             if not plugin.can_handle_post_command():
                 continue
             if result.status == "success":
-                result.results = plugin.post_command(command_name, result.results)
+                result.outputs = plugin.post_command(command_name, result.outputs)
             elif result.status == "error":
                 result.reason = plugin.post_command(command_name, result.reason)
 
         # Check if there's a result from the command append it to the message
         if result.status == "success":
-            self.history.add(
+            self.message_history.add(
                 "system",
-                f"Command {command_name} returned: {result.results}",
+                f"Command {command_name} returned: {result.outputs}",
                 "action_result",
             )
         elif result.status == "error":
@@ -316,7 +316,7 @@ class PlanningAgent(ContextMixin, WorkspaceMixin, BaseAgent):
                 and result.error.hint
             ):
                 message = message.rstrip(".") + f". {result.error.hint}"
-            self.history.add("system", message, "action_result")
+            self.message_history.add("system", message, "action_result")
 
         return result
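Note that `PlanningAgent` still budgets the command result against the message-history summary, whereas `Agent` above now budgets it against the rendered event history. The guard has the same shape in both; distilled into a standalone sketch (`count_string_tokens` is the real helper from `autogpt.llm.utils`, the wrapper function is hypothetical):

```python
from autogpt.llm.utils import count_string_tokens

def result_exceeds_budget(
    result: str, history_text: str, model_name: str, send_token_limit: int
) -> bool:
    """True if the pending command result plus the running history
    would no longer fit into the prompt's token budget."""
    result_tlength = count_string_tokens(result, model_name)
    history_tlength = count_string_tokens(history_text, model_name)
    return result_tlength + history_tlength > send_token_limit
```

When this returns True, both agents replace the oversized result with an `ActionErrorResult` telling the LLM not to repeat the command.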
diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py
index 1aa73014..3a1bd104 100644
--- a/autogpt/commands/file_operations.py
+++ b/autogpt/commands/file_operations.py
@@ -11,7 +11,7 @@ import logging
 import os
 import os.path
 from pathlib import Path
-from typing import Generator, Literal
+from typing import Iterator, Literal
 
 from autogpt.agents.agent import Agent
 from autogpt.agents.utils.exceptions import DuplicateOperationError
@@ -34,7 +34,9 @@ def text_checksum(text: str) -> str:
 
 def operations_from_log(
     log_path: str | Path,
-) -> Generator[tuple[Operation, str, str | None], None, None]:
+) -> Iterator[
+    tuple[Literal["write", "append"], str, str] | tuple[Literal["delete"], str, None]
+]:
     """Parse the file operations log and return a tuple containing the log entries"""
     try:
         log = open(log_path, "r", encoding="utf-8")
@@ -48,11 +50,7 @@ def operations_from_log(
         operation, tail = line.split(": ", maxsplit=1)
         operation = operation.strip()
         if operation in ("write", "append"):
-            try:
-                path, checksum = (x.strip() for x in tail.rsplit(" #", maxsplit=1))
-            except ValueError:
-                logger.warn(f"File log entry lacks checksum: '{line}'")
-                path, checksum = tail.strip(), None
+            path, checksum = (x.strip() for x in tail.rsplit(" #", maxsplit=1))
             yield (operation, path, checksum)
         elif operation == "delete":
             yield (operation, tail.strip(), None)
@@ -228,7 +226,7 @@ def write_to_file(filename: Path, text: str, agent: Agent) -> str:
     with open(filename, "w", encoding="utf-8") as f:
         f.write(text)
     log_operation("write", filename, agent, checksum)
-    return "File written to successfully."
+    return f"File {filename.name} has been written successfully."
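The tightened return type encodes an invariant of the log format: `write`/`append` entries always carry a ` #<checksum>` suffix, `delete` entries never do, and a checksum-less `write` line now raises `ValueError` at the tuple unpack instead of being tolerated with `checksum=None`. A self-contained re-creation of just that parsing rule (the sample log lines are made up):

```python
def parse_line(line: str) -> tuple[str, str, str | None]:
    # Mirrors the loop body of operations_from_log() above.
    operation, tail = line.split(": ", maxsplit=1)
    operation = operation.strip()
    if operation in ("write", "append"):
        # Raises ValueError if the " #<checksum>" suffix is missing;
        # the lenient fallback was removed in this diff.
        path, checksum = (x.strip() for x in tail.rsplit(" #", maxsplit=1))
        return (operation, path, checksum)
    return (operation, tail.strip(), None)

assert parse_line("write: plan.md #d41d8cd9") == ("write", "plan.md", "d41d8cd9")
assert parse_line("delete: old.txt") == ("delete", "old.txt", None)
```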
diff --git a/autogpt/models/agent_actions.py b/autogpt/models/agent_actions.py
index 2062e2d4..676c403e 100644
--- a/autogpt/models/agent_actions.py
+++ b/autogpt/models/agent_actions.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 from dataclasses import dataclass
 from typing import Any, Iterator, Literal, Optional
 
+from autogpt.prompts.utils import format_numbered_list, indent
+
 
 @dataclass
 class Action:
@@ -16,11 +18,13 @@
 
 @dataclass
 class ActionSuccessResult:
-    results: Any
+    outputs: Any
     status: Literal["success"] = "success"
 
     def __str__(self) -> str:
-        return f"Action succeeded and returned: `{self.results}`"
+        outputs = str(self.outputs).replace("```", r"\```")
+        multiline = "\n" in outputs
+        return f"```\n{outputs}\n```" if multiline else outputs
 
 
 @dataclass
@@ -30,7 +34,7 @@ class ActionErrorResult:
     status: Literal["error"] = "error"
 
     def __str__(self) -> str:
-        return f"Action failed: `{self.reason}`"
+        return f"Action failed: '{self.reason}'"
 
 
 @dataclass
@@ -50,9 +54,14 @@ class ActionHistory:
 
     @dataclass
     class CycleRecord:
-        action: Action | None
+        action: Action
         result: ActionResult | None
 
+        def __str__(self) -> str:
+            executed_action = f"Executed `{self.action.format_call()}`"
+            action_result = f": {self.result}" if self.result else "."
+            return executed_action + action_result
+
     cursor: int
     cycles: list[CycleRecord]
 
@@ -80,13 +89,11 @@ class ActionHistory:
 
     def register_action(self, action: Action) -> None:
         if not self.current_record:
-            self.cycles.append(self.CycleRecord(None, None))
+            self.cycles.append(self.CycleRecord(action, None))
             assert self.current_record
         elif self.current_record.action:
             raise ValueError("Action for current cycle already set")
 
-        self.current_record.action = action
-
     def register_result(self, result: ActionResult) -> None:
         if not self.current_record:
             raise RuntimeError("Cannot register result for cycle without action")
@@ -94,3 +101,32 @@ class ActionHistory:
             raise ValueError("Result for current cycle already set")
 
         self.current_record.result = result
+        self.cursor = len(self.cycles)
+
+    def fmt_list(self) -> str:
+        return format_numbered_list(self.cycles)
+
+    def fmt_paragraph(self) -> str:
+        steps: list[str] = []
+
+        for i, c in enumerate(self.cycles, 1):
+            step = f"### Step {i}: Executed `{c.action.format_call()}`\n"
+            step += f'- **Reasoning:** "{c.action.reasoning}"\n'
+            step += (
+                f"- **Status:** `{c.result.status if c.result else 'did_not_finish'}`\n"
+            )
+            if c.result:
+                if c.result.status == "success":
+                    result = str(c.result)
+                    result = "\n" + indent(result) if "\n" in result else result
+                    step += f"- **Output:** {result}"
+                elif c.result.status == "error":
+                    step += f"- **Reason:** {c.result.reason}\n"
+                    if c.result.error:
+                        step += f"- **Error:** {c.result.error}\n"
+                elif c.result.status == "interrupted_by_human":
+                    step += f"- **Feedback:** {c.result.feedback}\n"
+
+            steps.append(step)
+
+        return "\n\n".join(steps)
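`ActionSuccessResult.__str__` now renders single-line outputs inline and wraps multiline outputs in a fenced code block, backslash-escaping any embedded triple backticks first so the output cannot break out of its fence. Expected behavior, assuming the `__str__` shown above:

```python
from autogpt.models.agent_actions import ActionSuccessResult

# Single-line outputs render inline, unfenced:
assert str(ActionSuccessResult(outputs=42)) == "42"

# Multiline outputs are wrapped in a fence (embedded "```" sequences in the
# output are escaped to "\```" beforehand):
rendered = str(ActionSuccessResult(outputs="line 1\nline 2"))
assert rendered.startswith("```\n") and rendered.endswith("\n```")
```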
diff --git a/autogpt/prompts/generator.py b/autogpt/prompts/generator.py
index 6ca17282..b3c9ddcb 100644
--- a/autogpt/prompts/generator.py
+++ b/autogpt/prompts/generator.py
@@ -13,6 +13,8 @@ if TYPE_CHECKING:
     from autogpt.config import AIConfig, AIDirectives, Config
     from autogpt.models.command_registry import CommandRegistry
 
+from .utils import format_numbered_list
+
 logger = logging.getLogger(__name__)
 
 
@@ -124,19 +126,6 @@ class PromptGenerator:
         if best_practice not in self.best_practices:
             self.best_practices.append(best_practice)
 
-    def _generate_numbered_list(self, items: list[str], start_at: int = 1) -> str:
-        """
-        Generate a numbered list containing the given items.
-
-        Params:
-            items (list): A list of items to be numbered.
-            start_at (int, optional): The number to start the sequence with; defaults to 1.
-
-        Returns:
-            str: The formatted numbered list.
-        """
-        return "\n".join(f"{i}. {item}" for i, item in enumerate(items, start_at))
-
     def construct_system_prompt(self, agent: BaseAgent) -> str:
         """Constructs a system prompt containing the most important information for the AI.
@@ -257,15 +246,15 @@ class PromptGenerator:
         return [
             "## Constraints\n"
             "You operate within the following constraints:\n"
-            f"{self._generate_numbered_list(self.constraints + additional_constraints)}",
+            f"{format_numbered_list(self.constraints + additional_constraints)}",
             "## Resources\n"
             "You can leverage access to the following resources:\n"
-            f"{self._generate_numbered_list(self.resources + additional_resources)}",
+            f"{format_numbered_list(self.resources + additional_resources)}",
             "## Commands\n"
             "You have access to the following commands:\n"
             f"{self.list_commands(agent)}",
             "## Best practices\n"
-            f"{self._generate_numbered_list(self.best_practices + additional_best_practices)}",
+            f"{format_numbered_list(self.best_practices + additional_best_practices)}",
         ]
 
     def list_commands(self, agent: BaseAgent) -> str:
@@ -286,4 +275,4 @@ class PromptGenerator:
         # Add commands from plugins etc.
         command_strings += [str(cmd) for cmd in self.commands.values()]
 
-        return self._generate_numbered_list(command_strings)
+        return format_numbered_list(command_strings)
diff --git a/autogpt/prompts/utils.py b/autogpt/prompts/utils.py
new file mode 100644
index 00000000..ad5aaa23
--- /dev/null
+++ b/autogpt/prompts/utils.py
@@ -0,0 +1,11 @@
+from typing import Any
+
+
+def format_numbered_list(items: list[Any], start_at: int = 1) -> str:
+    return "\n".join(f"{i}. {str(item)}" for i, item in enumerate(items, start_at))
+
+
+def indent(content: str, indentation: int | str = 4) -> str:
+    if isinstance(indentation, int):
+        indentation = " " * indentation
+    return indentation + content.replace("\n", f"\n{indentation}")
diff --git a/tests/unit/test_agent.py b/tests/unit/test_agent.py
index ef5ef28a..02b6e349 100644
--- a/tests/unit/test_agent.py
+++ b/tests/unit/test_agent.py
@@ -3,7 +3,7 @@ from autogpt.agents.agent import Agent, execute_command
 
 
 def test_agent_initialization(agent: Agent):
     assert agent.ai_config.ai_name == "Base"
-    assert agent.history.messages == []
+    assert agent.message_history.messages == []
     assert agent.cycle_budget is None
     assert "You are Base" in agent.system_prompt
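The two helpers in the new `autogpt/prompts/utils.py` are pure string utilities, so their behavior is easy to pin down (expected values shown in comments):

```python
from autogpt.prompts.utils import format_numbered_list, indent

format_numbered_list(["Stay on task", "Cite sources"])
# -> '1. Stay on task\n2. Cite sources'

indent("first line\nsecond line", 2)
# -> '  first line\n  second line'

indent("first line\nsecond line", "> ")  # a string prefix also works
# -> '> first line\n> second line'
```

Extracting `format_numbered_list` out of `PromptGenerator` is what lets `ActionHistory.fmt_list` reuse it, and `indent` is what `fmt_paragraph` uses to nest multiline command output under its `- **Output:**` bullet.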