Merge branch 'action-history'

Reinier van der Leer
2023-08-28 19:46:25 +02:00
8 changed files with 115 additions and 59 deletions

autogpt/agents/agent.py

@@ -24,6 +24,7 @@ from autogpt.logs.log_cycle import (
     LogCycleHandler,
 )
 from autogpt.models.agent_actions import (
+    Action,
     ActionErrorResult,
     ActionInterruptedByHuman,
     ActionResult,

@@ -111,8 +112,8 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
                 kwargs["append_messages"] = []
             kwargs["append_messages"].append(budget_msg)

-        # Include message history in base prompt
-        kwargs["with_message_history"] = True
+        # # Include message history in base prompt
+        # kwargs["with_message_history"] = True

         return super().construct_base_prompt(*args, **kwargs)

@@ -124,7 +125,7 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             self.ai_config.ai_name,
             self.created_at,
             self.cycle_count,
-            self.history.raw(),
+            self.message_history.raw(),
             FULL_MESSAGE_HISTORY_FILE_NAME,
         )
         self.log_cycle_handler.log_cycle(

@@ -146,7 +147,7 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
         if command_name == "human_feedback":
             result = ActionInterruptedByHuman(user_input)
-            self.history.add(
+            self.message_history.add(
                 "user",
                 "I interrupted the execution of the command you proposed "
                 f"to give you some feedback: {user_input}",

@@ -187,13 +188,11 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             except AgentException as e:
                 result = ActionErrorResult(e.message, e)

         logger.debug(f"Command result: {result}")

         result_tlength = count_string_tokens(str(result), self.llm.name)
-        memory_tlength = count_string_tokens(
-            str(self.history.summary_message()), self.llm.name
+        history_tlength = count_string_tokens(
+            self.event_history.fmt_paragraph(), self.llm.name
         )
-        if result_tlength + memory_tlength > self.send_token_limit:
+        if result_tlength + history_tlength > self.send_token_limit:
             result = ActionErrorResult(
                 reason=f"Command {command_name} returned too much output. "
                 "Do not execute this command again with the same arguments."

@@ -203,15 +202,15 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             if not plugin.can_handle_post_command():
                 continue
             if result.status == "success":
-                result.results = plugin.post_command(command_name, result.results)
+                result.outputs = plugin.post_command(command_name, result.outputs)
             elif result.status == "error":
                 result.reason = plugin.post_command(command_name, result.reason)

         # Check if there's a result from the command append it to the message
         if result.status == "success":
-            self.history.add(
+            self.message_history.add(
                 "system",
-                f"Command {command_name} returned: {result.results}",
+                f"Command {command_name} returned: {result.outputs}",
                 "action_result",
             )
         elif result.status == "error":

@@ -225,7 +224,10 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             ):
                 message = message.rstrip(".") + f". {result.error.hint}"
-            self.history.add("system", message, "action_result")
+            self.message_history.add("system", message, "action_result")
+
+        # Update action history
+        self.event_history.register_result(result)

         return result

@@ -264,6 +266,15 @@ class Agent(ContextMixin, WorkspaceMixin, BaseAgent):
             assistant_reply_dict,
             NEXT_ACTION_FILE_NAME,
         )
+
+        self.event_history.register_action(
+            Action(
+                name=command_name,
+                args=arguments,
+                reasoning=assistant_reply_dict["thoughts"]["reasoning"],
+            )
+        )
+
         return response
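Taken together, the agent.py changes split the old `history` into a `message_history` of raw chat messages and an `event_history` of structured action records. A minimal sketch (not part of the diff) of the lifecycle these hunks implement, assuming the zero-argument `ActionHistory()` constructor used in base.py below:

from autogpt.models.agent_actions import (
    Action,
    ActionHistory,
    ActionSuccessResult,
)

history = ActionHistory()

# The proposed command is registered as soon as the LLM response is parsed
history.register_action(
    Action(
        name="read_file",                # illustrative command and args
        args={"filename": "notes.txt"},
        reasoning="I need the file contents before I can answer.",
    )
)

# After execute_command() runs, the outcome is paired with the pending action
history.register_result(ActionSuccessResult(outputs="Hello, world!"))

# This rendered history is what execute() now counts against the token budget
print(history.fmt_paragraph())

After `register_result`, the cursor advances past the completed cycle, so the next `register_action` starts a new record.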

autogpt/agents/base.py

@@ -15,7 +15,7 @@ from autogpt.llm.base import ChatModelResponse, ChatSequence, Message
 from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS, get_openai_command_specs
 from autogpt.llm.utils import count_message_tokens, create_chat_completion
 from autogpt.memory.message_history import MessageHistory
-from autogpt.models.agent_actions import ActionResult
+from autogpt.models.agent_actions import ActionHistory, ActionResult
 from autogpt.prompts.generator import PromptGenerator
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT

@@ -93,7 +93,9 @@ class BaseAgent(metaclass=ABCMeta):
             defaults to 75% of `llm.max_tokens`.
         """

-        self.history = MessageHistory(
+        self.event_history = ActionHistory()
+
+        self.message_history = MessageHistory(
             self.llm,
             max_summary_tlength=summary_max_tlength or self.send_token_limit // 6,
         )

@@ -177,6 +179,15 @@ class BaseAgent(metaclass=ABCMeta):
             reserve_tokens: Number of tokens to reserve for content that is added later
         """

+        if self.event_history:
+            prepend_messages.insert(
+                0,
+                Message(
+                    "system",
+                    "## Progress\n\n" f"{self.event_history.fmt_paragraph()}",
+                ),
+            )
+
         prompt = ChatSequence.for_model(
             self.llm.name,
             [Message("system", self.system_prompt)] + prepend_messages,

@@ -184,7 +195,7 @@ class BaseAgent(metaclass=ABCMeta):
         if with_message_history:
             # Reserve tokens for messages to be appended later, if any
-            reserve_tokens += self.history.max_summary_tlength
+            reserve_tokens += self.message_history.max_summary_tlength
             if append_messages:
                 reserve_tokens += count_message_tokens(append_messages, self.llm.name)

@@ -192,10 +203,10 @@ class BaseAgent(metaclass=ABCMeta):
             # Trim remaining historical messages and add them to the running summary.
             history_start_index = len(prompt)
             trimmed_history = add_history_upto_token_limit(
-                prompt, self.history, self.send_token_limit - reserve_tokens
+                prompt, self.message_history, self.send_token_limit - reserve_tokens
             )
             if trimmed_history:
-                new_summary_msg, _ = self.history.trim_messages(
+                new_summary_msg, _ = self.message_history.trim_messages(
                     list(prompt), self.config
                 )
                 prompt.insert(history_start_index, new_summary_msg)

@@ -359,8 +370,8 @@ class BaseAgent(metaclass=ABCMeta):
         """
         # Save assistant reply to message history
-        self.history.append(prompt[-1])
-        self.history.add(
+        self.message_history.append(prompt[-1])
+        self.message_history.add(
             "assistant", llm_response.content, "ai_response"
         )  # FIXME: support function calls

@@ -370,7 +381,7 @@ class BaseAgent(metaclass=ABCMeta):
             )
         except InvalidAgentResponseError as e:
             # TODO: tune this message
-            self.history.add(
+            self.message_history.add(
                 "system",
                 f"Your response could not be parsed: {e}"
                 "\n\nRemember to only respond using the specified format above!",

autogpt/agents/planning_agent.py

@@ -285,7 +285,7 @@ class PlanningAgent(ContextMixin, WorkspaceMixin, BaseAgent):
         result_tlength = count_string_tokens(str(result), self.llm.name)
         memory_tlength = count_string_tokens(
-            str(self.history.summary_message()), self.llm.name
+            str(self.message_history.summary_message()), self.llm.name
         )
         if result_tlength + memory_tlength > self.send_token_limit:
             result = ActionErrorResult(

@@ -297,15 +297,15 @@ class PlanningAgent(ContextMixin, WorkspaceMixin, BaseAgent):
             if not plugin.can_handle_post_command():
                 continue
             if result.status == "success":
-                result.results = plugin.post_command(command_name, result.results)
+                result.outputs = plugin.post_command(command_name, result.outputs)
             elif result.status == "error":
                 result.reason = plugin.post_command(command_name, result.reason)

         # Check if there's a result from the command append it to the message
         if result.status == "success":
-            self.history.add(
+            self.message_history.add(
                 "system",
-                f"Command {command_name} returned: {result.results}",
+                f"Command {command_name} returned: {result.outputs}",
                 "action_result",
             )
         elif result.status == "error":

@@ -316,7 +316,7 @@ class PlanningAgent(ContextMixin, WorkspaceMixin, BaseAgent):
                 and result.error.hint
             ):
                 message = message.rstrip(".") + f". {result.error.hint}"
-            self.history.add("system", message, "action_result")
+            self.message_history.add("system", message, "action_result")

         return result

autogpt/commands/file_operations.py

@@ -11,7 +11,7 @@ import logging
 import os
 import os.path
 from pathlib import Path
-from typing import Generator, Literal
+from typing import Iterator, Literal

 from autogpt.agents.agent import Agent
 from autogpt.agents.utils.exceptions import DuplicateOperationError

@@ -34,7 +34,9 @@ def text_checksum(text: str) -> str:
 def operations_from_log(
     log_path: str | Path,
-) -> Generator[tuple[Operation, str, str | None], None, None]:
+) -> Iterator[
+    tuple[Literal["write", "append"], str, str] | tuple[Literal["delete"], str, None]
+]:
     """Parse the file operations log and return a tuple containing the log entries"""
     try:
         log = open(log_path, "r", encoding="utf-8")

@@ -48,11 +50,7 @@ def operations_from_log(
         operation, tail = line.split(": ", maxsplit=1)
         operation = operation.strip()
         if operation in ("write", "append"):
-            try:
-                path, checksum = (x.strip() for x in tail.rsplit(" #", maxsplit=1))
-            except ValueError:
-                logger.warn(f"File log entry lacks checksum: '{line}'")
-                path, checksum = tail.strip(), None
+            path, checksum = (x.strip() for x in tail.rsplit(" #", maxsplit=1))
             yield (operation, path, checksum)
         elif operation == "delete":
             yield (operation, tail.strip(), None)

@@ -228,7 +226,7 @@ def write_to_file(filename: Path, text: str, agent: Agent) -> str:
     with open(filename, "w", encoding="utf-8") as f:
         f.write(text)
     log_operation("write", filename, agent, checksum)
-    return "File written to successfully."
+    return f"File {filename.name} has been written successfully."


 @sanitize_path_arg("filename")
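The narrowed `operations_from_log` signature encodes a new invariant: `write` and `append` entries always carry a checksum, while `delete` entries never do. An entry lacking the ` #` separator now raises `ValueError` from the tuple unpacking instead of being yielded with a `None` checksum after a warning. A sketch of the log format this parser expects, with illustrative paths and checksums:

# Log file contents:
#
#   write: notes.txt #1f3870be274f6c49b3e31a0c6728957f
#   append: notes.txt #b10a8db164e0754105b7a99be72e3fe5
#   delete: old.txt
#
# operations_from_log(log_path) then yields:
#
#   ("write", "notes.txt", "1f3870be274f6c49b3e31a0c6728957f")
#   ("append", "notes.txt", "b10a8db164e0754105b7a99be72e3fe5")
#   ("delete", "old.txt", None)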

autogpt/models/agent_actions.py

@@ -3,6 +3,8 @@ from __future__ import annotations
 from dataclasses import dataclass
 from typing import Any, Iterator, Literal, Optional

+from autogpt.prompts.utils import format_numbered_list, indent
+

 @dataclass
 class Action:

@@ -16,11 +18,13 @@ class Action:

 @dataclass
 class ActionSuccessResult:
-    results: Any
+    outputs: Any
     status: Literal["success"] = "success"

     def __str__(self) -> str:
-        return f"Action succeeded and returned: `{self.results}`"
+        outputs = str(self.outputs).replace("```", r"\```")
+        multiline = "\n" in outputs
+        return f"```\n{self.outputs}\n```" if multiline else str(self.outputs)

@@ -30,7 +34,7 @@ class ActionErrorResult:
     status: Literal["error"] = "error"

     def __str__(self) -> str:
-        return f"Action failed: `{self.reason}`"
+        return f"Action failed: '{self.reason}'"

@@ -50,9 +54,14 @@ class ActionHistory:
     @dataclass
     class CycleRecord:
-        action: Action | None
+        action: Action
         result: ActionResult | None

+        def __str__(self) -> str:
+            executed_action = f"Executed `{self.action.format_call()}`"
+            action_result = f": {self.result}" if self.result else "."
+            return executed_action + action_result
+
     cursor: int
     cycles: list[CycleRecord]

@@ -80,13 +89,11 @@ class ActionHistory:
     def register_action(self, action: Action) -> None:
         if not self.current_record:
-            self.cycles.append(self.CycleRecord(None, None))
+            self.cycles.append(self.CycleRecord(action, None))
             assert self.current_record
         elif self.current_record.action:
             raise ValueError("Action for current cycle already set")
-
-        self.current_record.action = action

     def register_result(self, result: ActionResult) -> None:
         if not self.current_record:
             raise RuntimeError("Cannot register result for cycle without action")

@@ -94,3 +101,32 @@ class ActionHistory:
             raise ValueError("Result for current cycle already set")
         self.current_record.result = result
         self.cursor = len(self.cycles)
+
+    def fmt_list(self) -> str:
+        return format_numbered_list(self.cycles)
+
+    def fmt_paragraph(self) -> str:
+        steps: list[str] = []
+
+        for i, c in enumerate(self.cycles, 1):
+            step = f"### Step {i}: Executed `{c.action.format_call()}`\n"
+            step += f'- **Reasoning:** "{c.action.reasoning}"\n'
+            step += (
+                f"- **Status:** `{c.result.status if c.result else 'did_not_finish'}`\n"
+            )
+            if c.result:
+                if c.result.status == "success":
+                    result = str(c.result)
+                    result = "\n" + indent(result) if "\n" in result else result
+                    step += f"- **Output:** {result}"
+                elif c.result.status == "error":
+                    step += f"- **Reason:** {c.result.reason}\n"
+                    if c.result.error:
+                        step += f"- **Error:** {c.result.error}\n"
+                elif c.result.status == "interrupted_by_human":
+                    step += f"- **Feedback:** {c.result.feedback}\n"
+
+            steps.append(step)
+
+        return "\n\n".join(steps)

autogpt/prompts/generator.py

@@ -13,6 +13,8 @@ if TYPE_CHECKING:
     from autogpt.config import AIConfig, AIDirectives, Config
     from autogpt.models.command_registry import CommandRegistry

+from .utils import format_numbered_list
+
 logger = logging.getLogger(__name__)

@@ -124,19 +126,6 @@ class PromptGenerator:
         if best_practice not in self.best_practices:
             self.best_practices.append(best_practice)

-    def _generate_numbered_list(self, items: list[str], start_at: int = 1) -> str:
-        """
-        Generate a numbered list containing the given items.
-
-        Params:
-            items (list): A list of items to be numbered.
-            start_at (int, optional): The number to start the sequence with; defaults to 1.
-
-        Returns:
-            str: The formatted numbered list.
-        """
-        return "\n".join(f"{i}. {item}" for i, item in enumerate(items, start_at))
-
     def construct_system_prompt(self, agent: BaseAgent) -> str:
         """Constructs a system prompt containing the most important information for the AI.

@@ -257,15 +246,15 @@ class PromptGenerator:
         return [
             "## Constraints\n"
             "You operate within the following constraints:\n"
-            f"{self._generate_numbered_list(self.constraints + additional_constraints)}",
+            f"{format_numbered_list(self.constraints + additional_constraints)}",
             "## Resources\n"
             "You can leverage access to the following resources:\n"
-            f"{self._generate_numbered_list(self.resources + additional_resources)}",
+            f"{format_numbered_list(self.resources + additional_resources)}",
             "## Commands\n"
             "You have access to the following commands:\n"
             f"{self.list_commands(agent)}",
             "## Best practices\n"
-            f"{self._generate_numbered_list(self.best_practices + additional_best_practices)}",
+            f"{format_numbered_list(self.best_practices + additional_best_practices)}",
         ]

     def list_commands(self, agent: BaseAgent) -> str:

@@ -286,4 +275,4 @@ class PromptGenerator:
         # Add commands from plugins etc.
         command_strings += [str(cmd) for cmd in self.commands.values()]

-        return self._generate_numbered_list(command_strings)
+        return format_numbered_list(command_strings)

autogpt/prompts/utils.py (new file)

@@ -0,0 +1,11 @@
+from typing import Any
+
+
+def format_numbered_list(items: list[Any], start_at: int = 1) -> str:
+    return "\n".join(f"{i}. {str(item)}" for i, item in enumerate(items, start_at))
+
+
+def indent(content: str, indentation: int | str = 4) -> str:
+    if type(indentation) == int:
+        indentation = " " * indentation
+    return indentation + content.replace("\n", f"\n{indentation}")  # type: ignore
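Both helpers are generic by design: `format_numbered_list` replaces `PromptGenerator._generate_numbered_list`, and `indent` supports the multiline output rendering in `fmt_paragraph`. A quick usage sketch:

from autogpt.prompts.utils import format_numbered_list, indent

print(format_numbered_list(["Stay on task", "Cite your sources"]))
# 1. Stay on task
# 2. Cite your sources

print(indent("line one\nline two"))
#     line one
#     line two

print(indent("quoted", ">> "))
# >> quoted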

tests/unit/test_agent.py

@@ -3,7 +3,7 @@ from autogpt.agents.agent import Agent, execute_command


 def test_agent_initialization(agent: Agent):
     assert agent.ai_config.ai_name == "Base"
-    assert agent.history.messages == []
+    assert agent.message_history.messages == []
     assert agent.cycle_budget is None
     assert "You are Base" in agent.system_prompt