# Auto-GPT/autogpt/agents/planning_agent.py

from __future__ import annotations

import logging
import re
from datetime import datetime
from typing import TYPE_CHECKING, Literal, Optional

if TYPE_CHECKING:
    from autogpt.config import AIConfig, Config
    from autogpt.llm.base import ChatModelResponse, ChatSequence
    from autogpt.memory.vector import VectorMemory
    from autogpt.models.command_registry import CommandRegistry

from autogpt.agents.utils.exceptions import AgentException, InvalidAgentResponseError
from autogpt.json_utils.utilities import extract_dict_from_response, validate_dict
from autogpt.llm.base import Message
from autogpt.llm.utils import count_string_tokens
from autogpt.logs.log_cycle import (
    CURRENT_CONTEXT_FILE_NAME,
    NEXT_ACTION_FILE_NAME,
    USER_INPUT_FILE_NAME,
    LogCycleHandler,
)
from autogpt.models.agent_actions import (
    ActionErrorResult,
    ActionHistory,
    ActionInterruptedByHuman,
    ActionResult,
    ActionSuccessResult,
)
from autogpt.models.context_item import ContextItem
from autogpt.workspace import Workspace

from .agent import execute_command, extract_command
from .base import BaseAgent
from .utils.context import AgentContext

logger = logging.getLogger(__name__)


class PlanningAgent(BaseAgent):
    """Agent class for interacting with Auto-GPT."""

    ThoughtProcessID = Literal["plan", "action", "evaluate"]

    def __init__(
        self,
        ai_config: AIConfig,
        command_registry: CommandRegistry,
        memory: VectorMemory,
        triggering_prompt: str,
        config: Config,
        cycle_budget: Optional[int] = None,
    ):
        super().__init__(
            ai_config=ai_config,
            command_registry=command_registry,
            config=config,
            default_cycle_instruction=triggering_prompt,
            cycle_budget=cycle_budget,
        )

        self.memory = memory
        """VectorMemoryProvider used to manage the agent's context (TODO)"""

        self.workspace = Workspace(config.workspace_path, config.restrict_to_workspace)
        """Workspace that the agent has access to, e.g. for reading/writing files."""

        self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
        """Timestamp the agent was created; only used for structured debug logging."""

        self.log_cycle_handler = LogCycleHandler()
        """LogCycleHandler for structured debug logging."""

        self.action_history = ActionHistory()

        self.context = AgentContext()
        """Dynamic segment of the prompt, to provide the LLM with relevant context"""

        self.plan: list[str] = []
        """List of steps that the Agent plans to take"""

    def construct_base_prompt(
        self, thought_process_id: ThoughtProcessID, **kwargs
    ) -> ChatSequence:
        prepend_messages = kwargs["prepend_messages"] = kwargs.get(
            "prepend_messages", []
        )

        # Add the current plan to the prompt, if any
        if self.plan:
            plan_section = [
                "## Plan",
                "To complete your task, you have composed the following plan:",
            ]
            plan_section += [f"{i}. {s}" for i, s in enumerate(self.plan, 1)]

            # Add the actions so far to the prompt
            if self.action_history:
                plan_section += [
                    "\n### Progress",
                    "So far, you have executed the following actions based on the plan:",
                ]
                for i, cycle in enumerate(self.action_history, 1):
                    if not (cycle.action and cycle.result):
                        logger.warn(f"Incomplete action in history: {cycle}")
                        continue

                    plan_section.append(
                        f"{i}. You executed the command `{cycle.action.format_call()}`, "
                        f"which gave the result `{cycle.result}`."
                    )

            prepend_messages.append(Message("system", "\n".join(plan_section)))

        if self.context:
            context_section = [
                "## Context",
                "Below is information that may be relevant to your task. These take up "
                "part of your working memory, which is limited, so when a context item is "
                "no longer relevant for your plan, use the `close_context_item` command to "
                "free up some memory."
                "\n",
                self.context.format_numbered(),
            ]
            prepend_messages.append(Message("system", "\n".join(context_section)))

        match thought_process_id:
            case "plan":
                # TODO: add planning instructions; details about what to pay attention to when planning
                pass
            case "action":
                # TODO: need to insert the functions here again?
                pass
            case "evaluate":
                # TODO: insert latest action (with reasoning) + result + evaluation instructions
                pass
            case _:
                raise NotImplementedError(
                    f"Unknown thought process '{thought_process_id}'"
                )

        return super().construct_base_prompt(
            thought_process_id=thought_process_id, **kwargs
        )

    def response_format_instruction(self, thought_process_id: ThoughtProcessID) -> str:
        match thought_process_id:
            case "plan":
                # TODO: add planning instructions; details about what to pay attention to when planning
                response_format = f"""```ts
                interface Response {{
                    thoughts: {{
                        // Thoughts
                        text: string;
                        // A short logical explanation about how the action is part of the earlier composed plan
                        reasoning: string;
                        // Constructive self-criticism
                        criticism: string;
                    }};
                    // A plan to achieve the goals with the available resources and/or commands.
                    plan: Array<{{
                        // An actionable subtask
                        subtask: string;
                        // Criterium to determine whether the subtask has been completed
                        completed_if: string;
                    }}>;
                }}
                ```"""
                pass
            case "action":
                # TODO: need to insert the functions here again?
                response_format = """```ts
                interface Response {
                    thoughts: {
                        // Thoughts
                        text: string;
                        // A short logical explanation about how the action is part of the earlier composed plan
                        reasoning: string;
                        // Constructive self-criticism
                        criticism: string;
                    };
                    // The action to take, from the earlier specified list of commands
                    command: {
                        name: string;
                        args: Record<string, any>;
                    };
                }
                ```"""
                pass
            case "evaluate":
                # TODO: insert latest action (with reasoning) + result + evaluation instructions
                response_format = f"""```ts
                interface Response {{
                    thoughts: {{
                        // Thoughts
                        text: string;
                        reasoning: string;
                        // Constructive self-criticism
                        criticism: string;
                    }};
                    result_evaluation: {{
                        // A short logical explanation of why the given partial result does or does not complete the corresponding subtask
                        reasoning: string;
                        // Whether the current subtask has been completed
                        completed: boolean;
                        // An estimate of the progress (0.0 - 1.0) that has been made on the subtask with the actions that have been taken so far
                        progress: float;
                    }};
                }}
                ```"""
                pass
            case _:
                raise NotImplementedError(
                    f"Unknown thought process '{thought_process_id}'"
                )

        response_format = re.sub(
            r"\n\s+",
            "\n",
            response_format,
        )

        return (
            f"Respond strictly with JSON. The JSON should be compatible with "
            "the TypeScript type `Response` from the following:\n"
            f"{response_format}\n"
        )

    def on_before_think(self, *args, **kwargs) -> ChatSequence:
        prompt = super().on_before_think(*args, **kwargs)

        self.log_cycle_handler.log_count_within_cycle = 0
        self.log_cycle_handler.log_cycle(
            self.ai_config.ai_name,
            self.created_at,
            self.cycle_count,
            self.action_history.cycles,
            "action_history.json",
        )
        self.log_cycle_handler.log_cycle(
            self.ai_config.ai_name,
            self.created_at,
            self.cycle_count,
            prompt.raw(),
            CURRENT_CONTEXT_FILE_NAME,
        )
        return prompt

    def execute(
        self,
        command_name: str,
        command_args: dict[str, str] = {},
        user_input: str = "",
    ) -> ActionResult:
        result: ActionResult

        if command_name == "human_feedback":
            result = ActionInterruptedByHuman(user_input)
            self.log_cycle_handler.log_cycle(
                self.ai_config.ai_name,
                self.created_at,
                self.cycle_count,
                user_input,
                USER_INPUT_FILE_NAME,
            )

        else:
            for plugin in self.config.plugins:
                if not plugin.can_handle_pre_command():
                    continue
                command_name, arguments = plugin.pre_command(command_name, command_args)

            try:
                return_value = execute_command(
                    command_name=command_name,
                    arguments=command_args,
                    agent=self,
                )

                # Intercept ContextItem if one is returned by the command
                if type(return_value) == tuple and isinstance(
                    return_value[1], ContextItem
                ):
                    self.context.add(return_value[1])
                    return_value = return_value[0]

                result = ActionSuccessResult(return_value)
            except AgentException as e:
                result = ActionErrorResult(e.message, e)

            result_tlength = count_string_tokens(str(result), self.llm.name)
            memory_tlength = count_string_tokens(
                str(self.history.summary_message()), self.llm.name
            )
            if result_tlength + memory_tlength > self.send_token_limit:
                result = ActionErrorResult(
                    reason=f"Command {command_name} returned too much output. "
                    "Do not execute this command again with the same arguments."
                )

            for plugin in self.config.plugins:
                if not plugin.can_handle_post_command():
                    continue
                if result.status == "success":
                    result.results = plugin.post_command(command_name, result.results)
                elif result.status == "error":
                    result.reason = plugin.post_command(command_name, result.reason)

        # Check if there's a result from the command append it to the message
        if result.status == "success":
            self.history.add(
                "system",
                f"Command {command_name} returned: {result.results}",
                "action_result",
            )
        elif result.status == "error":
            message = f"Command {command_name} failed: {result.reason}"
            if (
                result.error
                and isinstance(result.error, AgentException)
                and result.error.hint
            ):
                message = message.rstrip(".") + f". {result.error.hint}"
            self.history.add("system", message, "action_result")

        return result

    def parse_and_process_response(
        self,
        llm_response: ChatModelResponse,
        thought_process_id: ThoughtProcessID,
        *args,
        **kwargs,
    ) -> PlanningAgent.ThoughtProcessOutput:
        if not llm_response.content:
            raise InvalidAgentResponseError("Assistant response has no text content")

        response_content = llm_response.content

        for plugin in self.config.plugins:
            if not plugin.can_handle_post_planning():
                continue
            response_content = plugin.post_planning(response_content)

        assistant_reply_dict = extract_dict_from_response(response_content)

        _, errors = validate_dict(assistant_reply_dict, self.config)
        if errors:
            raise InvalidAgentResponseError(
                "Validation of response failed:\n  "
                + ";\n  ".join([str(e) for e in errors])
            )

        # Get command name and arguments
        command_name, arguments = extract_command(
            assistant_reply_dict, llm_response, self.config
        )
        response = command_name, arguments, assistant_reply_dict

        self.log_cycle_handler.log_cycle(
            self.ai_config.ai_name,
            self.created_at,
            self.cycle_count,
            assistant_reply_dict,
            NEXT_ACTION_FILE_NAME,
        )
        return response