Improve prompting and prompt generation infrastructure (#5076)

* Clean up prompt generation

* Rename Performance Evaluations to Best Practices
* Move specification of response format from system prompt to Agent.construct_base_prompt
* Clean up PromptGenerator class

* Add debug logging to AIConfig autogeneration

* Clarify prompting and add support for multiple thought processes to Agent
Reinier van der Leer
2023-07-29 19:06:47 +02:00
committed by GitHub
parent c8d9b69ee2
commit 2eb346e06e
11 changed files with 239 additions and 154 deletions
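The `base.py` changes below thread a new `thought_process_id` parameter through the whole agent cycle. As a rough orientation before the diff, here is a minimal, self-contained sketch (illustrative only — the function bodies are stand-ins, not the repository's implementations) of the new flow:

```python
from typing import Literal

ThoughtProcessID = Literal["one-shot"]  # mirrors BaseAgent.ThoughtProcessID below

def construct_prompt(
    instruction: str, thought_process_id: ThoughtProcessID
) -> list[dict[str, str]]:
    # The response-format instruction is now appended here as a system message
    # instead of being baked into the agent's static system prompt.
    return [
        {"role": "system", "content": "<system prompt from construct_base_prompt>"},
        {"role": "system", "content": f"<response format instruction for {thought_process_id!r}>"},
        {"role": "user", "content": instruction},
    ]

def think(instruction: str, thought_process_id: ThoughtProcessID = "one-shot"):
    # Unknown thought processes fail loudly until they are implemented,
    # mirroring the guard in BaseAgent.response_format_instruction().
    if thought_process_id != "one-shot":
        raise NotImplementedError(f"Unknown thought process '{thought_process_id}'")
    return construct_prompt(instruction, thought_process_id)

print(think("Determine exactly one command to use ..."))
```

With `openai_functions` disabled, the appended response-format instruction renders roughly as "Respond strictly with JSON. The JSON should be compatible with the TypeScript type `Response` from the following:" followed by the `interface Response` block shown in the diff below.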

View File

@@ -293,10 +293,10 @@ def execute_command(
# Handle non-native commands (e.g. from plugins)
for command in agent.ai_config.prompt_generator.commands:
if (
command_name == command["label"].lower()
or command_name == command["name"].lower()
command_name == command.label.lower()
or command_name == command.name.lower()
):
return command["function"](**arguments)
return command.function(**arguments)
raise RuntimeError(
f"Cannot execute '{command_name}': unknown command."
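For reference, a stand-alone sketch of the new attribute-based lookup (the command data below is hypothetical; `PromptGenerator.Command` becomes a dataclass later in this commit):

```python
from dataclasses import dataclass
from typing import Callable, Optional

@dataclass
class Command:  # same shape as PromptGenerator.Command after this commit
    label: str
    name: str
    params: dict[str, str]
    function: Optional[Callable]

commands = [
    Command("Say Hello", "say_hello", {"name": "string"}, lambda name: f"Hello, {name}!")
]

def execute_plugin_command(command_name: str, arguments: dict) -> str:
    for command in commands:
        # Attribute access replaces the old dict-style command["label"] lookups.
        if command_name in (command.label.lower(), command.name.lower()):
            return command.function(**arguments)
    raise RuntimeError(f"Cannot execute '{command_name}': unknown command.")

print(execute_plugin_command("say_hello", {"name": "world"}))  # -> Hello, world!
```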

View File

@@ -1,7 +1,8 @@
from __future__ import annotations
import re
from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Literal, Optional
if TYPE_CHECKING:
from autogpt.config import AIConfig, Config
@@ -23,6 +24,8 @@ AgentThoughts = dict[str, Any]
class BaseAgent(metaclass=ABCMeta):
"""Base class for all Auto-GPT agents."""
ThoughtProcessID = Literal["one-shot"]
def __init__(
self,
ai_config: AIConfig,
@@ -91,6 +94,7 @@ class BaseAgent(metaclass=ABCMeta):
def think(
self,
instruction: Optional[str] = None,
thought_process_id: ThoughtProcessID = "one-shot",
) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
"""Runs the agent for one cycle.
@@ -103,8 +107,8 @@ class BaseAgent(metaclass=ABCMeta):
instruction = instruction or self.default_cycle_instruction
prompt: ChatSequence = self.construct_prompt(instruction)
prompt = self.on_before_think(prompt, instruction)
prompt: ChatSequence = self.construct_prompt(instruction, thought_process_id)
prompt = self.on_before_think(prompt, thought_process_id, instruction)
raw_response = create_chat_completion(
prompt,
self.config,
@@ -114,7 +118,7 @@ class BaseAgent(metaclass=ABCMeta):
)
self.cycle_count += 1
return self.on_response(raw_response, prompt, instruction)
return self.on_response(raw_response, thought_process_id, prompt, instruction)
@abstractmethod
def execute(
@@ -137,6 +141,7 @@ class BaseAgent(metaclass=ABCMeta):
def construct_base_prompt(
self,
thought_process_id: ThoughtProcessID,
prepend_messages: list[Message] = [],
append_messages: list[Message] = [],
reserve_tokens: int = 0,
@@ -178,7 +183,11 @@ class BaseAgent(metaclass=ABCMeta):
return prompt
def construct_prompt(self, cycle_instruction: str) -> ChatSequence:
def construct_prompt(
self,
cycle_instruction: str,
thought_process_id: ThoughtProcessID,
) -> ChatSequence:
"""Constructs and returns a prompt with the following structure:
1. System prompt
2. Message history of the agent, truncated & prepended with running summary as needed
@@ -195,14 +204,86 @@ class BaseAgent(metaclass=ABCMeta):
cycle_instruction_tlength = count_message_tokens(
cycle_instruction_msg, self.llm.name
)
prompt = self.construct_base_prompt(reserve_tokens=cycle_instruction_tlength)
append_messages: list[Message] = []
response_format_instr = self.response_format_instruction(thought_process_id)
if response_format_instr:
append_messages.append(Message("system", response_format_instr))
prompt = self.construct_base_prompt(
thought_process_id,
append_messages=append_messages,
reserve_tokens=cycle_instruction_tlength,
)
# ADD user input message ("triggering prompt")
prompt.append(cycle_instruction_msg)
return prompt
def on_before_think(self, prompt: ChatSequence, instruction: str) -> ChatSequence:
# This can be expanded to support multiple types of (inter)actions within an agent
def response_format_instruction(self, thought_process_id: ThoughtProcessID) -> str:
if thought_process_id != "one-shot":
raise NotImplementedError(f"Unknown thought process '{thought_process_id}'")
RESPONSE_FORMAT_WITH_COMMAND = """```ts
interface Response {
thoughts: {
// Thoughts
text: string;
reasoning: string;
// Short markdown-style bullet list that conveys the long-term plan
plan: string;
// Constructive self-criticism
criticism: string;
// Summary of thoughts to say to the user
speak: string;
};
command: {
name: string;
args: Record<string, any>;
};
}
```"""
RESPONSE_FORMAT_WITHOUT_COMMAND = """```ts
interface Response {
thoughts: {
// Thoughts
text: string;
reasoning: string;
// Short markdown-style bullet list that conveys the long-term plan
plan: string;
// Constructive self-criticism
criticism: string;
// Summary of thoughts to say to the user
speak: string;
};
}
```"""
response_format = re.sub(
r"\n\s+",
"\n",
RESPONSE_FORMAT_WITHOUT_COMMAND
if self.config.openai_functions
else RESPONSE_FORMAT_WITH_COMMAND,
)
use_functions = self.config.openai_functions and self.command_registry.commands
return (
f"Respond strictly with JSON{', and also specify a command to use through a function_call' if use_functions else ''}. "
"The JSON should be compatible with the TypeScript type `Response` from the following:\n"
f"{response_format}\n"
)
def on_before_think(
self,
prompt: ChatSequence,
thought_process_id: ThoughtProcessID,
instruction: str,
) -> ChatSequence:
"""Called after constructing the prompt but before executing it.
Calls the `on_planning` hook of any enabled and capable plugins, adding their
@@ -237,7 +318,11 @@ class BaseAgent(metaclass=ABCMeta):
return prompt
def on_response(
self, llm_response: ChatModelResponse, prompt: ChatSequence, instruction: str
self,
llm_response: ChatModelResponse,
thought_process_id: ThoughtProcessID,
prompt: ChatSequence,
instruction: str,
) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
"""Called upon receiving a response from the chat model.
@@ -260,7 +345,9 @@ class BaseAgent(metaclass=ABCMeta):
) # FIXME: support function calls
try:
return self.parse_and_process_response(llm_response, prompt, instruction)
return self.parse_and_process_response(
llm_response, thought_process_id, prompt, instruction
)
except SyntaxError as e:
logger.error(f"Response could not be parsed: {e}")
# TODO: tune this message
@@ -275,7 +362,11 @@ class BaseAgent(metaclass=ABCMeta):
@abstractmethod
def parse_and_process_response(
self, llm_response: ChatModelResponse, prompt: ChatSequence, instruction: str
self,
llm_response: ChatModelResponse,
thought_process_id: ThoughtProcessID,
prompt: ChatSequence,
instruction: str,
) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
"""Validate, parse & process the LLM's response.

View File

@@ -83,6 +83,7 @@ def prompt_user(
"Falling back to manual mode.",
speak_text=True,
)
logger.debug(f"Error during AIConfig generation: {e}")
return generate_aiconfig_manual(config)

View File

@@ -1,7 +1,4 @@
# sourcery skip: do-not-use-staticmethod
"""
A module that contains the AIConfig class object that contains the configuration
"""
"""A module that contains the AIConfig class object that contains the configuration"""
from __future__ import annotations
import platform
@@ -15,6 +12,8 @@ if TYPE_CHECKING:
from autogpt.models.command_registry import CommandRegistry
from autogpt.prompts.generator import PromptGenerator
from .config import Config
class AIConfig:
"""
@@ -104,7 +103,7 @@ class AIConfig:
yaml.dump(config, file, allow_unicode=True)
def construct_full_prompt(
self, config, prompt_generator: Optional[PromptGenerator] = None
self, config: Config, prompt_generator: Optional[PromptGenerator] = None
) -> str:
"""
Returns a prompt to the user with the class information in an organized fashion.
@@ -117,26 +116,27 @@ class AIConfig:
including the ai_name, ai_role, ai_goals, and api_budget.
"""
prompt_start = (
"Your decisions must always be made independently without"
" seeking user assistance. Play to your strengths as an LLM and pursue"
" simple strategies with no legal complications."
""
)
from autogpt.prompts.prompt import build_default_prompt_generator
prompt_generator = prompt_generator or self.prompt_generator
if prompt_generator is None:
prompt_generator = build_default_prompt_generator(config)
prompt_generator.goals = self.ai_goals
prompt_generator.name = self.ai_name
prompt_generator.role = self.ai_role
prompt_generator.command_registry = self.command_registry
prompt_generator.command_registry = self.command_registry
self.prompt_generator = prompt_generator
for plugin in config.plugins:
if not plugin.can_handle_post_prompt():
continue
prompt_generator = plugin.post_prompt(prompt_generator)
# Construct full prompt
full_prompt_parts = [
f"You are {self.ai_name}, {self.ai_role.rstrip('.')}.",
"Your decisions must always be made independently without seeking "
"user assistance. Play to your strengths as an LLM and pursue "
"simple strategies with no legal complications.",
]
if config.execute_local_commands:
# add OS info to prompt
os_name = platform.system()
@@ -146,14 +146,30 @@ class AIConfig:
else distro.name(pretty=True)
)
prompt_start += f"\nThe OS you are running on is: {os_info}"
full_prompt_parts.append(f"The OS you are running on is: {os_info}")
# Construct full prompt
full_prompt = f"You are {prompt_generator.name}, {prompt_generator.role}\n{prompt_start}\n\nGOALS:\n\n"
for i, goal in enumerate(self.ai_goals):
full_prompt += f"{i+1}. {goal}\n"
additional_constraints: list[str] = []
if self.api_budget > 0.0:
full_prompt += f"\nIt takes money to let you run. Your API budget is ${self.api_budget:.3f}"
self.prompt_generator = prompt_generator
full_prompt += f"\n\n{prompt_generator.generate_prompt_string(config)}"
return full_prompt
additional_constraints.append(
f"It takes money to let you run. "
f"Your API budget is ${self.api_budget:.3f}"
)
full_prompt_parts.append(
prompt_generator.generate_prompt_string(
additional_constraints=additional_constraints
)
)
if self.ai_goals:
full_prompt_parts.append(
"\n".join(
[
"## Goals",
"For your task, you must fulfill the following goals:",
*[f"{i+1}. {goal}" for i, goal in enumerate(self.ai_goals)],
]
)
)
return "\n\n".join(full_prompt_parts).strip("\n")
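Assuming a hypothetical agent configuration, the system prompt assembled by the new `construct_full_prompt` is roughly the following parts joined with blank lines (a sketch; the goal and role values are invented, and the middle part is produced by `PromptGenerator.generate_prompt_string`):

```python
# Sketch of the assembled prompt; placeholders stand in for generated sections.
example_full_prompt = "\n\n".join([
    "You are ResearchGPT, an AI designed to research topics autonomously.",
    "Your decisions must always be made independently without seeking "
    "user assistance. Play to your strengths as an LLM and pursue "
    "simple strategies with no legal complications.",
    # Output of PromptGenerator.generate_prompt_string(...), i.e. the
    # "## Constraints", "## Commands", "## Resources" and "## Best practices"
    # sections, including any additional_constraints such as the API budget:
    "<prompt generator output>",
    "## Goals\n"
    "For your task, you must fulfill the following goals:\n"
    "1. Summarize the current state of topic X",
])
print(example_full_prompt)
```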

View File

@@ -44,4 +44,4 @@ class PromptConfig:
self.constraints = config_params.get("constraints", [])
self.resources = config_params.get("resources", [])
self.performance_evaluations = config_params.get("performance_evaluations", [])
self.best_practices = config_params.get("best_practices", [])

View File

@@ -154,6 +154,9 @@ def create_chat_completion(
function.schema for function in functions
]
# Print full prompt to debug log
logger.debug(prompt.dump())
response = iopenai.create_chat_completion(
messages=prompt.raw(),
**chat_completion_kwargs,

View File

@@ -1,11 +1,8 @@
""" A module for generating custom prompt strings."""
from __future__ import annotations
import json
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypedDict
from autogpt.config import Config
from autogpt.json_utils.utilities import llm_response_schema
from dataclasses import dataclass
from typing import TYPE_CHECKING, Callable, Optional
if TYPE_CHECKING:
from autogpt.models.command_registry import CommandRegistry
@@ -17,34 +14,33 @@ class PromptGenerator:
resources, and performance evaluations.
"""
class Command(TypedDict):
@dataclass
class Command:
label: str
name: str
params: dict[str, str]
function: Optional[Callable]
def __str__(self) -> str:
"""Returns a string representation of the command."""
params_string = ", ".join(
f'"{key}": "{value}"' for key, value in self.params.items()
)
return f'{self.label}: "{self.name}", params: ({params_string})'
constraints: list[str]
commands: list[Command]
resources: list[str]
performance_evaluation: list[str]
best_practices: list[str]
command_registry: CommandRegistry | None
# TODO: replace with AIConfig
name: str
role: str
goals: list[str]
def __init__(self):
self.constraints = []
self.commands = []
self.resources = []
self.performance_evaluation = []
self.best_practices = []
self.command_registry = None
self.name = "Bob"
self.role = "AI"
self.goals = []
def add_constraint(self, constraint: str) -> None:
"""
Add a constraint to the constraints list.
@@ -75,31 +71,15 @@ class PromptGenerator:
function (callable, optional): A callable function to be called when
the command is executed. Defaults to None.
"""
command_params = {name: type for name, type in params.items()}
command: PromptGenerator.Command = {
"label": command_label,
"name": command_name,
"params": command_params,
"function": function,
}
self.commands.append(command)
def _generate_command_string(self, command: Dict[str, Any]) -> str:
"""
Generate a formatted string representation of a command.
Args:
command (dict): A dictionary containing command information.
Returns:
str: The formatted command string.
"""
params_string = ", ".join(
f'"{key}": "{value}"' for key, value in command["params"].items()
self.commands.append(
PromptGenerator.Command(
label=command_label,
name=command_name,
params={name: type for name, type in params.items()},
function=function,
)
)
return f'{command["label"]}: "{command["name"]}", params: {params_string}'
def add_resource(self, resource: str) -> None:
"""
@@ -110,71 +90,67 @@ class PromptGenerator:
"""
self.resources.append(resource)
def add_performance_evaluation(self, evaluation: str) -> None:
def add_best_practice(self, best_practice: str) -> None:
"""
Add a performance evaluation item to the performance_evaluation list.
Add an item to the list of best practices.
Args:
evaluation (str): The evaluation item to be added.
best_practice (str): The best practice item to be added.
"""
self.performance_evaluation.append(evaluation)
self.best_practices.append(best_practice)
def _generate_numbered_list(self, items: List[Any], item_type="list") -> str:
def _generate_numbered_list(self, items: list[str], start_at: int = 1) -> str:
"""
Generate a numbered list from given items based on the item_type.
Generate a numbered list containing the given items.
Args:
items (list): A list of items to be numbered.
item_type (str, optional): The type of items in the list.
Defaults to 'list'.
start_at (int, optional): The number to start the sequence with; defaults to 1.
Returns:
str: The formatted numbered list.
"""
if item_type == "command":
command_strings = []
if self.command_registry:
command_strings += [
str(item)
for item in self.command_registry.commands.values()
if item.enabled
]
# terminate command is added manually
command_strings += [self._generate_command_string(item) for item in items]
return "\n".join(f"{i+1}. {item}" for i, item in enumerate(command_strings))
else:
return "\n".join(f"{i+1}. {item}" for i, item in enumerate(items))
return "\n".join(f"{i}. {item}" for i, item in enumerate(items, start_at))
def generate_prompt_string(self, config: Config) -> str:
def generate_prompt_string(
self,
*,
additional_constraints: list[str] = [],
additional_resources: list[str] = [],
additional_best_practices: list[str] = [],
) -> str:
"""
Generate a prompt string based on the constraints, commands, resources,
and performance evaluations.
and best practices.
Returns:
str: The generated prompt string.
"""
return (
f"Constraints:\n{self._generate_numbered_list(self.constraints)}\n\n"
f"{generate_commands(self, config)}"
f"Resources:\n{self._generate_numbered_list(self.resources)}\n\n"
"Performance Evaluation:\n"
f"{self._generate_numbered_list(self.performance_evaluation)}\n\n"
"Respond with only valid JSON conforming to the following schema: \n"
f"{json.dumps(llm_response_schema(config))}\n"
"## Constraints\n"
"You operate within the following constraints:\n"
f"{self._generate_numbered_list(self.constraints + additional_constraints)}\n\n"
"## Commands\n"
"You have access to the following commands:\n"
f"{self._generate_commands()}\n\n"
"## Resources\n"
"You can leverage access to the following resources:\n"
f"{self._generate_numbered_list(self.resources + additional_resources)}\n\n"
"## Best practices\n"
f"{self._generate_numbered_list(self.best_practices + additional_best_practices)}"
)
def _generate_commands(self) -> str:
command_strings = []
if self.command_registry:
command_strings += [
str(cmd)
for cmd in self.command_registry.commands.values()
if cmd.enabled
]
def generate_commands(self, config: Config) -> str:
"""
Generate a prompt string based on the constraints, commands, resources,
and performance evaluations.
# Add commands from plugins etc.
command_strings += [str(cmd) for cmd in self.commands]
Returns:
str: The generated prompt string.
"""
if config.openai_functions:
return ""
return (
"Commands:\n"
f"{self._generate_numbered_list(self.commands, item_type='command')}\n\n"
)
return self._generate_numbered_list(command_strings)
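Taken together, the refactored `PromptGenerator` can be exercised roughly like this (a sketch; the constraint, command and budget strings are illustrative):

```python
from autogpt.prompts.generator import PromptGenerator  # import path as in this commit

generator = PromptGenerator()
generator.add_constraint("~4000 word limit for short term memory.")
generator.add_command(
    "Task Complete (Shutdown)", "task_complete", {"reason": "string"}, None
)
generator.add_resource("Internet access for searches and information gathering.")
generator.add_best_practice(
    "Continuously review and analyze your actions to ensure you are performing "
    "to the best of your abilities."
)

# Callers (e.g. AIConfig.construct_full_prompt) can splice in extra items via
# the new keyword-only arguments without mutating the generator itself:
print(
    generator.generate_prompt_string(
        additional_constraints=[
            "It takes money to let you run. Your API budget is $1.000"
        ]
    )
)
```

Note that the JSON response-format instruction is no longer part of this string; it is now appended per cycle by the agent via `response_format_instruction`, as shown in the `base.py` changes above.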

View File

@@ -2,13 +2,17 @@ from autogpt.config.config import Config
from autogpt.config.prompt_config import PromptConfig
from autogpt.prompts.generator import PromptGenerator
DEFAULT_TRIGGERING_PROMPT = "Determine exactly one command to use, and respond using the JSON schema specified previously:"
DEFAULT_TRIGGERING_PROMPT = (
"Determine exactly one command to use based on the given goals "
"and the progress you have made so far, "
"and respond using the JSON schema specified previously:"
)
def build_default_prompt_generator(config: Config) -> PromptGenerator:
"""
This function generates a prompt string that includes various constraints,
commands, resources, and performance evaluations.
commands, resources, and best practices.
Returns:
str: The generated prompt string.
@@ -28,8 +32,8 @@ def build_default_prompt_generator(config: Config) -> PromptGenerator:
for resource in prompt_config.resources:
prompt_generator.add_resource(resource)
# Add performance evaluations to the PromptGenerator object
for performance_evaluation in prompt_config.performance_evaluations:
prompt_generator.add_performance_evaluation(performance_evaluation)
# Add best practices to the PromptGenerator object
for best_practice in prompt_config.best_practices:
prompt_generator.add_best_practice(best_practice)
return prompt_generator

View File

@@ -7,9 +7,10 @@ constraints: [
resources: [
'Internet access for searches and information gathering.',
'Long Term memory management.',
'File output.'
'File output.',
'Command execution'
]
performance_evaluations: [
best_practices: [
'Continuously review and analyze your actions to ensure you are performing to the best of your abilities.',
'Constructively self-criticize your big-picture behavior constantly.',
'Reflect on past decisions and strategies to refine your approach.',

View File

@@ -18,10 +18,10 @@ resources:
- A test resource
- Another test resource
- A third test resource
performance_evaluations:
- A test performance evaluation
- Another test performance evaluation
- A third test performance evaluation
best_practices:
- A test best-practice
- Another test best-practice
- A third test best-practice
"""
prompt_settings_file = tmp_path / "test_prompt_settings.yaml"
prompt_settings_file.write_text(yaml_content)
@@ -36,13 +36,7 @@ performance_evaluations:
assert prompt_config.resources[0] == "A test resource"
assert prompt_config.resources[1] == "Another test resource"
assert prompt_config.resources[2] == "A third test resource"
assert len(prompt_config.performance_evaluations) == 3
assert prompt_config.performance_evaluations[0] == "A test performance evaluation"
assert (
prompt_config.performance_evaluations[1]
== "Another test performance evaluation"
)
assert (
prompt_config.performance_evaluations[2]
== "A third test performance evaluation"
)
assert len(prompt_config.best_practices) == 3
assert prompt_config.best_practices[0] == "A test best-practice"
assert prompt_config.best_practices[1] == "Another test best-practice"
assert prompt_config.best_practices[2] == "A third test best-practice"

View File

@@ -20,13 +20,12 @@ def test_add_command():
params = {"arg1": "value1", "arg2": "value2"}
generator = PromptGenerator()
generator.add_command(command_label, command_name, params)
command = {
assert generator.commands[0].__dict__ == {
"label": command_label,
"name": command_name,
"params": params,
"function": None,
}
assert command in generator.commands
def test_add_resource():
@@ -39,18 +38,18 @@ def test_add_resource():
assert resource in generator.resources
def test_add_performance_evaluation():
def test_add_best_practice():
"""
Test if the add_performance_evaluation() method adds an evaluation to the generator's
performance_evaluation list.
Test if the add_best_practice() method adds a best practice to the generator's
best_practices list.
"""
evaluation = "Evaluation1"
practice = "Practice1"
generator = PromptGenerator()
generator.add_performance_evaluation(evaluation)
assert evaluation in generator.performance_evaluation
generator.add_best_practice(practice)
assert practice in generator.best_practices
def test_generate_prompt_string(config):
def test_generate_prompt_string():
"""
Test if the generate_prompt_string() method generates a prompt string with all the added
constraints, commands, resources, and evaluations.
@@ -82,10 +81,10 @@ def test_generate_prompt_string(config):
for resource in resources:
generator.add_resource(resource)
for evaluation in evaluations:
generator.add_performance_evaluation(evaluation)
generator.add_best_practice(evaluation)
# Generate the prompt string and verify its correctness
prompt_string = generator.generate_prompt_string(config)
prompt_string = generator.generate_prompt_string()
assert prompt_string is not None
# Check if all constraints, commands, resources, and evaluations are present in the prompt string