Release v0.4.5 (#4981)

This commit is contained in:
Luke
2023-07-19 14:17:43 -04:00
committed by GitHub
70 changed files with 1435 additions and 1116 deletions

View File

@@ -27,8 +27,8 @@ jobs:
with:
ref: master
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
- name: Set up Python ${{ matrix.config.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.config.python-version }}

View File

@@ -33,7 +33,7 @@ jobs:
repository: ${{ github.event.pull_request.head.repo.full_name }}
- name: Set up Python ${{ env.min-python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ env.min-python-version }}
@@ -132,7 +132,7 @@ jobs:
fi
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
@@ -153,14 +153,16 @@ jobs:
- name: Run pytest with coverage
run: |
pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
pytest -vv --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \
--numprocesses=logical --durations=10 \
tests/unit tests/integration tests/challenges
python tests/challenges/utils/build_current_score.py
env:
CI: true
PROXY: ${{ secrets.PROXY }}
AGENT_MODE: ${{ secrets.AGENT_MODE }}
AGENT_TYPE: ${{ secrets.AGENT_TYPE }}
PROXY: ${{ github.event_name == 'pull_request_target' && secrets.PROXY || '' }}
AGENT_MODE: ${{ github.event_name == 'pull_request_target' && secrets.AGENT_MODE || '' }}
AGENT_TYPE: ${{ github.event_name == 'pull_request_target' && secrets.AGENT_TYPE || '' }}
OPENAI_API_KEY: ${{ github.event_name == 'pull_request' && secrets.OPENAI_API_KEY || '' }}
PLAIN_OUTPUT: True
- name: Upload coverage reports to Codecov
@@ -251,7 +253,7 @@ jobs:
gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged."
fi
- name: Upload logs as artifact
- name: Upload logs to artifact
if: always()
uses: actions/upload-artifact@v3
with:

View File

@@ -73,16 +73,13 @@ jobs:
run: .github/workflows/scripts/docker-ci-summary.sh >> $GITHUB_STEP_SUMMARY
continue-on-error: true
# Docker setup needs fixing before this is going to work: #1843
test:
runs-on: ubuntu-latest
timeout-minutes: 30
needs: build
timeout-minutes: 10
steps:
- name: Check out repository
uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: true
- name: Set up Docker Buildx
@@ -102,14 +99,15 @@ jobs:
- id: test
name: Run tests
env:
PLAIN_OUTPUT: True
CI: true
PLAIN_OUTPUT: True
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
set +e
test_output=$(
docker run --env CI --env OPENAI_API_KEY --entrypoint python ${{ env.IMAGE_NAME }} -m \
pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing \
pytest -v --cov=autogpt --cov-branch --cov-report term-missing \
--numprocesses=4 --durations=10 \
tests/unit tests/integration 2>&1
)
test_failure=$?

.gitignore vendored
View File

@@ -12,7 +12,7 @@ last_run_ai_settings.yaml
auto-gpt.json
log.txt
log-ingestion.txt
logs
/logs
*.log
*.mp3
mem.sqlite3

View File

@@ -4,26 +4,23 @@
📖 *User Guide*: https://docs.agpt.co.
👩 *Contributors Wiki*: https://github.com/Significant-Gravitas/Auto-GPT/wiki/Contributing.
# v0.4.4 RELEASE HIGHLIGHTS! 🚀
# v0.4.5 RELEASE HIGHLIGHTS! 🚀
# -----------------------------
## GPT-4 is back!
Following OpenAI's recent GPT-4 GA announcement, the SMART_LLM .env setting
now defaults to GPT-4, and Auto-GPT will use GPT-4 by default in its main loop.
This release includes under-the-hood improvements and bug fixes, such as more
accurate token counts for OpenAI functions, faster CI builds, improved plugin
handling, and refactoring of the Config class for better maintainability.
### !! High Costs Warning !! 💰💀🚨
GPT-4 costs ~20x more than GPT-3.5-turbo.
Please take note of this before using SMART_LLM. You can use `--gpt3only`
or `--gpt4only` to force the use of GPT-3.5-turbo or GPT-4, respectively,
at runtime.
We have also released some documentation updates, including:
## Re-arch v1 preview release!
We've released a preview version of the re-arch code, under `autogpt/core`.
This is a major milestone for us, and we're excited to continue working on it.
We look forward to your feedback. Follow the process here:
https://github.com/Significant-Gravitas/Auto-GPT/issues/4770.
- *How to share your system logs*
Visit [docs/share-your-logs.md] to learn how to share logs with us
via a log analyzer graciously contributed by https://www.e2b.dev/
## Other highlights
Other fixes address plugin regressions and Azure config issues, alongside security patches.
- *Auto-GPT re-architecture documentation*
You can learn more about the inner workings of the Auto-GPT re-architecture
released last cycle, via these links:
* [autogpt/core/README.md]
* [autogpt/core/ARCHITECTURE_NOTES.md]
Take a look at the Release Notes on Github for the full changelog!
https://github.com/Significant-Gravitas/Auto-GPT/releases.

View File

@@ -1,4 +0,0 @@
from autogpt.agent.agent import Agent
from autogpt.agent.agent_manager import AgentManager
__all__ = ["Agent", "AgentManager"]

View File

@@ -1,145 +0,0 @@
"""Agent manager for managing GPT agents"""
from __future__ import annotations
from autogpt.config import Config
from autogpt.llm.base import ChatSequence
from autogpt.llm.chat import Message, create_chat_completion
from autogpt.singleton import Singleton
class AgentManager(metaclass=Singleton):
"""Agent manager for managing GPT agents"""
def __init__(self, config: Config):
self.next_key = 0
self.agents: dict[
int, tuple[str, list[Message], str]
] = {} # key, (task, full_message_history, model)
self.config = config
# Create new GPT agent
# TODO: Centralise use of create_chat_completion() to globally enforce token limit
def create_agent(
self, task: str, creation_prompt: str, model: str
) -> tuple[int, str]:
"""Create a new agent and return its key
Args:
task: The task to perform
creation_prompt: Prompt passed to the LLM at creation
model: The model to use to run this agent
Returns:
The key of the new agent
"""
messages = ChatSequence.for_model(model, [Message("user", creation_prompt)])
for plugin in self.config.plugins:
if not plugin.can_handle_pre_instruction():
continue
if plugin_messages := plugin.pre_instruction(messages.raw()):
messages.extend([Message(**raw_msg) for raw_msg in plugin_messages])
# Start GPT instance
agent_reply = create_chat_completion(
prompt=messages, config=self.config
).content
messages.add("assistant", agent_reply)
plugins_reply = ""
for i, plugin in enumerate(self.config.plugins):
if not plugin.can_handle_on_instruction():
continue
if plugin_result := plugin.on_instruction([m.raw() for m in messages]):
sep = "\n" if i else ""
plugins_reply = f"{plugins_reply}{sep}{plugin_result}"
if plugins_reply and plugins_reply != "":
messages.add("assistant", plugins_reply)
key = self.next_key
# This is done instead of len(agents) to make keys unique even if agents
# are deleted
self.next_key += 1
self.agents[key] = (task, list(messages), model)
for plugin in self.config.plugins:
if not plugin.can_handle_post_instruction():
continue
agent_reply = plugin.post_instruction(agent_reply)
return key, agent_reply
def message_agent(self, key: str | int, message: str) -> str:
"""Send a message to an agent and return its response
Args:
key: The key of the agent to message
message: The message to send to the agent
Returns:
The agent's response
"""
task, messages, model = self.agents[int(key)]
# Add user message to message history before sending to agent
messages = ChatSequence.for_model(model, messages)
messages.add("user", message)
for plugin in self.config.plugins:
if not plugin.can_handle_pre_instruction():
continue
if plugin_messages := plugin.pre_instruction([m.raw() for m in messages]):
messages.extend([Message(**raw_msg) for raw_msg in plugin_messages])
# Start GPT instance
agent_reply = create_chat_completion(
prompt=messages, config=self.config
).content
messages.add("assistant", agent_reply)
plugins_reply = agent_reply
for i, plugin in enumerate(self.config.plugins):
if not plugin.can_handle_on_instruction():
continue
if plugin_result := plugin.on_instruction([m.raw() for m in messages]):
sep = "\n" if i else ""
plugins_reply = f"{plugins_reply}{sep}{plugin_result}"
# Update full message history
if plugins_reply and plugins_reply != "":
messages.add("assistant", plugins_reply)
for plugin in self.config.plugins:
if not plugin.can_handle_post_instruction():
continue
agent_reply = plugin.post_instruction(agent_reply)
return agent_reply
def list_agents(self) -> list[tuple[str | int, str]]:
"""Return a list of all agents
Returns:
A list of tuples of the form (key, task)
"""
# Return a list of agent keys and their tasks
return [(key, task) for key, (task, _, _) in self.agents.items()]
def delete_agent(self, key: str | int) -> bool:
"""Delete an agent from the agent manager
Args:
key: The key of the agent to delete
Returns:
True if successful, False otherwise
"""
try:
del self.agents[int(key)]
return True
except KeyError:
return False

View File

@@ -0,0 +1,3 @@
from .agent import Agent
__all__ = ["Agent"]

View File

@@ -9,16 +9,19 @@ from colorama import Fore, Style
from autogpt.config import Config
from autogpt.config.ai_config import AIConfig
from autogpt.json_utils.utilities import extract_json_from_response, validate_json
from autogpt.llm import ChatModelResponse
from autogpt.llm.chat import chat_with_ai
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
from autogpt.llm.utils import count_string_tokens
from autogpt.log_cycle.log_cycle import (
from autogpt.logs import (
FULL_MESSAGE_HISTORY_FILE_NAME,
NEXT_ACTION_FILE_NAME,
USER_INPUT_FILE_NAME,
LogCycleHandler,
logger,
print_assistant_thoughts,
remove_ansi_escape,
)
from autogpt.logs import logger, print_assistant_thoughts, remove_ansi_escape
from autogpt.memory.message_history import MessageHistory
from autogpt.memory.vector import VectorMemory
from autogpt.models.command_registry import CommandRegistry
@@ -70,7 +73,7 @@ class Agent:
):
self.ai_name = ai_name
self.memory = memory
self.history = MessageHistory(self)
self.history = MessageHistory.for_model(config.smart_llm, agent=self)
self.next_action_count = next_action_count
self.command_registry = command_registry
self.config = config
@@ -84,9 +87,6 @@ class Agent:
self.smart_token_limit = OPEN_AI_CHAT_MODELS.get(config.smart_llm).max_tokens
def start_interaction_loop(self):
# Avoid circular imports
from autogpt.app import execute_command, extract_command
# Interaction Loop
self.cycle_count = 0
command_name = None
@@ -167,8 +167,6 @@ class Agent:
if self.config.speak_mode:
say_text(f"I want to execute {command_name}", self.config)
arguments = self._resolve_pathlike_command_args(arguments)
except Exception as e:
logger.error("Error: \n", str(e))
self.log_cycle_handler.log_cycle(
@@ -308,13 +306,93 @@ class Agent:
"SYSTEM: ", Fore.YELLOW, "Unable to execute command"
)
def _resolve_pathlike_command_args(self, command_args):
if "directory" in command_args and command_args["directory"] in {"", "/"}:
command_args["directory"] = str(self.workspace.root)
else:
for pathlike in ["filename", "directory", "clone_path"]:
if pathlike in command_args:
command_args[pathlike] = str(
self.workspace.get_path(command_args[pathlike])
)
return command_args
def extract_command(
assistant_reply_json: dict, assistant_reply: ChatModelResponse, config: Config
):
"""Parse the response and return the command name and arguments
Args:
assistant_reply_json (dict): The response object from the AI
assistant_reply (ChatModelResponse): The model response from the AI
config (Config): The config object
Returns:
tuple: The command name and arguments
Raises:
json.decoder.JSONDecodeError: If the response is not valid JSON
Exception: If any other error occurs
"""
if config.openai_functions:
if assistant_reply.function_call is None:
return "Error:", "No 'function_call' in assistant reply"
assistant_reply_json["command"] = {
"name": assistant_reply.function_call.name,
"args": json.loads(assistant_reply.function_call.arguments),
}
try:
if "command" not in assistant_reply_json:
return "Error:", "Missing 'command' object in JSON"
if not isinstance(assistant_reply_json, dict):
return (
"Error:",
f"The previous message sent was not a dictionary {assistant_reply_json}",
)
command = assistant_reply_json["command"]
if not isinstance(command, dict):
return "Error:", "'command' object is not a dictionary"
if "name" not in command:
return "Error:", "Missing 'name' field in 'command' object"
command_name = command["name"]
# Use an empty dictionary if 'args' field is not present in 'command' object
arguments = command.get("args", {})
return command_name, arguments
except json.decoder.JSONDecodeError:
return "Error:", "Invalid JSON"
# All other errors, return "Error: + error message"
except Exception as e:
return "Error:", str(e)
def execute_command(
command_name: str,
arguments: dict[str, str],
agent: Agent,
):
"""Execute the command and return the result
Args:
command_name (str): The name of the command to execute
arguments (dict): The arguments for the command
agent (Agent): The agent that is executing the command
Returns:
str: The result of the command
"""
try:
# Execute a native command with the same name or alias, if it exists
if command := agent.command_registry.get_command(command_name):
return command(**arguments, agent=agent)
# Handle non-native commands (e.g. from plugins)
for command in agent.ai_config.prompt_generator.commands:
if (
command_name == command["label"].lower()
or command_name == command["name"].lower()
):
return command["function"](**arguments)
raise RuntimeError(
f"Cannot execute '{command_name}': unknown command."
" Do not try to use this command again."
)
except Exception as e:
return f"Error: {str(e)}"

View File

@@ -1,114 +0,0 @@
""" Command and Control """
import json
from typing import Dict
from autogpt.agent.agent import Agent
from autogpt.config import Config
from autogpt.llm import ChatModelResponse
def is_valid_int(value: str) -> bool:
"""Check if the value is a valid integer
Args:
value (str): The value to check
Returns:
bool: True if the value is a valid integer, False otherwise
"""
try:
int(value)
return True
except ValueError:
return False
def extract_command(
assistant_reply_json: Dict, assistant_reply: ChatModelResponse, config: Config
):
"""Parse the response and return the command name and arguments
Args:
assistant_reply_json (dict): The response object from the AI
assistant_reply (ChatModelResponse): The model response from the AI
config (Config): The config object
Returns:
tuple: The command name and arguments
Raises:
json.decoder.JSONDecodeError: If the response is not valid JSON
Exception: If any other error occurs
"""
if config.openai_functions:
if assistant_reply.function_call is None:
return "Error:", "No 'function_call' in assistant reply"
assistant_reply_json["command"] = {
"name": assistant_reply.function_call.name,
"args": json.loads(assistant_reply.function_call.arguments),
}
try:
if "command" not in assistant_reply_json:
return "Error:", "Missing 'command' object in JSON"
if not isinstance(assistant_reply_json, dict):
return (
"Error:",
f"The previous message sent was not a dictionary {assistant_reply_json}",
)
command = assistant_reply_json["command"]
if not isinstance(command, dict):
return "Error:", "'command' object is not a dictionary"
if "name" not in command:
return "Error:", "Missing 'name' field in 'command' object"
command_name = command["name"]
# Use an empty dictionary if 'args' field is not present in 'command' object
arguments = command.get("args", {})
return command_name, arguments
except json.decoder.JSONDecodeError:
return "Error:", "Invalid JSON"
# All other errors, return "Error: + error message"
except Exception as e:
return "Error:", str(e)
def execute_command(
command_name: str,
arguments: dict[str, str],
agent: Agent,
):
"""Execute the command and return the result
Args:
command_name (str): The name of the command to execute
arguments (dict): The arguments for the command
agent (Agent): The agent that is executing the command
Returns:
str: The result of the command
"""
try:
# Execute a native command with the same name or alias, if it exists
if command := agent.command_registry.get_command(command_name):
return command(**arguments, agent=agent)
# Handle non-native commands (e.g. from plugins)
for command in agent.ai_config.prompt_generator.commands:
if (
command_name == command["label"].lower()
or command_name == command["name"].lower()
):
return command["function"](**arguments)
raise RuntimeError(
f"Cannot execute '{command_name}': unknown command."
" Do not try to use this command again."
)
except Exception as e:
return f"Error: {str(e)}"

View File

@@ -0,0 +1,64 @@
import functools
from pathlib import Path
from typing import Callable
from autogpt.agents.agent import Agent
from autogpt.logs import logger
def sanitize_path_arg(arg_name: str):
def decorator(func: Callable):
# Get position of path parameter, in case it is passed as a positional argument
try:
arg_index = list(func.__annotations__.keys()).index(arg_name)
except ValueError:
raise TypeError(
f"Sanitized parameter '{arg_name}' absent or not annotated on function '{func.__name__}'"
)
# Get position of agent parameter, in case it is passed as a positional argument
try:
agent_arg_index = list(func.__annotations__.keys()).index("agent")
except ValueError:
raise TypeError(
f"Parameter 'agent' absent or not annotated on function '{func.__name__}'"
)
@functools.wraps(func)
def wrapper(*args, **kwargs):
logger.debug(f"Sanitizing arg '{arg_name}' on function '{func.__name__}'")
logger.debug(f"Function annotations: {func.__annotations__}")
# Get Agent from the called function's arguments
agent = kwargs.get(
"agent", len(args) > agent_arg_index and args[agent_arg_index]
)
logger.debug(f"Args: {args}")
logger.debug(f"KWArgs: {kwargs}")
logger.debug(f"Agent argument lifted from function call: {agent}")
if not isinstance(agent, Agent):
raise RuntimeError("Could not get Agent from decorated command's args")
# Sanitize the specified path argument, if one is given
given_path: str | Path | None = kwargs.get(
arg_name, len(args) > arg_index and args[arg_index] or None
)
if given_path:
if given_path in {"", "/"}:
sanitized_path = str(agent.workspace.root)
else:
sanitized_path = str(agent.workspace.get_path(given_path))
if arg_name in kwargs:
kwargs[arg_name] = sanitized_path
else:
# args is an immutable tuple; must be converted to a list to update
arg_list = list(args)
arg_list[arg_index] = sanitized_path
args = tuple(arg_list)
return func(*args, **kwargs)
return wrapper
return decorator

View File

@@ -7,11 +7,13 @@ import docker
from docker.errors import DockerException, ImageNotFound
from docker.models.containers import Container as DockerContainer
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.command_decorator import command
from autogpt.config import Config
from autogpt.logs import logger
from .decorators import sanitize_path_arg
ALLOWLIST_CONTROL = "allowlist"
DENYLIST_CONTROL = "denylist"
@@ -43,14 +45,14 @@ def execute_python_code(code: str, name: str, agent: Agent) -> str:
Returns:
str: The STDOUT captured from the code when it ran
"""
ai_name = agent.ai_name
ai_name = agent.ai_config.ai_name
code_dir = agent.workspace.get_path(Path(ai_name, "executed_code"))
os.makedirs(code_dir, exist_ok=True)
if not name.endswith(".py"):
name = name + ".py"
# The `name` arg is not covered by Agent._resolve_pathlike_command_args(),
# The `name` arg is not covered by @sanitize_path_arg,
# so sanitization must be done here to prevent path traversal.
file_path = agent.workspace.get_path(code_dir / name)
if not file_path.is_relative_to(code_dir):
@@ -76,6 +78,7 @@ def execute_python_code(code: str, name: str, agent: Agent) -> str:
},
},
)
@sanitize_path_arg("filename")
def execute_python_file(filename: str, agent: Agent) -> str:
"""Execute a Python file in a Docker container and return the output
@@ -100,6 +103,9 @@ def execute_python_file(filename: str, agent: Agent) -> str:
)
if we_are_running_in_a_docker_container():
logger.debug(
f"Auto-GPT is running in a Docker container; executing {file_path} directly..."
)
result = subprocess.run(
["python", str(file_path)],
capture_output=True,
@@ -111,6 +117,7 @@ def execute_python_file(filename: str, agent: Agent) -> str:
else:
return f"Error: {result.stderr}"
logger.debug("Auto-GPT is not running in a Docker container")
try:
client = docker.from_env()
# You can replace this with the desired Python image/version
@@ -119,10 +126,10 @@ def execute_python_file(filename: str, agent: Agent) -> str:
image_name = "python:3-alpine"
try:
client.images.get(image_name)
logger.warn(f"Image '{image_name}' found locally")
logger.debug(f"Image '{image_name}' found locally")
except ImageNotFound:
logger.info(
f"Image '{image_name}' not found locally, pulling from Docker Hub"
f"Image '{image_name}' not found locally, pulling from Docker Hub..."
)
# Use the low-level API to stream the pull response
low_level_client = docker.APIClient()
@@ -135,6 +142,7 @@ def execute_python_file(filename: str, agent: Agent) -> str:
elif status:
logger.info(status)
logger.debug(f"Running {file_path} in a {image_name} container...")
container: DockerContainer = client.containers.run(
image_name,
["python", str(file_path.relative_to(agent.workspace.root))],

View File

@@ -1,20 +1,21 @@
"""File operations for AutoGPT"""
from __future__ import annotations
import contextlib
import hashlib
import os
import os.path
from pathlib import Path
from typing import Generator, Literal
from confection import Config
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.command_decorator import command
from autogpt.commands.file_operations_utils import read_textual_file
from autogpt.config import Config
from autogpt.logs import logger
from autogpt.memory.vector import MemoryItem, VectorMemory
from .decorators import sanitize_path_arg
from .file_operations_utils import read_textual_file
Operation = Literal["write", "append", "delete"]
@@ -74,21 +75,26 @@ def file_operations_state(log_path: str) -> dict[str, str]:
return state
@sanitize_path_arg("filename")
def is_duplicate_operation(
operation: Operation, filename: str, config: Config, checksum: str | None = None
operation: Operation, filename: str, agent: Agent, checksum: str | None = None
) -> bool:
"""Check if the operation has already been performed
Args:
operation: The operation to check for
filename: The name of the file to check for
config: The agent config
agent: The agent
checksum: The checksum of the contents to be written
Returns:
True if the operation has already been performed on the file
"""
state = file_operations_state(config.file_logger_path)
# Make the filename into a relative path if possible
with contextlib.suppress(ValueError):
filename = str(Path(filename).relative_to(agent.workspace.root))
state = file_operations_state(agent.config.file_logger_path)
if operation == "delete" and filename not in state:
return True
if operation == "write" and state.get(filename) == checksum:
@@ -96,8 +102,9 @@ def is_duplicate_operation(
return False
@sanitize_path_arg("filename")
def log_operation(
operation: str, filename: str, agent: Agent, checksum: str | None = None
operation: Operation, filename: str, agent: Agent, checksum: str | None = None
) -> None:
"""Log the file operation to the file_logger.txt
@@ -106,6 +113,10 @@ def log_operation(
filename: The name of the file the operation was performed on
checksum: The checksum of the contents to be written
"""
# Make the filename into a relative path if possible
with contextlib.suppress(ValueError):
filename = str(Path(filename).relative_to(agent.workspace.root))
log_entry = f"{operation}: {filename}"
if checksum is not None:
log_entry += f" #{checksum}"
@@ -126,6 +137,7 @@ def log_operation(
}
},
)
@sanitize_path_arg("filename")
def read_file(filename: str, agent: Agent) -> str:
"""Read a file and return the contents
@@ -191,6 +203,7 @@ def ingest_file(
},
aliases=["write_file", "create_file"],
)
@sanitize_path_arg("filename")
def write_to_file(filename: str, text: str, agent: Agent) -> str:
"""Write text to a file
@@ -202,7 +215,7 @@ def write_to_file(filename: str, text: str, agent: Agent) -> str:
str: A message indicating success or failure
"""
checksum = text_checksum(text)
if is_duplicate_operation("write", filename, agent.config, checksum):
if is_duplicate_operation("write", filename, agent, checksum):
return "Error: File has already been updated."
try:
directory = os.path.dirname(filename)
@@ -231,6 +244,7 @@ def write_to_file(filename: str, text: str, agent: Agent) -> str:
},
},
)
@sanitize_path_arg("filename")
def append_to_file(
filename: str, text: str, agent: Agent, should_log: bool = True
) -> str:
@@ -271,6 +285,7 @@ def append_to_file(
}
},
)
@sanitize_path_arg("filename")
def delete_file(filename: str, agent: Agent) -> str:
"""Delete a file
@@ -280,7 +295,7 @@ def delete_file(filename: str, agent: Agent) -> str:
Returns:
str: A message indicating success or failure
"""
if is_duplicate_operation("delete", filename, agent.config):
if is_duplicate_operation("delete", filename, agent):
return "Error: File has already been deleted."
try:
os.remove(filename)
@@ -301,6 +316,7 @@ def delete_file(filename: str, agent: Agent) -> str:
}
},
)
@sanitize_path_arg("directory")
def list_files(directory: str, agent: Agent) -> list[str]:
"""lists files in a directory recursively

View File

@@ -2,10 +2,12 @@
from git.repo import Repo
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.command_decorator import command
from autogpt.url_utils.validators import validate_url
from .decorators import sanitize_path_arg
@command(
"clone_repository",
@@ -22,9 +24,10 @@ from autogpt.url_utils.validators import validate_url
"required": True,
},
},
lambda config: config.github_username and config.github_api_key,
lambda config: bool(config.github_username and config.github_api_key),
"Configure github_username and github_api_key.",
)
@sanitize_path_arg("clone_path")
@validate_url
def clone_repository(url: str, clone_path: str, agent: Agent) -> str:
"""Clone a GitHub repository locally.

View File

@@ -9,7 +9,7 @@ import openai
import requests
from PIL import Image
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.command_decorator import command
from autogpt.logs import logger
@@ -24,7 +24,7 @@ from autogpt.logs import logger
"required": True,
},
},
lambda config: config.image_provider,
lambda config: bool(config.image_provider),
"Requires a image provider to be set.",
)
def generate_image(prompt: str, agent: Agent, size: int = 256) -> str:

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
from typing import NoReturn
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.command_decorator import command
from autogpt.logs import logger

View File

@@ -7,7 +7,7 @@ from itertools import islice
from duckduckgo_search import DDGS
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.command_decorator import command
DUCKDUCKGO_MAX_ATTEMPTS = 3

View File

@@ -27,7 +27,7 @@ from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.command_decorator import command
from autogpt.logs import logger
from autogpt.memory.vector import MemoryItem, get_memory

View File

@@ -4,87 +4,145 @@ from __future__ import annotations
import contextlib
import os
import re
from typing import Dict, Optional, Union
from typing import Any, Dict, Optional, Union
import yaml
from auto_gpt_plugin_template import AutoGPTPluginTemplate
from colorama import Fore
from pydantic import Field, validator
from autogpt.core.configuration.schema import Configurable, SystemSettings
from autogpt.plugins.plugins_config import PluginsConfig
AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "../..", "azure.yaml")
PLUGINS_CONFIG_FILE = os.path.join(
os.path.dirname(__file__), "../..", "plugins_config.yaml"
)
GPT_4_MODEL = "gpt-4"
GPT_3_MODEL = "gpt-3.5-turbo"
class Config(SystemSettings):
fast_llm: str
smart_llm: str
continuous_mode: bool
skip_news: bool
class Config(SystemSettings, arbitrary_types_allowed=True):
name: str = "Auto-GPT configuration"
description: str = "Default configuration for the Auto-GPT application."
########################
# Application Settings #
########################
skip_news: bool = False
skip_reprompt: bool = False
authorise_key: str = "y"
exit_key: str = "n"
debug_mode: bool = False
plain_output: bool = False
chat_messages_enabled: bool = True
# TTS configuration
speak_mode: bool = False
text_to_speech_provider: str = "gtts"
streamelements_voice: str = "Brian"
elevenlabs_voice_id: Optional[str] = None
##########################
# Agent Control Settings #
##########################
# Paths
ai_settings_file: str = "ai_settings.yaml"
prompt_settings_file: str = "prompt_settings.yaml"
workspace_path: Optional[str] = None
file_logger_path: Optional[str] = None
debug_mode: bool
plugins_dir: str
plugins_config: PluginsConfig
continuous_limit: int
speak_mode: bool
skip_reprompt: bool
allow_downloads: bool
exit_key: str
plain_output: bool
disabled_command_categories: list[str]
shell_command_control: str
shell_denylist: list[str]
shell_allowlist: list[str]
ai_settings_file: str
prompt_settings_file: str
embedding_model: str
browse_spacy_language_model: str
# Model configuration
fast_llm: str = "gpt-3.5-turbo"
smart_llm: str = "gpt-4"
temperature: float = 0
openai_functions: bool = False
embedding_model: str = "text-embedding-ada-002"
browse_spacy_language_model: str = "en_core_web_sm"
# Run loop configuration
continuous_mode: bool = False
continuous_limit: int = 0
##########
# Memory #
##########
memory_backend: str = "json_file"
memory_index: str = "auto-gpt-memory"
redis_host: str = "localhost"
redis_port: int = 6379
redis_password: str = ""
wipe_redis_on_start: bool = True
############
# Commands #
############
# General
disabled_command_categories: list[str] = Field(default_factory=list)
# File ops
restrict_to_workspace: bool = True
allow_downloads: bool = False
# Shell commands
shell_command_control: str = "denylist"
execute_local_commands: bool = False
shell_denylist: list[str] = Field(default_factory=lambda: ["sudo", "su"])
shell_allowlist: list[str] = Field(default_factory=list)
# Text to image
image_provider: Optional[str] = None
huggingface_image_model: str = "CompVis/stable-diffusion-v1-4"
sd_webui_url: Optional[str] = "http://localhost:7860"
image_size: int = 256
# Audio to text
audio_to_text_provider: str = "huggingface"
huggingface_audio_to_text_model: Optional[str] = None
# Web browsing
selenium_web_browser: str = "chrome"
selenium_headless: bool = True
user_agent: str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
###################
# Plugin Settings #
###################
plugins_dir: str = "plugins"
plugins_config_file: str = PLUGINS_CONFIG_FILE
plugins_config: PluginsConfig = Field(
default_factory=lambda: PluginsConfig(plugins={})
)
plugins: list[AutoGPTPluginTemplate] = Field(default_factory=list, exclude=True)
plugins_allowlist: list[str] = Field(default_factory=list)
plugins_denylist: list[str] = Field(default_factory=list)
plugins_openai: list[str] = Field(default_factory=list)
###############
# Credentials #
###############
# OpenAI
openai_api_key: Optional[str] = None
openai_organization: Optional[str] = None
temperature: float
use_azure: bool
azure_config_file: Optional[str] = None
azure_model_to_deployment_id_map: Optional[Dict[str, str]] = None
execute_local_commands: bool
restrict_to_workspace: bool
openai_api_type: Optional[str] = None
openai_api_base: Optional[str] = None
openai_api_version: Optional[str] = None
openai_functions: bool
openai_organization: Optional[str] = None
use_azure: bool = False
azure_config_file: Optional[str] = AZURE_CONFIG_FILE
azure_model_to_deployment_id_map: Optional[Dict[str, str]] = None
# Elevenlabs
elevenlabs_api_key: Optional[str] = None
streamelements_voice: str
text_to_speech_provider: str
# Github
github_api_key: Optional[str] = None
github_username: Optional[str] = None
# Google
google_api_key: Optional[str] = None
google_custom_search_engine_id: Optional[str] = None
image_provider: Optional[str] = None
image_size: int
# Huggingface
huggingface_api_token: Optional[str] = None
huggingface_image_model: str
audio_to_text_provider: str
huggingface_audio_to_text_model: Optional[str] = None
sd_webui_url: Optional[str] = None
# Stable Diffusion
sd_webui_auth: Optional[str] = None
selenium_web_browser: str
selenium_headless: bool
user_agent: str
memory_backend: str
memory_index: str
redis_host: str
redis_port: int
redis_password: str
wipe_redis_on_start: bool
plugins_allowlist: list[str]
plugins_denylist: list[str]
plugins_openai: list[str]
plugins_config_file: str
chat_messages_enabled: bool
elevenlabs_voice_id: Optional[str] = None
plugins: list[str]
authorise_key: str
@validator("plugins", each_item=True)
def validate_plugins(cls, p: AutoGPTPluginTemplate | Any):
assert issubclass(
p.__class__, AutoGPTPluginTemplate
), f"{p} does not subclass AutoGPTPluginTemplate"
assert (
p.__class__.__name__ != "AutoGPTPluginTemplate"
), f"Plugins must subclass AutoGPTPluginTemplate; {p} is a template instance"
return p
def get_openai_credentials(self, model: str) -> dict[str, str]:
credentials = {
@@ -149,73 +207,7 @@ class Config(SystemSettings):
class ConfigBuilder(Configurable[Config]):
default_plugins_config_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "..", "..", "plugins_config.yaml"
)
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
if os.getenv("USE_MAC_OS_TTS"):
default_tts_provider = "macos"
elif elevenlabs_api_key:
default_tts_provider = "elevenlabs"
elif os.getenv("USE_BRIAN_TTS"):
default_tts_provider = "streamelements"
else:
default_tts_provider = "gtts"
default_settings = Config(
name="Default Server Config",
description="This is a default server configuration",
smart_llm="gpt-4",
fast_llm="gpt-3.5-turbo",
continuous_mode=False,
continuous_limit=0,
skip_news=False,
debug_mode=False,
plugins_dir="plugins",
plugins_config=PluginsConfig(plugins={}),
speak_mode=False,
skip_reprompt=False,
allow_downloads=False,
exit_key="n",
plain_output=False,
disabled_command_categories=[],
shell_command_control="denylist",
shell_denylist=["sudo", "su"],
shell_allowlist=[],
ai_settings_file="ai_settings.yaml",
prompt_settings_file="prompt_settings.yaml",
embedding_model="text-embedding-ada-002",
browse_spacy_language_model="en_core_web_sm",
temperature=0,
use_azure=False,
azure_config_file=AZURE_CONFIG_FILE,
execute_local_commands=False,
restrict_to_workspace=True,
openai_functions=False,
streamelements_voice="Brian",
text_to_speech_provider=default_tts_provider,
image_size=256,
huggingface_image_model="CompVis/stable-diffusion-v1-4",
audio_to_text_provider="huggingface",
sd_webui_url="http://localhost:7860",
selenium_web_browser="chrome",
selenium_headless=True,
user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
memory_backend="json_file",
memory_index="auto-gpt-memory",
redis_host="localhost",
redis_port=6379,
wipe_redis_on_start=True,
plugins_allowlist=[],
plugins_denylist=[],
plugins_openai=[],
plugins_config_file=default_plugins_config_file,
chat_messages_enabled=True,
plugins=[],
authorise_key="y",
redis_password="",
)
default_settings = Config()
@classmethod
def build_config_from_env(cls) -> Config:
@@ -285,14 +277,19 @@ class ConfigBuilder(Configurable[Config]):
config_dict["elevenlabs_voice_id"] = os.getenv(
"ELEVENLABS_VOICE_ID", os.getenv("ELEVENLABS_VOICE_1_ID")
)
elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
if os.getenv("USE_MAC_OS_TTS"):
default_tts_provider = "macos"
elif elevenlabs_api_key:
default_tts_provider = "elevenlabs"
elif os.getenv("USE_BRIAN_TTS"):
default_tts_provider = "streamelements"
else:
default_tts_provider = "gtts"
config_dict["text_to_speech_provider"] = default_tts_provider
config_dict["plugins_allowlist"] = _safe_split(os.getenv("ALLOWLISTED_PLUGINS"))
config_dict["plugins_denylist"] = _safe_split(os.getenv("DENYLISTED_PLUGINS"))
config_dict["plugins_config"] = PluginsConfig.load_config(
config_dict["plugins_config_file"],
config_dict["plugins_denylist"],
config_dict["plugins_allowlist"],
)
with contextlib.suppress(TypeError):
config_dict["image_size"] = int(os.getenv("IMAGE_SIZE"))
@@ -316,7 +313,17 @@ class ConfigBuilder(Configurable[Config]):
k: v for k, v in config_dict.items() if v is not None
}
return cls.build_agent_configuration(config_dict_without_none_values)
config = cls.build_agent_configuration(config_dict_without_none_values)
# Set secondary config variables (that depend on other config variables)
config.plugins_config = PluginsConfig.load_config(
config.plugins_config_file,
config.plugins_denylist,
config.plugins_allowlist,
)
return config
@classmethod
def load_azure_config(cls, config_file: str = AZURE_CONFIG_FILE) -> Dict[str, str]:
@@ -365,7 +372,7 @@ def check_openai_api_key(config: Config) -> None:
print(
Fore.GREEN
+ "OpenAI API key successfully set!\n"
+ Fore.ORANGE
+ Fore.YELLOW
+ "NOTE: The API key you've set is only temporary.\n"
+ "For longer sessions, please set it in .env file"
+ Fore.RESET

View File

@@ -0,0 +1,272 @@
# Re-architecture Notes
## Key Documents
- [Planned Agent Workflow](https://whimsical.com/agent-workflow-v2-NmnTQ8R7sVo7M3S43XgXmZ)
- [Original Architecture Diagram](https://www.figma.com/file/fwdj44tPR7ArYtnGGUKknw/Modular-Architecture?type=whiteboard&node-id=0-1) - This is sadly well out of date at this point.
- [Kanban](https://github.com/orgs/Significant-Gravitas/projects/1/views/1?filterQuery=label%3Are-arch)
## The Motivation
The `master` branch of Auto-GPT is an organically grown amalgamation of many thoughts
and ideas about agent-driven autonomous systems. It lacks clear abstraction boundaries,
has issues of global state and poorly encapsulated state, and is generally just hard to
make effective changes to. Research in the field is also moving fast, so we want to be able to try new ideas
quickly.
## Initial Planning
A large group of maintainers and contributors met to discuss the architectural
challenges associated with the existing codebase. Many much-desired features (building
new user interfaces, enabling project-specific agents, enabling multi-agent systems)
are bottlenecked by the global state in the system. We discussed the tradeoffs between
an incremental system transition and a big breaking version change and decided to go
for the breaking version change. We justified this by saying:
- We can maintain, in essence, the same user experience as now even with a radical
restructuring of the codebase
- Our developer audience is struggling to use the existing codebase to build
applications and libraries of their own, so this breaking change will largely be
welcome.
## Primary Goals
- Separate the AutoGPT application code from the library code.
- Remove global state from the system
- Allow for multiple agents per user (with facilities for running simultaneously)
- Create a serializable representation of an Agent
- Encapsulate the core systems in abstractions with clear boundaries.
## Secondary goals
- Use existing tools to ditch any unnecessary cruft in the codebase (document loading,
json parsing, anything easier to replace than to port).
- Bring in the [core agent loop updates](https://whimsical.com/agent-workflow-v2-NmnTQ8R7sVo7M3S43XgXmZ)
being developed simultaneously by @Pwuts
# The Agent Subsystems
## Configuration
We want a lot of things from a configuration system. We lean heavily on it in the
`master` branch to allow several parts of the system to communicate with each other.
[Recent work](https://github.com/Significant-Gravitas/Auto-GPT/pull/4737) has made it
so that the config is no longer a singleton object that is materialized from the import
state, but it's still treated as a
[god object](https://en.wikipedia.org/wiki/God_object) containing all information about
the system and _critically_ allowing any system to reference configuration information
about other parts of the system.
### What we want
- It should still be reasonable to collate the entire system configuration in a
sensible way.
- The configuration should be validatable and validated.
- The system configuration should be a _serializable_ representation of an `Agent`.
- The configuration system should provide a clear (albeit very low-level) contract
about user-configurable aspects of the system.
- The configuration should reasonably manage default values and user-provided overrides.
- The configuration system needs to handle credentials in a reasonable way.
- The configuration should be the representation of some amount of system state, like
api budgets and resource usage. These aspects are recorded in the configuration and
updated by the system itself.
- Agent systems should have encapsulated views of the configuration. E.g. the memory
system should know about memory configuration but nothing about command configuration. A minimal sketch of these ideas follows this list.
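The sketch below assumes pydantic (which the re-arch already uses); the class and field names are illustrative, not the actual `autogpt.core` configuration schema:
```
from pydantic import BaseModel, Field


class MemoryConfig(BaseModel):
    """Encapsulated view: only what the memory system needs to know."""

    backend: str = "json_file"
    index_name: str = "auto-gpt-memory"


class CommandConfig(BaseModel):
    """Encapsulated view: only what the command system needs to know."""

    disabled_categories: list[str] = Field(default_factory=list)


class AgentConfig(BaseModel):
    """Collated, validated configuration for a single agent."""

    agent_name: str
    memory: MemoryConfig = Field(default_factory=MemoryConfig)
    commands: CommandConfig = Field(default_factory=CommandConfig)


# The whole configuration validates on construction, serializes to plain
# python types / JSON, and each subsystem receives only its own slice.
config = AgentConfig(agent_name="demo-agent")
as_json = config.json()          # serializable representation of the agent
memory_view = config.memory      # handed to the memory system only
```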
## Workspace
There are two ways to think about the workspace:
- The workspace is a scratch space for an agent where it can store files, write code,
and do pretty much whatever else it likes.
- The workspace is, at any given point in time, the single source of truth for what an
agent is. It contains the serializable state (the configuration) as well as all
other working state (stored files, databases, memories, custom code).
In the existing system there is **one** workspace. And because the workspace holds so
much agent state, that means a user can only work with one agent at a time.
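As a rough illustration only (not the actual `autogpt.workspace` implementation), a per-agent workspace can be little more than a rooted directory plus a path resolver that refuses to escape it:
```
from __future__ import annotations

from pathlib import Path


class Workspace:
    """Scratch space for a single agent, rooted at one directory."""

    def __init__(self, root: str | Path):
        self.root = Path(root).resolve()
        self.root.mkdir(parents=True, exist_ok=True)

    def get_path(self, relative_path: str | Path) -> Path:
        """Resolve a path inside the workspace, rejecting traversal outside it."""
        full_path = (self.root / relative_path).resolve()
        if not full_path.is_relative_to(self.root):
            raise ValueError(f"{relative_path} resolves outside the workspace")
        return full_path
```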
## Memory
The memory system has been under extremely active development.
See [#3536](https://github.com/Significant-Gravitas/Auto-GPT/issues/3536) and
[#4208](https://github.com/Significant-Gravitas/Auto-GPT/pull/4208) for discussion and
work in the `master` branch. The TL;DR is
that we noticed a couple of months ago that the `Agent` performed **worse** with
permanent memory than without it. Since then the knowledge storage and retrieval
system has been [redesigned](https://whimsical.com/memory-system-8Ae6x6QkjDwQAUe9eVJ6w1)
and partially implemented in the `master` branch.
## Planning/Prompt-Engineering
The planning system is the system that translates user desires/agent intentions into
language model prompts. In the course of development, it has become pretty clear
that `Planning` is the wrong name for this system.
### What we want
- It should be incredibly obvious what's being passed to a language model, when it's
being passed, and what the language model response is. The landscape of language
model research is developing very rapidly, so building complex abstractions between
users/contributors and the language model interactions is going to make it very
difficult for us to nimbly respond to new research developments.
- Prompt-engineering should ideally be exposed in a parameterizable way to users.
- We should, where possible, leverage OpenAI's new
[function calling api](https://openai.com/blog/function-calling-and-other-api-updates)
to get outputs in a standard machine-readable format and avoid the deep pit of
parsing json (and fixing unparsable json).
### Planning Strategies
The [new agent workflow](https://whimsical.com/agent-workflow-v2-NmnTQ8R7sVo7M3S43XgXmZ)
has many, many interaction points for language models. We really would like to not
distribute prompt templates and raw strings all through the system. The re-arch solution
is to encapsulate language model interactions into planning strategies (a sketch follows the list below).
These strategies are defined by
- The `LanguageModelClassification` they use (`FAST` or `SMART`)
- A function `build_prompt` that takes strategy specific arguments and constructs a
`LanguageModelPrompt` (a simple container for lists of messages and functions to
pass to the language model)
- A function `parse_content` that parses the response content (a dict) into a better
formatted dict. Contracts here are intentionally loose and will tighten once we have
at least one other language model provider.
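Here is a minimal sketch of such a strategy, using the names from the list above (`LanguageModelClassification`, `build_prompt`, `parse_content`); the exact signatures in `autogpt/core` may differ:
```
import abc
import enum
from dataclasses import dataclass, field


class LanguageModelClassification(enum.Enum):
    FAST = "fast"
    SMART = "smart"


@dataclass
class LanguageModelPrompt:
    """Simple container for the messages and functions passed to the language model."""

    messages: list[dict] = field(default_factory=list)
    functions: list[dict] = field(default_factory=list)


class PromptStrategy(abc.ABC):
    @property
    @abc.abstractmethod
    def model_classification(self) -> LanguageModelClassification:
        """Which class of model (FAST or SMART) this strategy targets."""

    @abc.abstractmethod
    def build_prompt(self, **kwargs) -> LanguageModelPrompt:
        """Construct the prompt from strategy-specific arguments."""

    @abc.abstractmethod
    def parse_content(self, response_content: dict) -> dict:
        """Parse the raw response content into a better-formatted dict."""
```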
## Resources
Resources are kinds of services we consume from external APIs. They may have associated
credentials and costs we need to manage. Management of those credentials is implemented
as manipulation of the resource configuration. We have two categories of resources
currently:
- AI/ML model providers (including language model providers and embedding model providers, i.e. OpenAI)
- Memory providers (e.g. Pinecone, Weaviate, ChromaDB, etc.)
### What we want
- Resource abstractions should provide a common interface to different service providers
for a particular kind of service.
- Resource abstractions should manipulate the configuration to manage their credentials
and budget/accounting.
- Resource abstractions should be composable over an API (e.g. I should be able to make
an OpenAI provider that is both a LanguageModelProvider and an EmbeddingModelProvider
and use it wherever I need those services). A sketch of this composition follows the list.
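The provider interfaces in this sketch are illustrative placeholders, not the actual `autogpt.core.resource` classes:
```
import abc


class LanguageModelProvider(abc.ABC):
    @abc.abstractmethod
    def create_chat_completion(self, messages: list[dict], model: str) -> str:
        ...


class EmbeddingModelProvider(abc.ABC):
    @abc.abstractmethod
    def create_embedding(self, text: str, model: str) -> list[float]:
        ...


class OpenAIProvider(LanguageModelProvider, EmbeddingModelProvider):
    """One resource object usable wherever either service is needed."""

    def __init__(self, api_key: str):
        self._api_key = api_key  # credentials live with the resource abstraction

    def create_chat_completion(self, messages: list[dict], model: str) -> str:
        raise NotImplementedError("illustrative sketch only")

    def create_embedding(self, text: str, model: str) -> list[float]:
        raise NotImplementedError("illustrative sketch only")
```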
## Abilities
Along with planning and memory usage, abilities are one of the major augmentations of
augmented language models. They allow us to expand the scope of what language models
can do by hooking them up to code they can execute to obtain new knowledge or influence
the world.
### What we want
- Abilities should have an extremely clear interface that users can write to.
- Abilities should have an extremely clear interface that a language model can
understand
- Abilities should be declarative about their dependencies so the system can inject them
- Abilities should be executable (where sensible) in an async run loop.
- Abilities should not have side effects unless those side effects are clear in
their representation to an agent (e.g. the BrowseWeb ability shouldn't write a file,
but the WriteFile ability can). A sketch of such an interface follows this list.
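The sketch below is hypothetical (declared dependencies, async execution, an explicit result type); it is not the actual `autogpt.core.ability` API:
```
import abc
from dataclasses import dataclass


@dataclass
class AbilityResult:
    success: bool
    message: str


class Ability(abc.ABC):
    """Interface that both developers and a language model can understand."""

    # Names of services this ability depends on; the system injects them.
    required_resources: list[str] = []

    @classmethod
    @abc.abstractmethod
    def description(cls) -> str:
        """Human- and model-readable description of what the ability does."""

    @abc.abstractmethod
    async def __call__(self, **kwargs) -> AbilityResult:
        """Execute the ability inside an async run loop."""


class WriteFile(Ability):
    required_resources = ["workspace"]

    @classmethod
    def description(cls) -> str:
        # The side effect (writing a file) is explicit in the name and description.
        return "Write text to a file in the agent workspace."

    async def __call__(self, filename: str, contents: str) -> AbilityResult:
        with open(filename, "w") as f:
            f.write(contents)
        return AbilityResult(success=True, message=f"Wrote {filename}")
```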
## Plugins
Users want to add lots of features that we don't want to support as first-party.
Our solution to this is a plugin system to allow users to plug in their functionality or
to construct their agent from a public plugin marketplace. Our primary concern in the
re-arch is to build a stateless plugin service interface and a simple implementation
that can load plugins from installed packages or from zip files. Future efforts will
expand this system to allow plugins to load from a marketplace or some other kind
of service.
### What is a Plugin
"Plugin" is kind of a garbage term. It refers to a number of things.
- New commands for the agent to execute. This is the most common usage.
- Replacements for entire subsystems like memory or language model providers
- Application plugins that do things like send emails or communicate via WhatsApp
- The repositories contributors create that may themselves have multiple plugins in them.
### Usage in the existing system
The current plugin system is _hook-based_. This means plugins don't correspond to
kinds of objects in the system, but rather to times in the system at which we defer
execution to them. The main advantage of this setup is that user code can hijack
pretty much any behavior of the agent by injecting code that supersedes the normal
agent execution. The disadvantages to this approach are numerous:
- We have absolutely no mechanisms to enforce any security measures because the threat
surface is everything.
- We cannot reason about agent behavior in a cohesive way because control flow can be
ceded to user code at pretty much any point and arbitrarily change or break the
agent behavior
- The interface for designing a plugin is kind of terrible and difficult to standardize
- The hook based implementation means we couple ourselves to a particular flow of
control (or otherwise risk breaking plugin behavior). E.g. many of the hook targets
in the [old workflow](https://whimsical.com/agent-workflow-VAzeKcup3SR7awpNZJKTyK)
are not present or mean something entirely different in the
[new workflow](https://whimsical.com/agent-workflow-v2-NmnTQ8R7sVo7M3S43XgXmZ).
- Etc.
### What we want
- A concrete definition of a plugin that is narrow enough in scope that we can define
it well and reason about how it will work in the system.
- A set of abstractions that let us define a plugin by its storage format and location
- A service interface that knows how to parse the plugin abstractions and turn them
into concrete classes and objects. A sketch of these abstractions follows the list.
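The storage-format enum, location record, and stateless service interface below are illustrative names, not the shipped plugin service:
```
import abc
import enum
from dataclasses import dataclass


class PluginStorageFormat(enum.Enum):
    INSTALLED_PACKAGE = "installed_package"
    ZIP_FILE = "zip_file"


@dataclass
class PluginLocation:
    """Where a plugin lives and how it is stored."""

    storage_format: PluginStorageFormat
    storage_route: str  # e.g. a module path or a path to a zip file


class PluginService(abc.ABC):
    """Stateless service that turns plugin locations into concrete objects."""

    @staticmethod
    @abc.abstractmethod
    def get_plugin(location: PluginLocation) -> type:
        """Load and return the plugin class described by `location`."""
```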
## Some Notes on how and why we'll use OO in this project
First and foremost, Python itself is an object-oriented language. Its
underlying [data model](https://docs.python.org/3/reference/datamodel.html) is built
with object-oriented programming in mind. It offers useful tools like abstract base
classes to communicate interfaces to developers who want to, e.g., write plugins, or
help work on implementations. If we were working in a different language that offered
different tools, we'd use a different paradigm.
While many things are classes in the re-arch, they are not classes in the same way.
There are three kinds of things (roughly) that are written as classes in the re-arch:
1. **Configuration**: Auto-GPT has *a lot* of configuration. This configuration
is *data* and we use **[Pydantic](https://docs.pydantic.dev/latest/)** to manage it as
pydantic is basically industry standard for this stuff. It provides runtime validation
for all the configuration and allows us to easily serialize configuration to both basic
python types (dicts, lists, and primitives) as well as serialize to json, which is
important for us being able to put representations of agents
[on the wire](https://en.wikipedia.org/wiki/Wire_protocol) for web applications and
agent-to-agent communication. *These are essentially
[structs](https://en.wikipedia.org/wiki/Struct_(C_programming_language)) rather than
traditional classes.*
2. **Internal Data**: Very similar to configuration, Auto-GPT passes around boatloads
of internal data. We are interacting with language models and language model APIs
which means we are handling lots of *structured* but *raw* text. Here we also
leverage **pydantic** to both *parse* and *validate* the internal data and also to
give us concrete types which we can use static type checkers to validate against
and discover problems before they show up as bugs at runtime. *These are
essentially [structs](https://en.wikipedia.org/wiki/Struct_(C_programming_language))
rather than traditional classes.*
3. **System Interfaces**: This is our primary traditional use of classes in the
re-arch. We have a bunch of systems. We want many of those systems to have
alternative implementations (e.g. via plugins). We use abstract base classes to
define interfaces to communicate with people who might want to provide those
plugins. We provide a single concrete implementation of most of those systems as a
subclass of the interface. This should not be controversial.
The approach is consistent with
[prior](https://github.com/Significant-Gravitas/Auto-GPT/issues/2458)
[work](https://github.com/Significant-Gravitas/Auto-GPT/pull/2442) done by other
maintainers in this direction.
From an organizational standpoint, OO programming is by far the most popular programming
paradigm (especially for Python). It's the one most often taught in programming classes
and the one with the most available online training for people interested in
contributing.
Finally, and importantly, we scoped the plan and initial design of the re-arch as a
large group of maintainers and collaborators early on. This is consistent with the
design we chose and no-one offered alternatives.

View File

@@ -1,7 +1,33 @@
# Run instructions
# Auto-GPT Core
This subpackage contains the ongoing work for the
[Auto-GPT Re-arch](https://github.com/Significant-Gravitas/Auto-GPT/issues/4770). It is
a work in progress and is not yet feature complete. In particular, it does not yet
have many of the Auto-GPT commands implemented and is pending ongoing work to
[re-incorporate vector-based memory and knowledge retrieval](https://github.com/Significant-Gravitas/Auto-GPT/issues/3536).
## [Overview](ARCHITECTURE_NOTES.md)
The Auto-GPT Re-arch is a re-implementation of the Auto-GPT agent that is designed to be more modular,
more extensible, and more maintainable than the original Auto-GPT agent. It is also designed to be
more accessible to new developers and to be easier to contribute to. The re-arch is a work in progress
and is not yet feature complete. It is also not yet ready for production use.
## Running the Re-arch Code
There are two client applications for Auto-GPT included.
Unlike the main version of Auto-GPT, the re-arch requires you to actually install Auto-GPT in your python
environment to run this application. To do so, run
```
pip install -e REPOSITORY_ROOT
```
where `REPOSITORY_ROOT` is the root of the Auto-GPT repository on your machine. The `REPOSITORY_ROOT`
is the directory that contains the `setup.py` file and is the main, top-level directory of the repository
when you clone it.
## CLI Application
:star2: **This is the reference application I'm working with for now** :star2:
@@ -11,21 +37,23 @@ The first app is a straight CLI application. I have not done anything yet to po
- [Entry Point](https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/core/runner/cli_app/cli.py)
- [Client Application](https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/core/runner/cli_app/main.py)
Auto-GPT must be installed in your python environment to run this application. To do so, run
```
pip install -e REPOSITORY_ROOT
```
where `REPOSITORY_ROOT` is the root of the Auto-GPT repository on your machine.
You'll then need a settings file. Run
```
python REPOSITORY_ROOT/autogpt/core/runner/cli_app/cli.py make-settings
```
This will write a file called `default_agent_settings.yaml` with all the user-modifiable configuration keys to `~/auto-gpt/default_agent_settings.yml` and make the `auto-gpt` directory in your user directory if it doesn't exist). At a bare minimum, you'll need to set `openai.credentials.api_key` to your OpenAI API Key to run the model.
This will write a file called `default_agent_settings.yaml` with all the user-modifiable
configuration keys to `~/auto-gpt/default_agent_settings.yml` (and make the `auto-gpt` directory
in your user directory if it doesn't exist). Your user directory is located in different places
depending on your operating system:
- On Linux, it's `/home/USERNAME`
- On Windows, it's `C:\Users\USERNAME`
- On Mac, it's `/Users/USERNAME`
At a bare minimum, you'll need to set `openai.credentials.api_key` to your OpenAI API Key to run
the model.
You can then run Auto-GPT with
@@ -35,9 +63,15 @@ python REPOSITORY_ROOT/autogpt/core/runner/cli_app/cli.py run
to launch the interaction loop.
## CLI Web App
### CLI Web App
The second app is still a CLI, but it sets up a local webserver that the client application talks to rather than invoking calls to the Agent library code directly. This application is essentially a sketch at this point as the folks who were driving it have had less time (and likely not enough clarity) to proceed.
:warning: I am not actively developing this application. I am primarily working with the traditional CLI app
described above. It is a very good place to contribute if you have web application design experience and are
looking to get involved in the re-arch.
The second app is still a CLI, but it sets up a local webserver that the client application talks to
rather than invoking calls to the Agent library code directly. This application is essentially a sketch
at this point as the folks who were driving it have had less time (and likely not enough clarity) to proceed.
- [Entry Point](https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/core/runner/cli_web_app/cli.py)
- [Client Application](https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/core/runner/cli_web_app/client/client.py)
@@ -58,5 +92,3 @@ python REPOSITORY_ROOT/autogpt/core/runner/cli_web_app/cli.py client
```
This will launch a webserver and then start the client cli application to communicate with it.
:warning: I am not actively developing this application. It is a very good place to get involved if you have web application design experience and are looking to get involved in the re-arch.

View File

@@ -1,13 +1,14 @@
from __future__ import annotations
from copy import deepcopy
from dataclasses import dataclass, field
from math import ceil, floor
from typing import TYPE_CHECKING, List, Literal, Optional, TypedDict
from typing import TYPE_CHECKING, Literal, Optional, Type, TypedDict, TypeVar, overload
if TYPE_CHECKING:
from autogpt.llm.providers.openai import OpenAIFunctionCall
MessageRole = Literal["system", "user", "assistant"]
MessageRole = Literal["system", "user", "assistant", "function"]
MessageType = Literal["ai_response", "action_result"]
TText = list[int]
@@ -19,6 +20,17 @@ class MessageDict(TypedDict):
content: str
class ResponseMessageDict(TypedDict):
role: Literal["assistant"]
content: Optional[str]
function_call: Optional[FunctionCallDict]
class FunctionCallDict(TypedDict):
name: str
arguments: str
@dataclass
class Message:
"""OpenAI Message object containing a role and the message content"""
@@ -68,15 +80,31 @@ class EmbeddingModelInfo(ModelInfo):
embedding_dimensions: int
# Can be replaced by Self in Python 3.11
TChatSequence = TypeVar("TChatSequence", bound="ChatSequence")
@dataclass
class ChatSequence:
"""Utility container for a chat sequence"""
model: ChatModelInfo
messages: list[Message] = field(default_factory=list)
messages: list[Message] = field(default_factory=list[Message])
def __getitem__(self, i: int):
return self.messages[i]
@overload
def __getitem__(self, key: int) -> Message:
...
@overload
def __getitem__(self: TChatSequence, key: slice) -> TChatSequence:
...
def __getitem__(self: TChatSequence, key: int | slice) -> Message | TChatSequence:
if isinstance(key, slice):
copy = deepcopy(self)
copy.messages = self.messages[key]
return copy
return self.messages[key]
def __iter__(self):
return iter(self.messages)
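For illustration, a minimal sketch of how the new indexing and slicing overloads are meant to behave (assuming `gpt-3.5-turbo` is among the known chat models):
```python
from autogpt.llm.base import ChatSequence, Message

seq = ChatSequence.for_model("gpt-3.5-turbo")
seq.add("system", "You are a helpful assistant.")
seq.add("user", "Hello!")

first = seq[0]  # integer indexing still returns a single Message
tail = seq[1:]  # slicing now returns a deep copy of the same ChatSequence type
assert isinstance(first, Message)
assert isinstance(tail, ChatSequence) and len(tail) == 1
```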
@@ -84,6 +112,14 @@ class ChatSequence:
def __len__(self):
return len(self.messages)
def add(
self,
message_role: MessageRole,
content: str,
type: MessageType | None = None,
) -> None:
self.append(Message(message_role, content, type))
def append(self, message: Message):
return self.messages.append(message)
@@ -95,21 +131,23 @@ class ChatSequence:
self.messages.insert(index, message)
@classmethod
def for_model(cls, model_name: str, messages: list[Message] | ChatSequence = []):
def for_model(
cls: Type[TChatSequence],
model_name: str,
messages: list[Message] | ChatSequence = [],
**kwargs,
) -> TChatSequence:
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
if not model_name in OPEN_AI_CHAT_MODELS:
raise ValueError(f"Unknown chat model '{model_name}'")
return ChatSequence(
model=OPEN_AI_CHAT_MODELS[model_name], messages=list(messages)
return cls(
model=OPEN_AI_CHAT_MODELS[model_name], messages=list(messages), **kwargs
)
def add(self, message_role: MessageRole, content: str):
self.messages.append(Message(message_role, content))
@property
def token_length(self):
def token_length(self) -> int:
from autogpt.llm.utils import count_message_tokens
return count_message_tokens(self.messages, self.model.name)
@@ -128,7 +166,7 @@ class ChatSequence:
[f"{separator(m.role)}\n{m.content}" for m in self.messages]
)
return f"""
============== ChatSequence ==============
============== {__class__.__name__} ==============
Length: {self.token_length} tokens; {len(self.messages)} messages
{formatted_messages}
==========================================
@@ -140,24 +178,18 @@ class LLMResponse:
"""Standard response struct for a response from an LLM model."""
model_info: ModelInfo
prompt_tokens_used: int = 0
completion_tokens_used: int = 0
@dataclass
class EmbeddingModelResponse(LLMResponse):
"""Standard response struct for a response from an embedding model."""
embedding: List[float] = field(default_factory=list)
def __post_init__(self):
if self.completion_tokens_used:
raise ValueError("Embeddings should not have completion tokens used.")
embedding: list[float] = field(default_factory=list)
@dataclass
class ChatModelResponse(LLMResponse):
"""Standard response struct for a response from an LLM model."""
"""Standard response struct for a response from a chat LLM."""
content: Optional[str] = None
function_call: Optional[OpenAIFunctionCall] = None
content: Optional[str]
function_call: Optional[OpenAIFunctionCall]

View File

@@ -3,17 +3,18 @@ from __future__ import annotations
import time
from typing import TYPE_CHECKING
from autogpt.llm.providers.openai import get_openai_command_specs
if TYPE_CHECKING:
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.config import Config
from autogpt.llm.api_manager import ApiManager
from autogpt.llm.base import ChatSequence, Message
from autogpt.llm.providers.openai import (
count_openai_functions_tokens,
get_openai_command_specs,
)
from autogpt.llm.utils import count_message_tokens, create_chat_completion
from autogpt.log_cycle.log_cycle import CURRENT_CONTEXT_FILE_NAME
from autogpt.logs import logger
from autogpt.logs import CURRENT_CONTEXT_FILE_NAME, logger
# TODO: Change debug from hardcode to argument
@@ -73,33 +74,28 @@ def chat_with_ai(
],
)
# Add messages from the full message history until we reach the token limit
next_message_to_add_index = len(agent.history) - 1
insertion_index = len(message_sequence)
# Count the currently used tokens
current_tokens_used = message_sequence.token_length
insertion_index = len(message_sequence)
# while current_tokens_used > 2500:
# # remove memories until we are under 2500 tokens
# relevant_memory = relevant_memory[:-1]
# (
# next_message_to_add_index,
# current_tokens_used,
# insertion_index,
# current_context,
# ) = generate_context(
# prompt, relevant_memory, agent.history, model
# )
# Account for tokens used by OpenAI functions
openai_functions = None
if agent.config.openai_functions:
openai_functions = get_openai_command_specs(agent.command_registry)
functions_tlength = count_openai_functions_tokens(openai_functions, model)
current_tokens_used += functions_tlength
logger.debug(f"OpenAI Functions take up {functions_tlength} tokens in API call")
# Account for user input (appended later)
user_input_msg = Message("user", triggering_prompt)
current_tokens_used += count_message_tokens([user_input_msg], model)
current_tokens_used += count_message_tokens(user_input_msg, model)
current_tokens_used += 500 # Reserve space for new_summary_message
current_tokens_used += agent.history.max_summary_tlength # Reserve space
current_tokens_used += 500 # Reserve space for the openai functions TODO improve
# Add Messages until the token limit is reached or there are no more messages to add.
for cycle in reversed(list(agent.history.per_cycle(agent.config))):
# Add historical Messages until the token limit is reached
# or there are no more messages to add.
for cycle in reversed(list(agent.history.per_cycle())):
messages_to_add = [msg for msg in cycle if msg is not None]
tokens_to_add = count_message_tokens(messages_to_add, model)
if current_tokens_used + tokens_to_add > send_token_limit:
@@ -115,9 +111,9 @@ def chat_with_ai(
new_summary_message, trimmed_messages = agent.history.trim_messages(
current_message_chain=list(message_sequence), config=agent.config
)
tokens_to_add = count_message_tokens([new_summary_message], model)
tokens_to_add = count_message_tokens(new_summary_message, model)
message_sequence.insert(insertion_index, new_summary_message)
current_tokens_used += tokens_to_add - 500
current_tokens_used += tokens_to_add - agent.history.max_summary_tlength
# FIXME: uncomment when memory is back in use
# memory_store = get_memory(config)
@@ -143,7 +139,7 @@ def chat_with_ai(
)
logger.debug(budget_message)
message_sequence.add("system", budget_message)
current_tokens_used += count_message_tokens([message_sequence[-1]], model)
current_tokens_used += count_message_tokens(message_sequence[-1], model)
# Append user input, the length of this is accounted for above
message_sequence.append(user_input_msg)
@@ -157,14 +153,14 @@ def chat_with_ai(
)
if not plugin_response or plugin_response == "":
continue
tokens_to_add = count_message_tokens(
[Message("system", plugin_response)], model
)
tokens_to_add = count_message_tokens(Message("system", plugin_response), model)
if current_tokens_used + tokens_to_add > send_token_limit:
logger.debug(f"Plugin response too long, skipping: {plugin_response}")
logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
break
message_sequence.add("system", plugin_response)
current_tokens_used += tokens_to_add
# Calculate remaining tokens
tokens_remaining = token_limit - current_tokens_used
# assert tokens_remaining >= 0, "Tokens remaining is negative.
@@ -196,7 +192,7 @@ def chat_with_ai(
assistant_reply = create_chat_completion(
prompt=message_sequence,
config=agent.config,
functions=get_openai_command_specs(agent),
functions=openai_functions,
max_tokens=tokens_remaining,
)

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import functools
import time
from dataclasses import dataclass
from typing import TYPE_CHECKING, List, Optional
from typing import Callable, List, Optional
from unittest.mock import patch
import openai
@@ -12,9 +12,6 @@ from colorama import Fore, Style
from openai.error import APIError, RateLimitError, ServiceUnavailableError, Timeout
from openai.openai_object import OpenAIObject
if TYPE_CHECKING:
from autogpt.agent.agent import Agent
from autogpt.llm.base import (
ChatModelInfo,
EmbeddingModelInfo,
@@ -23,6 +20,7 @@ from autogpt.llm.base import (
TText,
)
from autogpt.logs import logger
from autogpt.models.command_registry import CommandRegistry
OPEN_AI_CHAT_MODELS = {
info.name: info
@@ -114,7 +112,7 @@ OPEN_AI_MODELS: dict[str, ChatModelInfo | EmbeddingModelInfo | TextModelInfo] =
}
def meter_api(func):
def meter_api(func: Callable):
"""Adds ApiManager metering to functions which make OpenAI API calls"""
from autogpt.llm.api_manager import ApiManager
@@ -152,7 +150,7 @@ def meter_api(func):
def retry_api(
num_retries: int = 10,
max_retries: int = 10,
backoff_base: float = 2.0,
warn_user: bool = True,
):
@@ -164,43 +162,49 @@ def retry_api(
warn_user bool: Whether to warn the user. Defaults to True.
"""
error_messages = {
ServiceUnavailableError: f"{Fore.RED}Error: The OpenAI API engine is currently overloaded, passing...{Fore.RESET}",
RateLimitError: f"{Fore.RED}Error: Reached rate limit, passing...{Fore.RESET}",
ServiceUnavailableError: f"{Fore.RED}Error: The OpenAI API engine is currently overloaded{Fore.RESET}",
RateLimitError: f"{Fore.RED}Error: Reached rate limit{Fore.RESET}",
}
api_key_error_msg = (
f"Please double check that you have setup a "
f"{Fore.CYAN + Style.BRIGHT}PAID{Style.RESET_ALL} OpenAI API Account. You can "
f"read more here: {Fore.CYAN}https://docs.agpt.co/setup/#getting-an-api-key{Fore.RESET}"
)
backoff_msg = (
f"{Fore.RED}Error: API Bad gateway. Waiting {{backoff}} seconds...{Fore.RESET}"
)
backoff_msg = f"{Fore.RED}Waiting {{backoff}} seconds...{Fore.RESET}"
def _wrapper(func):
def _wrapper(func: Callable):
@functools.wraps(func)
def _wrapped(*args, **kwargs):
user_warned = not warn_user
num_attempts = num_retries + 1 # +1 for the first attempt
for attempt in range(1, num_attempts + 1):
max_attempts = max_retries + 1 # +1 for the first attempt
for attempt in range(1, max_attempts + 1):
try:
return func(*args, **kwargs)
except (RateLimitError, ServiceUnavailableError) as e:
if attempt == num_attempts:
if attempt >= max_attempts or (
# User's API quota exceeded
isinstance(e, RateLimitError)
and (err := getattr(e, "error", {}))
and err.get("code") == "insufficient_quota"
):
raise
error_msg = error_messages[type(e)]
logger.debug(error_msg)
logger.warn(error_msg)
if not user_warned:
logger.double_check(api_key_error_msg)
logger.debug(f"Status: {e.http_status}")
logger.debug(f"Response body: {e.json_body}")
logger.debug(f"Response headers: {e.headers}")
user_warned = True
except (APIError, Timeout) as e:
if (e.http_status not in [429, 502]) or (attempt == num_attempts):
if (e.http_status not in [429, 502]) or (attempt == max_attempts):
raise
backoff = backoff_base ** (attempt + 2)
logger.debug(backoff_msg.format(backoff=backoff))
logger.warn(backoff_msg.format(backoff=backoff))
time.sleep(backoff)
return _wrapped
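A hedged sketch of how the two decorators are typically stacked on a function that calls the OpenAI API (the wrapped function here is illustrative, not the library's actual wrapper):
```python
import openai

from autogpt.llm.providers.openai import meter_api, retry_api

@meter_api
@retry_api(max_retries=5, backoff_base=2.0, warn_user=False)
def create_embedding(text: str, model: str = "text-embedding-ada-002"):
    # Retries on RateLimitError / ServiceUnavailableError with exponential backoff,
    # and re-raises immediately once max_retries is exhausted or the account has
    # run out of quota (the insufficient_quota check added above).
    return openai.Embedding.create(input=[text], model=model)
```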
@@ -301,13 +305,13 @@ class OpenAIFunctionSpec:
@dataclass
class ParameterSpec:
name: str
type: str
type: str # TODO: add enum support
description: Optional[str]
required: bool = False
@property
def __dict__(self):
"""Output an OpenAI-consumable function specification"""
def schema(self) -> dict[str, str | dict | list]:
"""Returns an OpenAI-consumable function specification"""
return {
"name": self.name,
"description": self.description,
@@ -326,14 +330,44 @@ class OpenAIFunctionSpec:
},
}
@property
def prompt_format(self) -> str:
"""Returns the function formatted similarly to the way OpenAI does it internally:
https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18
def get_openai_command_specs(agent: Agent) -> list[OpenAIFunctionSpec]:
Example:
```ts
// Get the current weather in a given location
type get_current_weather = (_: {
// The city and state, e.g. San Francisco, CA
location: string,
unit?: "celsius" | "fahrenheit",
}) => any;
```
"""
def param_signature(p_spec: OpenAIFunctionSpec.ParameterSpec) -> str:
# TODO: enum type support
return (
f"// {p_spec.description}\n" if p_spec.description else ""
) + f"{p_spec.name}{'' if p_spec.required else '?'}: {p_spec.type},"
return "\n".join(
[
f"// {self.description}",
f"type {self.name} = (_ :{{",
*[param_signature(p) for p in self.parameters.values()],
"}) => any;",
]
)
def get_openai_command_specs(
command_registry: CommandRegistry,
) -> list[OpenAIFunctionSpec]:
"""Get OpenAI-consumable function specs for the agent's available commands.
see https://platform.openai.com/docs/guides/gpt/function-calling
"""
if not agent.config.openai_functions:
return []
return [
OpenAIFunctionSpec(
name=command.name,
@@ -348,5 +382,48 @@ def get_openai_command_specs(agent: Agent) -> list[OpenAIFunctionSpec]:
for param in command.parameters
},
)
for command in agent.command_registry.commands.values()
for command in command_registry.commands.values()
]
def count_openai_functions_tokens(
functions: list[OpenAIFunctionSpec], for_model: str
) -> int:
"""Returns the number of tokens taken up by a set of function definitions
Reference: https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18
"""
from autogpt.llm.utils import count_string_tokens
return count_string_tokens(
f"# Tools\n\n## functions\n\n{format_function_specs_as_typescript_ns(functions)}",
for_model,
)
def format_function_specs_as_typescript_ns(functions: list[OpenAIFunctionSpec]) -> str:
"""Returns a function signature block in the format used by OpenAI internally:
https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18
For use with `count_string_tokens` to determine token usage of provided functions.
Example:
```ts
namespace functions {
// Get the current weather in a given location
type get_current_weather = (_: {
// The city and state, e.g. San Francisco, CA
location: string,
unit?: "celsius" | "fahrenheit",
}) => any;
} // namespace functions
```
"""
return (
"namespace functions {\n\n"
+ "\n\n".join(f.prompt_format for f in functions)
+ "\n\n} // namespace functions"
)
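Taken together, a hedged usage sketch of the helpers above (the registry here is empty for brevity; normally it holds the agent's commands):
```python
from autogpt.llm.providers.openai import (
    count_openai_functions_tokens,
    format_function_specs_as_typescript_ns,
    get_openai_command_specs,
)
from autogpt.models.command_registry import CommandRegistry

registry = CommandRegistry()  # normally populated with the agent's commands
specs = get_openai_command_specs(registry)

# Render the specs the way OpenAI formats them internally, then count their token cost.
print(format_function_specs_as_typescript_ns(specs))
print(count_openai_functions_tokens(specs, "gpt-3.5-turbo"), "tokens")
```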

View File

@@ -7,12 +7,19 @@ from colorama import Fore
from autogpt.config import Config
from ..api_manager import ApiManager
from ..base import ChatModelResponse, ChatSequence, Message
from ..base import (
ChatModelResponse,
ChatSequence,
FunctionCallDict,
Message,
ResponseMessageDict,
)
from ..providers import openai as iopenai
from ..providers.openai import (
OPEN_AI_CHAT_MODELS,
OpenAIFunctionCall,
OpenAIFunctionSpec,
count_openai_functions_tokens,
)
from .token_counter import *
@@ -111,7 +118,13 @@ def create_chat_completion(
if temperature is None:
temperature = config.temperature
if max_tokens is None:
max_tokens = OPEN_AI_CHAT_MODELS[model].max_tokens - prompt.token_length
prompt_tlength = prompt.token_length
max_tokens = OPEN_AI_CHAT_MODELS[model].max_tokens - prompt_tlength
logger.debug(f"Prompt length: {prompt_tlength} tokens")
if functions:
functions_tlength = count_openai_functions_tokens(functions, model)
max_tokens -= functions_tlength
logger.debug(f"Functions take up {functions_tlength} tokens in API call")
logger.debug(
f"{Fore.GREEN}Creating chat completion with model {model}, temperature {temperature}, max_tokens {max_tokens}{Fore.RESET}"
@@ -138,9 +151,8 @@ def create_chat_completion(
if functions:
chat_completion_kwargs["functions"] = [
function.__dict__ for function in functions
function.schema for function in functions
]
logger.debug(f"Function dicts: {chat_completion_kwargs['functions']}")
response = iopenai.create_chat_completion(
messages=prompt.raw(),
@@ -152,19 +164,24 @@ def create_chat_completion(
logger.error(response.error)
raise RuntimeError(response.error)
first_message = response.choices[0].message
first_message: ResponseMessageDict = response.choices[0].message
content: str | None = first_message.get("content")
function_call: OpenAIFunctionCall | None = first_message.get("function_call")
function_call: FunctionCallDict | None = first_message.get("function_call")
for plugin in config.plugins:
if not plugin.can_handle_on_response():
continue
# TODO: function call support in plugin.on_response()
content = plugin.on_response(content)
return ChatModelResponse(
model_info=OPEN_AI_CHAT_MODELS[model],
content=content,
function_call=function_call,
function_call=OpenAIFunctionCall(
name=function_call["name"], arguments=function_call["arguments"]
)
if function_call
else None,
)

View File

@@ -1,7 +1,7 @@
"""Functions for counting the number of tokens in a message or string."""
from __future__ import annotations
from typing import List
from typing import List, overload
import tiktoken
@@ -9,8 +9,18 @@ from autogpt.llm.base import Message
from autogpt.logs import logger
@overload
def count_message_tokens(messages: Message, model: str = "gpt-3.5-turbo") -> int:
...
@overload
def count_message_tokens(messages: List[Message], model: str = "gpt-3.5-turbo") -> int:
...
def count_message_tokens(
messages: List[Message], model: str = "gpt-3.5-turbo-0301"
messages: Message | List[Message], model: str = "gpt-3.5-turbo"
) -> int:
"""
Returns the number of tokens used by a list of messages.
@@ -24,6 +34,9 @@ def count_message_tokens(
Returns:
int: The number of tokens used by the list of messages.
"""
if isinstance(messages, Message):
messages = [messages]
if model.startswith("gpt-3.5-turbo"):
tokens_per_message = (
4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
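A short sketch of the two call forms the overloads above allow (assuming the tiktoken encoding data is available locally):
```python
from autogpt.llm.base import Message
from autogpt.llm.utils import count_message_tokens

single = Message("user", "Hello there!")
history = [single, Message("assistant", "General Kenobi.")]

# A lone Message is now accepted directly; it is wrapped in a list internally.
print(count_message_tokens(single, "gpt-3.5-turbo"))
print(count_message_tokens(history, "gpt-3.5-turbo"))
```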

View File

@@ -1,20 +0,0 @@
import json
import logging
class JsonFileHandler(logging.FileHandler):
def __init__(self, filename, mode="a", encoding=None, delay=False):
super().__init__(filename, mode, encoding, delay)
def emit(self, record):
json_data = json.loads(self.format(record))
with open(self.baseFilename, "w", encoding="utf-8") as f:
json.dump(json_data, f, ensure_ascii=False, indent=4)
import logging
class JsonFormatter(logging.Formatter):
def format(self, record):
return record.msg

15
autogpt/logs/__init__.py Normal file
View File

@@ -0,0 +1,15 @@
from .formatters import AutoGptFormatter, JsonFormatter, remove_color_codes
from .handlers import ConsoleHandler, JsonFileHandler, TypingConsoleHandler
from .log_cycle import (
CURRENT_CONTEXT_FILE_NAME,
FULL_MESSAGE_HISTORY_FILE_NAME,
NEXT_ACTION_FILE_NAME,
PROMPT_SUMMARY_FILE_NAME,
PROMPT_SUPERVISOR_FEEDBACK_FILE_NAME,
SUMMARY_FILE_NAME,
SUPERVISOR_FEEDBACK_FILE_NAME,
USER_INPUT_FILE_NAME,
LogCycleHandler,
)
from .logger import Logger, logger
from .utils import print_assistant_thoughts, remove_ansi_escape

View File

@@ -0,0 +1,41 @@
import logging
import re
from colorama import Style
class AutoGptFormatter(logging.Formatter):
"""
Allows to handle custom placeholders 'title_color' and 'message_no_color'.
To use this formatter, make sure to pass 'color', 'title' as log extras.
"""
def format(self, record: logging.LogRecord) -> str:
if hasattr(record, "color"):
record.title_color = (
getattr(record, "color")
+ getattr(record, "title", "")
+ " "
+ Style.RESET_ALL
)
else:
record.title_color = getattr(record, "title", "")
# Add this line to set 'title' to an empty string if it doesn't exist
record.title = getattr(record, "title", "")
if hasattr(record, "msg"):
record.message_no_color = remove_color_codes(getattr(record, "msg"))
else:
record.message_no_color = ""
return super().format(record)
def remove_color_codes(s: str) -> str:
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
return ansi_escape.sub("", s)
class JsonFormatter(logging.Formatter):
def format(self, record: logging.LogRecord):
return record.msg
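A minimal sketch of how the formatter's extras are intended to be supplied (the format string is an assumption, not the logger's actual one):
```python
import logging

from colorama import Fore

from autogpt.logs.formatters import AutoGptFormatter

handler = logging.StreamHandler()
handler.setFormatter(AutoGptFormatter("%(title_color)s %(message_no_color)s"))

log = logging.getLogger("demo")
log.addHandler(handler)
log.setLevel(logging.INFO)

# 'title' and 'color' are passed as log extras, as the docstring above describes.
log.info("Agent started", extra={"title": "INFO:", "color": Fore.GREEN})
```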

47
autogpt/logs/handlers.py Normal file
View File

@@ -0,0 +1,47 @@
import json
import logging
import random
import time
class ConsoleHandler(logging.StreamHandler):
def emit(self, record: logging.LogRecord) -> None:
msg = self.format(record)
try:
print(msg)
except Exception:
self.handleError(record)
class TypingConsoleHandler(logging.StreamHandler):
"""Output stream to console using simulated typing"""
def emit(self, record: logging.LogRecord):
min_typing_speed = 0.05
max_typing_speed = 0.01
msg = self.format(record)
try:
words = msg.split()
for i, word in enumerate(words):
print(word, end="", flush=True)
if i < len(words) - 1:
print(" ", end="", flush=True)
typing_speed = random.uniform(min_typing_speed, max_typing_speed)
time.sleep(typing_speed)
# type faster after each word
min_typing_speed = min_typing_speed * 0.95
max_typing_speed = max_typing_speed * 0.95
print()
except Exception:
self.handleError(record)
class JsonFileHandler(logging.FileHandler):
def __init__(self, filename: str, mode="a", encoding=None, delay=False):
super().__init__(filename, mode, encoding, delay)
def emit(self, record: logging.LogRecord):
json_data = json.loads(self.format(record))
with open(self.baseFilename, "w", encoding="utf-8") as f:
json.dump(json_data, f, ensure_ascii=False, indent=4)

View File

@@ -2,7 +2,7 @@ import json
import os
from typing import Any, Dict, Union
from autogpt.logs import logger
from .logger import logger
DEFAULT_PREFIX = "agent"
FULL_MESSAGE_HISTORY_FILE_NAME = "full_message_history.json"
@@ -42,7 +42,7 @@ class LogCycleHandler:
return outer_folder_path
def get_agent_short_name(self, ai_name):
def get_agent_short_name(self, ai_name: str) -> str:
return ai_name[:15].rstrip() if ai_name else DEFAULT_PREFIX
def create_inner_directory(self, outer_folder_path: str, cycle_count: int) -> str:

View File

@@ -3,20 +3,18 @@ from __future__ import annotations
import logging
import os
import random
import re
import time
from logging import LogRecord
from typing import TYPE_CHECKING, Any, Optional
from colorama import Fore, Style
from colorama import Fore
if TYPE_CHECKING:
from autogpt.config import Config
from autogpt.log_cycle.json_handler import JsonFileHandler, JsonFormatter
from autogpt.singleton import Singleton
from .formatters import AutoGptFormatter, JsonFormatter
from .handlers import ConsoleHandler, JsonFileHandler, TypingConsoleHandler
class Logger(metaclass=Singleton):
"""
@@ -100,8 +98,13 @@ class Logger(metaclass=Singleton):
self.typing_logger.addHandler(self.console_handler)
def typewriter_log(
self, title="", title_color="", content="", speak_text=False, level=logging.INFO
):
self,
title: str = "",
title_color: str = "",
content: str = "",
speak_text: bool = False,
level: int = logging.INFO,
) -> None:
from autogpt.speech import say_text
if speak_text and self.config and self.config.speak_mode:
@@ -122,29 +125,29 @@ class Logger(metaclass=Singleton):
def debug(
self,
message,
title="",
title_color="",
):
message: str,
title: str = "",
title_color: str = "",
) -> None:
self._log(title, title_color, message, logging.DEBUG)
def info(
self,
message,
title="",
title_color="",
):
message: str,
title: str = "",
title_color: str = "",
) -> None:
self._log(title, title_color, message, logging.INFO)
def warn(
self,
message,
title="",
title_color="",
):
message: str,
title: str = "",
title_color: str = "",
) -> None:
self._log(title, title_color, message, logging.WARN)
def error(self, title, message=""):
def error(self, title: str, message: str = "") -> None:
self._log(title, Fore.RED, message, logging.ERROR)
def _log(
@@ -152,8 +155,8 @@ class Logger(metaclass=Singleton):
title: str = "",
title_color: str = "",
message: str = "",
level=logging.INFO,
):
level: int = logging.INFO,
) -> None:
if message:
if isinstance(message, list):
message = " ".join(message)
@@ -161,11 +164,11 @@ class Logger(metaclass=Singleton):
level, message, extra={"title": str(title), "color": str(title_color)}
)
def set_level(self, level):
def set_level(self, level: logging._Level) -> None:
self.logger.setLevel(level)
self.typing_logger.setLevel(level)
def double_check(self, additionalText=None):
def double_check(self, additionalText: Optional[str] = None) -> None:
if not additionalText:
additionalText = (
"Please ensure you've setup and configured everything"
@@ -191,131 +194,10 @@ class Logger(metaclass=Singleton):
self.json_logger.debug(data)
self.json_logger.removeHandler(json_data_handler)
def get_log_directory(self):
def get_log_directory(self) -> str:
this_files_dir_path = os.path.dirname(__file__)
log_dir = os.path.join(this_files_dir_path, "../logs")
log_dir = os.path.join(this_files_dir_path, "../../logs")
return os.path.abspath(log_dir)
"""
Output stream to console using simulated typing
"""
class TypingConsoleHandler(logging.StreamHandler):
def emit(self, record):
min_typing_speed = 0.05
max_typing_speed = 0.01
msg = self.format(record)
try:
words = msg.split()
for i, word in enumerate(words):
print(word, end="", flush=True)
if i < len(words) - 1:
print(" ", end="", flush=True)
typing_speed = random.uniform(min_typing_speed, max_typing_speed)
time.sleep(typing_speed)
# type faster after each word
min_typing_speed = min_typing_speed * 0.95
max_typing_speed = max_typing_speed * 0.95
print()
except Exception:
self.handleError(record)
class ConsoleHandler(logging.StreamHandler):
def emit(self, record) -> None:
msg = self.format(record)
try:
print(msg)
except Exception:
self.handleError(record)
class AutoGptFormatter(logging.Formatter):
"""
Allows to handle custom placeholders 'title_color' and 'message_no_color'.
To use this formatter, make sure to pass 'color', 'title' as log extras.
"""
def format(self, record: LogRecord) -> str:
if hasattr(record, "color"):
record.title_color = (
getattr(record, "color")
+ getattr(record, "title", "")
+ " "
+ Style.RESET_ALL
)
else:
record.title_color = getattr(record, "title", "")
# Add this line to set 'title' to an empty string if it doesn't exist
record.title = getattr(record, "title", "")
if hasattr(record, "msg"):
record.message_no_color = remove_color_codes(getattr(record, "msg"))
else:
record.message_no_color = ""
return super().format(record)
def remove_color_codes(s: str) -> str:
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
return ansi_escape.sub("", s)
def remove_ansi_escape(s: str) -> str:
return s.replace("\x1B", "")
logger = Logger()
def print_assistant_thoughts(
ai_name: object,
assistant_reply_json_valid: object,
config: Config,
) -> None:
from autogpt.speech import say_text
assistant_thoughts_reasoning = None
assistant_thoughts_plan = None
assistant_thoughts_speak = None
assistant_thoughts_criticism = None
assistant_thoughts = assistant_reply_json_valid.get("thoughts", {})
assistant_thoughts_text = remove_ansi_escape(assistant_thoughts.get("text", ""))
if assistant_thoughts:
assistant_thoughts_reasoning = remove_ansi_escape(
assistant_thoughts.get("reasoning")
)
assistant_thoughts_plan = remove_ansi_escape(assistant_thoughts.get("plan"))
assistant_thoughts_criticism = remove_ansi_escape(
assistant_thoughts.get("criticism")
)
assistant_thoughts_speak = remove_ansi_escape(assistant_thoughts.get("speak"))
logger.typewriter_log(
f"{ai_name.upper()} THOUGHTS:", Fore.YELLOW, f"{assistant_thoughts_text}"
)
logger.typewriter_log("REASONING:", Fore.YELLOW, f"{assistant_thoughts_reasoning}")
if assistant_thoughts_plan:
logger.typewriter_log("PLAN:", Fore.YELLOW, "")
# If it's a list, join it into a string
if isinstance(assistant_thoughts_plan, list):
assistant_thoughts_plan = "\n".join(assistant_thoughts_plan)
elif isinstance(assistant_thoughts_plan, dict):
assistant_thoughts_plan = str(assistant_thoughts_plan)
# Split the input_string using the newline character and dashes
lines = assistant_thoughts_plan.split("\n")
for line in lines:
line = line.lstrip("- ")
logger.typewriter_log("- ", Fore.GREEN, line.strip())
logger.typewriter_log("CRITICISM:", Fore.YELLOW, f"{assistant_thoughts_criticism}")
# Speak the assistant's thoughts
if assistant_thoughts_speak:
if config.speak_mode:
say_text(assistant_thoughts_speak, config)
else:
logger.typewriter_log("SPEAK:", Fore.YELLOW, f"{assistant_thoughts_speak}")

65
autogpt/logs/utils.py Normal file
View File

@@ -0,0 +1,65 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from colorama import Fore
if TYPE_CHECKING:
from autogpt.config import Config
from .logger import logger
def print_assistant_thoughts(
ai_name: str,
assistant_reply_json_valid: dict,
config: Config,
) -> None:
from autogpt.speech import say_text
assistant_thoughts_reasoning = None
assistant_thoughts_plan = None
assistant_thoughts_speak = None
assistant_thoughts_criticism = None
assistant_thoughts = assistant_reply_json_valid.get("thoughts", {})
assistant_thoughts_text = remove_ansi_escape(assistant_thoughts.get("text", ""))
if assistant_thoughts:
assistant_thoughts_reasoning = remove_ansi_escape(
assistant_thoughts.get("reasoning", "")
)
assistant_thoughts_plan = remove_ansi_escape(assistant_thoughts.get("plan", ""))
assistant_thoughts_criticism = remove_ansi_escape(
assistant_thoughts.get("criticism", "")
)
assistant_thoughts_speak = remove_ansi_escape(
assistant_thoughts.get("speak", "")
)
logger.typewriter_log(
f"{ai_name.upper()} THOUGHTS:", Fore.YELLOW, assistant_thoughts_text
)
logger.typewriter_log("REASONING:", Fore.YELLOW, str(assistant_thoughts_reasoning))
if assistant_thoughts_plan:
logger.typewriter_log("PLAN:", Fore.YELLOW, "")
# If it's a list, join it into a string
if isinstance(assistant_thoughts_plan, list):
assistant_thoughts_plan = "\n".join(assistant_thoughts_plan)
elif isinstance(assistant_thoughts_plan, dict):
assistant_thoughts_plan = str(assistant_thoughts_plan)
# Split the input_string using the newline character and dashes
lines = assistant_thoughts_plan.split("\n")
for line in lines:
line = line.lstrip("- ")
logger.typewriter_log("- ", Fore.GREEN, line.strip())
logger.typewriter_log("CRITICISM:", Fore.YELLOW, f"{assistant_thoughts_criticism}")
# Speak the assistant's thoughts
if assistant_thoughts_speak:
if config.speak_mode:
say_text(assistant_thoughts_speak, config)
else:
logger.typewriter_log("SPEAK:", Fore.YELLOW, f"{assistant_thoughts_speak}")
def remove_ansi_escape(s: str) -> str:
return s.replace("\x1B", "")

View File

@@ -6,7 +6,7 @@ from typing import Optional
from colorama import Fore, Style
from autogpt.agent import Agent
from autogpt.agents import Agent
from autogpt.config.config import ConfigBuilder, check_openai_api_key
from autogpt.configurator import create_config
from autogpt.logs import logger
@@ -28,7 +28,6 @@ COMMAND_CATEGORIES = [
"autogpt.commands.file_operations",
"autogpt.commands.web_search",
"autogpt.commands.web_selenium",
"autogpt.app",
"autogpt.commands.task_statuses",
]

View File

@@ -2,49 +2,45 @@ from __future__ import annotations
import copy
import json
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
from autogpt.agent import Agent
from autogpt.agents import Agent
from autogpt.config import Config
from autogpt.json_utils.utilities import extract_json_from_response
from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType
from autogpt.llm.base import ChatSequence, Message
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
from autogpt.llm.utils import count_string_tokens, create_chat_completion
from autogpt.log_cycle.log_cycle import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME
from autogpt.logs import logger
from autogpt.llm.utils import (
count_message_tokens,
count_string_tokens,
create_chat_completion,
)
from autogpt.logs import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME, logger
@dataclass
class MessageHistory:
agent: Agent
messages: list[Message] = field(default_factory=list)
class MessageHistory(ChatSequence):
max_summary_tlength: int = 500
agent: Optional[Agent] = None
summary: str = "I was created"
last_trimmed_index: int = 0
def __getitem__(self, i: int):
return self.messages[i]
SUMMARIZATION_PROMPT = '''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember.
def __iter__(self):
return iter(self.messages)
You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise.
def __len__(self):
return len(self.messages)
Summary So Far:
"""
{summary}
"""
def add(
self,
role: MessageRole,
content: str,
type: MessageType | None = None,
):
return self.append(Message(role, content, type))
def append(self, message: Message):
return self.messages.append(message)
Latest Development:
"""
{new_events}
"""
'''
def trim_messages(
self, current_message_chain: list[Message], config: Config
@@ -84,7 +80,7 @@ class MessageHistory:
return new_summary_message, new_messages_not_in_chain
def per_cycle(self, config: Config, messages: list[Message] | None = None):
def per_cycle(self, messages: list[Message] | None = None):
"""
Yields:
Message: a message containing user input
@@ -119,26 +115,33 @@ class MessageHistory:
)
def update_running_summary(
self, new_events: list[Message], config: Config
self,
new_events: list[Message],
config: Config,
max_summary_length: Optional[int] = None,
) -> Message:
"""
This function takes a list of dictionaries representing new events and combines them with the current summary,
focusing on key and potentially important information to remember. The updated summary is returned in a message
formatted in the 1st person past tense.
This function takes a list of Message objects and updates the running summary
to include the events they describe. The updated summary is returned
in a Message formatted in the 1st person past tense.
Args:
new_events (List[Dict]): A list of dictionaries containing the latest events to be added to the summary.
new_events: A list of Messages containing the latest events to be added to the summary.
Returns:
str: A message containing the updated summary of actions, formatted in the 1st person past tense.
Message: a Message containing the updated running summary.
Example:
```py
new_events = [{"event": "entered the kitchen."}, {"event": "found a scrawled note with the number 7"}]
update_running_summary(new_events)
# Returns: "This reminds you of these events from your past: \nI entered the kitchen and found a scrawled note saying 7."
```
"""
if not new_events:
return self.summary_message()
if not max_summary_length:
max_summary_length = self.max_summary_tlength
# Create a copy of the new_events list to prevent modifying the original list
new_events = copy.deepcopy(new_events)
@@ -166,29 +169,29 @@ class MessageHistory:
elif event.role == "user":
new_events.remove(event)
# Summarize events and current summary in batch to a new running summary
summ_model = OPEN_AI_CHAT_MODELS[config.fast_llm]
# Assume an upper bound length for the summary prompt template, i.e. Your task is to create a concise running summary...., in summarize_batch func
# TODO make this default dynamic
prompt_template_length = 100
max_tokens = OPEN_AI_CHAT_MODELS.get(config.fast_llm).max_tokens
summary_tlength = count_string_tokens(str(self.summary), config.fast_llm)
# Determine token lengths for use in batching
prompt_template_length = len(
MessageHistory.SUMMARIZATION_PROMPT.format(summary="", new_events="")
)
max_input_tokens = summ_model.max_tokens - max_summary_length
summary_tlength = count_string_tokens(self.summary, summ_model.name)
batch = []
batch_tlength = 0
# TODO Can put a cap on length of total new events and drop some previous events to save API cost, but need to think thru more how to do it without losing the context
# TODO: Put a cap on length of total new events and drop some previous events to
# save API cost. Need to think thru more how to do it without losing the context.
for event in new_events:
event_tlength = count_string_tokens(str(event), config.fast_llm)
event_tlength = count_message_tokens(event, summ_model.name)
if (
batch_tlength + event_tlength
> max_tokens - prompt_template_length - summary_tlength
> max_input_tokens - prompt_template_length - summary_tlength
):
# The batch is full. Summarize it and start a new one.
self.summarize_batch(batch, config)
summary_tlength = count_string_tokens(
str(self.summary), config.fast_llm
)
self.summarize_batch(batch, config, max_summary_length)
summary_tlength = count_string_tokens(self.summary, summ_model.name)
batch = [event]
batch_tlength = event_tlength
else:
@@ -197,41 +200,36 @@ class MessageHistory:
if batch:
# There's an unprocessed batch. Summarize it.
self.summarize_batch(batch, config)
self.summarize_batch(batch, config, max_summary_length)
return self.summary_message()
def summarize_batch(self, new_events_batch, config):
prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember.
You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise.
Summary So Far:
"""
{self.summary}
"""
Latest Development:
"""
{new_events_batch or "Nothing new happened."}
"""
'''
def summarize_batch(
self, new_events_batch: list[Message], config: Config, max_output_length: int
):
prompt = MessageHistory.SUMMARIZATION_PROMPT.format(
summary=self.summary, new_events=new_events_batch
)
prompt = ChatSequence.for_model(config.fast_llm, [Message("user", prompt)])
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_name,
self.agent.created_at,
self.agent.cycle_count,
prompt.raw(),
PROMPT_SUMMARY_FILE_NAME,
)
if self.agent:
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_config.ai_name,
self.agent.created_at,
self.agent.cycle_count,
prompt.raw(),
PROMPT_SUMMARY_FILE_NAME,
)
self.summary = create_chat_completion(prompt, config).content
self.summary = create_chat_completion(
prompt, config, max_tokens=max_output_length
).content
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_name,
self.agent.created_at,
self.agent.cycle_count,
self.summary,
SUMMARY_FILE_NAME,
)
if self.agent:
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_config.ai_name,
self.agent.created_at,
self.agent.cycle_count,
self.summary,
SUMMARY_FILE_NAME,
)

View File

@@ -6,13 +6,12 @@ import numpy as np
from autogpt.config.config import Config
from autogpt.logs import logger
from autogpt.singleton import AbstractSingleton
from .. import MemoryItem, MemoryItemRelevance
from ..utils import Embedding, get_embedding
class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton):
class VectorMemoryProvider(MutableSet[MemoryItem]):
@abc.abstractmethod
def __init__(self, config: Config):
pass

View File

@@ -15,8 +15,12 @@ class CommandRegistry:
directory.
"""
commands: dict[str, Command] = {}
commands_aliases: dict[str, Command] = {}
commands: dict[str, Command]
commands_aliases: dict[str, Command]
def __init__(self):
self.commands = {}
self.commands_aliases = {}
def __contains__(self, command_name: str):
return command_name in self.commands or command_name in self.commands_aliases
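This change sidesteps the shared-mutable-class-attribute pitfall: with class-level dict defaults, every registry instance writes into the same dict. A generic illustration of the failure mode (not the registry's real API):
```python
class SharedDefault:
    commands: dict = {}  # class attribute: one dict shared by all instances

class PerInstance:
    def __init__(self):
        self.commands = {}  # instance attribute: each object gets its own dict

a, b = SharedDefault(), SharedDefault()
a.commands["greet"] = lambda: "hi"
assert "greet" in b.commands  # the write leaked into the other instance

c, d = PerInstance(), PerInstance()
c.commands["greet"] = lambda: "hi"
assert "greet" not in d.commands  # isolated, as the fixed registry now behaves
```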

View File

@@ -123,7 +123,11 @@ class Workspace:
logger.debug(f"Resolved root as '{root}'")
# Allow exception for absolute paths if they are contained in your workspace directory.
if relative_path.is_absolute() and not relative_path.is_relative_to(root):
if (
relative_path.is_absolute()
and restrict_to_root
and not relative_path.is_relative_to(root)
):
raise ValueError(
f"Attempted to access absolute path '{relative_path}' in workspace '{root}'."
)
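The added condition amounts to a relative-to check on resolved paths; a generic sketch of that logic (not the actual Workspace method signature):
```python
from pathlib import Path

def check_path(path: Path, root: Path, restrict_to_root: bool = True) -> None:
    # Absolute paths are rejected only when restriction is enabled and the path
    # falls outside the workspace root.
    if path.is_absolute() and restrict_to_root and not path.is_relative_to(root):
        raise ValueError(
            f"Attempted to access absolute path '{path}' in workspace '{root}'."
        )

check_path(Path("/tmp/workspace/file.txt"), Path("/tmp/workspace"))  # allowed
check_path(Path("/etc/passwd"), Path("/tmp/workspace"), restrict_to_root=False)  # allowed
```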

View File

@@ -1,4 +1,4 @@
from autogpt.agent import Agent
from autogpt.agents import Agent
from autogpt.config import AIConfig, Config, ConfigBuilder
from autogpt.main import COMMAND_CATEGORIES
from autogpt.memory.vector import get_memory

BIN docs/imgs/e2b-dashboard.png (new file, 504 KiB; binary file not shown)
BIN docs/imgs/e2b-log-url.png (new file, 43 KiB; binary file not shown)
BIN docs/imgs/e2b-new-tag.png (new file, 47 KiB; binary file not shown)
BIN (unnamed image, new file, 20 KiB; binary file not shown)

52
docs/share-your-logs.md Normal file
View File

@@ -0,0 +1,52 @@
## Share your logs with us to help improve Auto-GPT
Do you notice weird behavior with your agent? Do you have an interesting use case? Do you have a bug you want to report?
Follow the steps below to enable your logs and upload them. You can include these logs when making an issue report or discussing an issue with us.
### Enable Debug Logs
Activity, Error, and Debug logs are located in `./logs`
To print out debug logs:
``` shell
./run.sh --debug # on Linux / macOS
.\run.bat --debug # on Windows
docker-compose run --rm auto-gpt --debug # in Docker
```
### Inspect and share logs
You can inspect and share logs via [e2b](https://e2b.dev).
![E2b logs dashboard](./imgs/e2b-dashboard.png)
1. Go to [autogpt.e2b.dev](https://autogpt.e2b.dev) and sign in.
2. You'll see logs from other members of the AutoGPT team that you can inspect.
3. Or upload your own logs: click the "Upload log folder" button and select the debug logs directory that you generated. Wait 1-2 seconds and the page will reload.
4. You can share logs by sharing the URL from your browser.
![E2b log URL](./imgs/e2b-log-url.png)
### Add tags to logs
You can add custom tags to logs for other members of your team. This is useful if you want to indicate, for example, that the agent is having issues with challenges.
E2b offers three severity levels:
- Success
- Warning
- Error
You can name your tag any way you want.
#### How to add a tag
1. Click on the "plus" button on the left from the logs folder name.
![E2b tag button](./imgs/e2b-tag-button.png)
2. Type the name of a new tag.
3. Select the severity.
![E2b new tag](./imgs/e2b-new-tag.png)

View File

@@ -89,12 +89,20 @@ This may give your bot increased intelligence.
## Logs
Activity and error logs are located in the `./output/logs`
Activity, Error, and Debug logs are located in `./logs`
!!! tip
Do you notice weird behavior with your agent? Do you have an interesting use case? Do you have a bug you want to report?
Follow the step below to enable your logs. You can include these logs when making an issue report or discussing an issue with us.
To print out debug logs:
``` shell
./run.sh --debug
./run.sh --debug # on Linux / macOS
.\run.bat --debug # on Windows
docker-compose run --rm auto-gpt --debug # in Docker
```
## Disabling Command Categories

View File

@@ -12,7 +12,8 @@ nav:
- Voice: configuration/voice.md
- Image Generation: configuration/imagegen.md
- Contributing:
- Help us improve Auto-GPT:
- Share your debug logs with us: share-your-logs.md
- Contribution guide: contributing.md
- Running tests: testing.md
- Code of Conduct: code-of-conduct.md

View File

@@ -3,4 +3,4 @@
[build]
publish = "public/"
command = "mkdocs build -d public"
ignore = "git diff --quiet HEAD^ HEAD docs mkdocs.yml CONTRIBUTING.md CODE_OF_CONDUCT.md LICENSE"
ignore = "git diff --quiet $CACHED_COMMIT_REF $COMMIT_REF docs mkdocs.yml CONTRIBUTING.md CODE_OF_CONDUCT.md LICENSE"

View File

@@ -7,7 +7,6 @@ constraints: [
resources: [
'Internet access for searches and information gathering.',
'Long Term memory management.',
'GPT-3.5 powered Agents for delegation of simple tasks.',
'File output.'
]
performance_evaluations: [

View File

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "agpt"
version = "0.4.4"
version = "0.4.5"
authors = [
{ name="Torantulino", email="support@agpt.co" },
]

View File

@@ -3,7 +3,7 @@ from pathlib import Path
import pytest
from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.agents import Agent
from autogpt.commands.execute_code import execute_python_file
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge

View File

@@ -6,7 +6,7 @@ from typing import Any, Generator
import pytest
from autogpt.log_cycle.log_cycle import LogCycleHandler
from autogpt.logs import LogCycleHandler
from autogpt.workspace import Workspace
from benchmarks import run_task
from tests.challenges.schema import Task

View File

@@ -6,10 +6,11 @@ import pytest
import yaml
from pytest_mock import MockerFixture
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.config import AIConfig, Config, ConfigBuilder
from autogpt.config.ai_config import AIConfig
from autogpt.llm.api_manager import ApiManager
from autogpt.logs import logger
from autogpt.memory.vector import get_memory
from autogpt.models.command_registry import CommandRegistry
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
@@ -52,6 +53,9 @@ def config(
if not os.environ.get("OPENAI_API_KEY"):
os.environ["OPENAI_API_KEY"] = "sk-dummy"
# HACK: this is necessary to ensure PLAIN_OUTPUT takes effect
logger.config = config
config.plugins_dir = "tests/unit/data/test_plugins"
config.plugins_config_file = temp_plugins_config_file

View File

@@ -1,6 +1,6 @@
import pytest
from autogpt.agent import Agent
from autogpt.agents import Agent
from autogpt.config import AIConfig, Config
from autogpt.memory.vector import get_memory
from autogpt.models.command_registry import CommandRegistry

View File

@@ -8,12 +8,6 @@ from autogpt.memory.vector import JSONFileMemory, MemoryItem
from autogpt.workspace import Workspace
@pytest.fixture(autouse=True)
def cleanup_sut_singleton():
if JSONFileMemory in JSONFileMemory._instances:
del JSONFileMemory._instances[JSONFileMemory]
def test_json_memory_init_without_backing_file(config: Config, workspace: Workspace):
index_file = workspace.root / f"{config.memory_index}.json"

View File

@@ -1,12 +1,13 @@
import os
import random
import re
import string
import tempfile
import pytest
import autogpt.commands.execute_code as sut # system under testing
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.config import Config
@@ -88,13 +89,9 @@ def test_execute_python_file_invalid(agent: Agent):
def test_execute_python_file_not_found(agent: Agent):
assert all(
s in sut.execute_python_file("notexist.py", agent).lower()
for s in [
"python: can't open file 'notexist.py'",
"[errno 2] no such file or directory",
]
)
result = sut.execute_python_file("notexist.py", agent).lower()
assert re.match(r"python: can't open file '([A-Z]:)?[/\\\-\w]*notexist.py'", result)
assert "[errno 2] no such file or directory" in result
def test_execute_shell(random_string: str, agent: Agent):

View File

@@ -6,7 +6,7 @@ from unittest.mock import patch
import pytest
from PIL import Image
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.commands.image_gen import generate_image, generate_image_with_sd_webui

View File

@@ -1,7 +1,7 @@
import pytest
from pytest_mock import MockerFixture
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.commands.web_selenium import browse_website
@@ -12,6 +12,6 @@ def test_browse_website(agent: Agent, patched_api_requestor: MockerFixture):
question = "How to execute a barrel roll"
response = browse_website(url, question, agent)
assert "Error" in response
assert "error" in response.lower()
# Sanity check that the response is not too long
assert len(response) < 200

View File

@@ -1,46 +1,27 @@
from unittest.mock import MagicMock
import pytest
from autogpt.agent import Agent
from autogpt.config import AIConfig
from autogpt.config.config import Config
@pytest.fixture
def agent(config: Config):
ai_name = "Test AI"
memory = MagicMock()
next_action_count = 0
command_registry = MagicMock()
ai_config = AIConfig(ai_name=ai_name)
system_prompt = "System prompt"
triggering_prompt = "Triggering prompt"
workspace_directory = "workspace_directory"
agent = Agent(
ai_name=ai_name,
memory=memory,
next_action_count=next_action_count,
command_registry=command_registry,
ai_config=ai_config,
config=config,
system_prompt=system_prompt,
triggering_prompt=triggering_prompt,
workspace_directory=workspace_directory,
)
return agent
from autogpt.agents.agent import Agent, execute_command
def test_agent_initialization(agent: Agent):
assert agent.ai_name == "Test AI"
assert agent.memory == agent.memory
assert agent.ai_name == "Base"
assert agent.history.messages == []
assert agent.next_action_count == 0
assert agent.command_registry == agent.command_registry
assert agent.ai_config == agent.ai_config
assert agent.system_prompt == "System prompt"
assert agent.triggering_prompt == "Triggering prompt"
def test_execute_command_plugin(agent: Agent):
"""Test that executing a command that came from a plugin works as expected"""
command_name = "check_plan"
agent.ai_config.prompt_generator.add_command(
command_name,
"Read the plan.md with the next goals to achieve",
{},
lambda: "hi",
)
command_result = execute_command(
command_name=command_name,
arguments={},
agent=agent,
)
assert command_result == "hi"
# More test methods can be added for specific agent interactions

View File

@@ -1,70 +0,0 @@
import pytest
from autogpt.agent.agent_manager import AgentManager
from autogpt.llm import ChatModelResponse
from autogpt.llm.chat import create_chat_completion
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
@pytest.fixture
def agent_manager(config):
# Hack, real gross. Singletons are not good times.
yield AgentManager(config)
del AgentManager._instances[AgentManager]
@pytest.fixture
def task():
return "translate English to French"
@pytest.fixture
def prompt():
return "Translate the following English text to French: 'Hello, how are you?'"
@pytest.fixture
def model():
return "gpt-3.5-turbo"
@pytest.fixture(autouse=True)
def mock_create_chat_completion(mocker, config):
mock_create_chat_completion = mocker.patch(
"autogpt.agent.agent_manager.create_chat_completion",
wraps=create_chat_completion,
)
mock_create_chat_completion.return_value = ChatModelResponse(
model_info=OPEN_AI_CHAT_MODELS[config.fast_llm],
content="irrelevant",
function_call={},
)
return mock_create_chat_completion
def test_create_agent(agent_manager: AgentManager, task, prompt, model):
key, agent_reply = agent_manager.create_agent(task, prompt, model)
assert isinstance(key, int)
assert isinstance(agent_reply, str)
assert key in agent_manager.agents
def test_message_agent(agent_manager: AgentManager, task, prompt, model):
key, _ = agent_manager.create_agent(task, prompt, model)
user_message = "Please translate 'Good morning' to French."
agent_reply = agent_manager.message_agent(key, user_message)
assert isinstance(agent_reply, str)
def test_list_agents(agent_manager: AgentManager, task, prompt, model):
key, _ = agent_manager.create_agent(task, prompt, model)
agents_list = agent_manager.list_agents()
assert isinstance(agents_list, list)
assert (key, task) in agents_list
def test_delete_agent(agent_manager: AgentManager, task, prompt, model):
key, _ = agent_manager.create_agent(task, prompt, model)
success = agent_manager.delete_agent(key)
assert success
assert key not in agent_manager.agents

View File

@@ -1,23 +0,0 @@
from autogpt.agent import Agent
from autogpt.app import execute_command
def check_plan():
return "hi"
def test_execute_command_plugin(agent: Agent):
"""Test that executing a command that came from a plugin works as expected"""
command_name = "check_plan"
agent.ai_config.prompt_generator.add_command(
command_name,
"Read the plan.md with the next goals to achieve",
{},
check_plan,
)
command_result = execute_command(
command_name=command_name,
arguments={},
agent=agent,
)
assert command_result == "hi"

View File

@@ -12,7 +12,7 @@ import pytest
from pytest_mock import MockerFixture
import autogpt.commands.file_operations as file_ops
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.config import Config
from autogpt.memory.vector.memory_item import MemoryItem
from autogpt.memory.vector.utils import Embedding
@@ -44,8 +44,13 @@ def mock_MemoryItem_from_text(
@pytest.fixture()
def test_file_path(workspace: Workspace):
return workspace.get_path("test_file.txt")
def test_file_name():
return Path("test_file.txt")
@pytest.fixture
def test_file_path(test_file_name: Path, workspace: Workspace):
return workspace.get_path(test_file_name)
@pytest.fixture()
@@ -130,42 +135,34 @@ def test_is_duplicate_operation(agent: Agent, mocker: MockerFixture):
# Test cases with write operations
assert (
file_ops.is_duplicate_operation(
"write", "path/to/file1.txt", agent.config, "checksum1"
"write", "path/to/file1.txt", agent, "checksum1"
)
is True
)
assert (
file_ops.is_duplicate_operation(
"write", "path/to/file1.txt", agent.config, "checksum2"
"write", "path/to/file1.txt", agent, "checksum2"
)
is False
)
assert (
file_ops.is_duplicate_operation(
"write", "path/to/file3.txt", agent.config, "checksum3"
"write", "path/to/file3.txt", agent, "checksum3"
)
is False
)
# Test cases with append operations
assert (
file_ops.is_duplicate_operation(
"append", "path/to/file1.txt", agent.config, "checksum1"
"append", "path/to/file1.txt", agent, "checksum1"
)
is False
)
# Test cases with delete operations
assert (
file_ops.is_duplicate_operation(
"delete", "path/to/file1.txt", config=agent.config
)
is False
)
assert (
file_ops.is_duplicate_operation(
"delete", "path/to/file3.txt", config=agent.config
)
is True
file_ops.is_duplicate_operation("delete", "path/to/file1.txt", agent) is False
)
assert file_ops.is_duplicate_operation("delete", "path/to/file3.txt", agent) is True
# Test logging a file operation
@@ -206,7 +203,15 @@ def test_read_file_not_found(agent: Agent):
assert "Error:" in content and filename in content and "no such file" in content
def test_write_to_file(test_file_path: Path, agent: Agent):
def test_write_to_file_relative_path(test_file_name: Path, agent: Agent):
new_content = "This is new content.\n"
file_ops.write_to_file(str(test_file_name), new_content, agent=agent)
with open(agent.workspace.get_path(test_file_name), "r", encoding="utf-8") as f:
content = f.read()
assert content == new_content
def test_write_to_file_absolute_path(test_file_path: Path, agent: Agent):
new_content = "This is new content.\n"
file_ops.write_to_file(str(test_file_path), new_content, agent=agent)
with open(test_file_path, "r", encoding="utf-8") as f:
@@ -214,24 +219,24 @@ def test_write_to_file(test_file_path: Path, agent: Agent):
assert content == new_content
def test_write_file_logs_checksum(test_file_path: Path, agent: Agent):
def test_write_file_logs_checksum(test_file_name: Path, agent: Agent):
new_content = "This is new content.\n"
new_checksum = file_ops.text_checksum(new_content)
file_ops.write_to_file(str(test_file_path), new_content, agent=agent)
file_ops.write_to_file(str(test_file_name), new_content, agent=agent)
with open(agent.config.file_logger_path, "r", encoding="utf-8") as f:
log_entry = f.read()
assert log_entry == f"write: {test_file_path} #{new_checksum}\n"
assert log_entry == f"write: {test_file_name} #{new_checksum}\n"
def test_write_file_fails_if_content_exists(test_file_path: Path, agent: Agent):
def test_write_file_fails_if_content_exists(test_file_name: Path, agent: Agent):
new_content = "This is new content.\n"
file_ops.log_operation(
"write",
str(test_file_path),
str(test_file_name),
agent=agent,
checksum=file_ops.text_checksum(new_content),
)
result = file_ops.write_to_file(str(test_file_path), new_content, agent=agent)
result = file_ops.write_to_file(str(test_file_name), new_content, agent=agent)
assert result == "Error: File has already been updated."
@@ -258,11 +263,11 @@ def test_append_to_file(test_nested_file: Path, agent: Agent):
def test_append_to_file_uses_checksum_from_appended_file(
test_file_path: Path, agent: Agent
test_file_name: Path, agent: Agent
):
append_text = "This is appended text.\n"
file_ops.append_to_file(test_file_path, append_text, agent=agent)
file_ops.append_to_file(test_file_path, append_text, agent=agent)
file_ops.append_to_file(test_file_name, append_text, agent=agent)
file_ops.append_to_file(test_file_name, append_text, agent=agent)
with open(agent.config.file_logger_path, "r", encoding="utf-8") as f:
log_contents = f.read()
@@ -272,8 +277,8 @@ def test_append_to_file_uses_checksum_from_appended_file(
digest.update(append_text.encode("utf-8"))
checksum2 = digest.hexdigest()
assert log_contents == (
f"append: {test_file_path} #{checksum1}\n"
f"append: {test_file_path} #{checksum2}\n"
f"append: {test_file_name} #{checksum1}\n"
f"append: {test_file_name} #{checksum2}\n"
)
@@ -288,7 +293,7 @@ def test_delete_missing_file(agent: Agent):
# confuse the log
file_ops.log_operation("write", filename, agent=agent, checksum="fake")
try:
os.remove(filename)
os.remove(agent.workspace.get_path(filename))
except FileNotFoundError as err:
assert str(err) in file_ops.delete_file(filename, agent=agent)
return

View File

@@ -2,7 +2,7 @@ import pytest
from git.exc import GitCommandError
from git.repo.base import Repo
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.commands.git_operations import clone_repository

View File

@@ -4,7 +4,7 @@ from unittest.mock import MagicMock
import pytest
from autogpt.agent import Agent
from autogpt.agents import Agent
from autogpt.config import AIConfig
from autogpt.config.config import Config
from autogpt.llm.base import ChatModelResponse, ChatSequence, Message
@@ -38,8 +38,8 @@ def agent(config: Config):
return agent
def test_message_history_batch_summary(mocker, agent, config):
history = MessageHistory(agent)
def test_message_history_batch_summary(mocker, agent: Agent, config: Config):
history = MessageHistory.for_model(agent.config.smart_llm, agent=agent)
model = config.fast_llm
message_tlength = 0
message_count = 0
@@ -48,7 +48,7 @@ def test_message_history_batch_summary(mocker, agent, config):
mock_summary_response = ChatModelResponse(
model_info=OPEN_AI_CHAT_MODELS[model],
content="I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings.",
function_call={},
function_call=None,
)
mock_summary = mocker.patch(
"autogpt.memory.message_history.create_chat_completion",
@@ -105,7 +105,7 @@ def test_message_history_batch_summary(mocker, agent, config):
result = (
"Command browse_website returned: Answer gathered from website: The text in job"
+ str(i)
+ " does not provide information on specific job requirements or a job URL.]",
+ " does not provide information on specific job requirements or a job URL.]"
)
msg = Message("system", result, "action_result")
history.append(msg)
@@ -117,7 +117,7 @@ def test_message_history_batch_summary(mocker, agent, config):
history.append(user_input_msg)
# only take the last cycle of the message history, trim the rest of previous messages, and generate a summary for them
for cycle in reversed(list(history.per_cycle(config))):
for cycle in reversed(list(history.per_cycle())):
messages_to_add = [msg for msg in cycle if msg is not None]
message_sequence.insert(insertion_index, *messages_to_add)
break
@@ -134,7 +134,7 @@ def test_message_history_batch_summary(mocker, agent, config):
)
expected_call_count = math.ceil(
message_tlength / (OPEN_AI_CHAT_MODELS.get(config.fast_llm).max_tokens)
message_tlength / (OPEN_AI_CHAT_MODELS[config.fast_llm].max_tokens)
)
# Expecting 2 batches because of over max token
assert mock_summary.call_count == expected_call_count # 2 at the time of writing
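The expected_call_count assertion above is plain ceiling division over the fast model's context size. A small self-contained sketch with assumed token totals (not values taken from the test) shows why a count of 2 falls out:

import math

message_tlength = 6000   # assumed total tokens across the trimmed messages
max_tokens = 4096        # assumed context budget of the summarisation (fast) model

# Messages are summarised in batches sized to the model's context window,
# so the number of summary calls is the ceiling of total tokens over the budget.
expected_call_count = math.ceil(message_tlength / max_tokens)
assert expected_call_count == 2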

View File

@@ -20,7 +20,7 @@ def error_factory(error_instance, error_count, retry_count, warn_user=True):
self.count = 0
@openai.retry_api(
num_retries=retry_count, backoff_base=0.001, warn_user=warn_user
max_retries=retry_count, backoff_base=0.001, warn_user=warn_user
)
def __call__(self):
self.count += 1
@@ -69,16 +69,11 @@ def test_retry_open_api_passing(capsys, error, error_count, retry_count, failure
if error_count and retry_count:
if type(error) == RateLimitError:
assert "Reached rate limit, passing..." in output.out
assert "Reached rate limit" in output.out
assert "Please double check" in output.out
if type(error) == ServiceUnavailableError:
assert (
"The OpenAI API engine is currently overloaded, passing..."
in output.out
)
assert "The OpenAI API engine is currently overloaded" in output.out
assert "Please double check" in output.out
if type(error) == APIError:
assert "API Bad gateway" in output.out
else:
assert output.out == ""
@@ -96,7 +91,7 @@ def test_retry_open_api_rate_limit_no_warn(capsys):
output = capsys.readouterr()
assert "Reached rate limit, passing..." in output.out
assert "Reached rate limit" in output.out
assert "Please double check" not in output.out
@@ -115,7 +110,7 @@ def test_retry_open_api_service_unavairable_no_warn(capsys):
output = capsys.readouterr()
assert "The OpenAI API engine is currently overloaded, passing..." in output.out
assert "The OpenAI API engine is currently overloaded" in output.out
assert "Please double check" not in output.out

View File

@@ -3,7 +3,7 @@ import json
import pytest
from googleapiclient.errors import HttpError
from autogpt.agent.agent import Agent
from autogpt.agents.agent import Agent
from autogpt.commands.web_search import google, safe_google_results, web_search

View File

@@ -1,10 +1,16 @@
import os
from hashlib import sha256
import openai.api_requestor
import pytest
from pytest_mock import MockerFixture
from .vcr_filter import PROXY, before_record_request, before_record_response
from .vcr_filter import (
PROXY,
before_record_request,
before_record_response,
freeze_request_body,
)
DEFAULT_RECORD_MODE = "new_episodes"
BASE_VCR_CONFIG = {
@@ -12,10 +18,13 @@ BASE_VCR_CONFIG = {
"before_record_response": before_record_response,
"filter_headers": [
"Authorization",
"AGENT-MODE",
"AGENT-TYPE",
"OpenAI-Organization",
"X-OpenAI-Client-User-Agent",
"User-Agent",
],
"match_on": ["method", "body"],
"match_on": ["method", "headers"],
}
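For readers unfamiliar with vcrpy, a config dict like the one above maps directly onto VCR's keyword arguments. A minimal sketch of that wiring, using trivial stand-ins for the real before_record hooks and an assumed cassette path (neither is the project's actual setup):

import vcr

def before_record_request(request):
    return request  # stand-in: the real hook filters hostnames and freezes the body

def before_record_response(response):
    response["headers"].pop("Transfer-Encoding", None)  # mirrors the real hook
    return response

my_vcr = vcr.VCR(
    record_mode="new_episodes",
    before_record_request=before_record_request,
    before_record_response=before_record_response,
    filter_headers=["Authorization", "X-OpenAI-Client-User-Agent", "User-Agent"],
    match_on=["method", "headers"],  # headers carry the X-Content-Hash added below in conftest
)

with my_vcr.use_cassette("example.yaml"):
    pass  # HTTP calls made here are recorded once, then replayed on later runs

Matching on headers rather than the full body is what makes the content-hash header worthwhile: playback compares one short hex string instead of re-normalising every request body.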
@@ -41,7 +50,7 @@ def vcr_cassette_dir(request):
return os.path.join("tests/Auto-GPT-test-cassettes", test_name)
def patch_api_base(requestor):
def patch_api_base(requestor: openai.api_requestor.APIRequestor):
new_api_base = f"{PROXY}/v1"
requestor.api_base = new_api_base
return requestor
@@ -49,23 +58,35 @@ def patch_api_base(requestor):
@pytest.fixture
def patched_api_requestor(mocker: MockerFixture):
original_init = openai.api_requestor.APIRequestor.__init__
original_validate_headers = openai.api_requestor.APIRequestor._validate_headers
init_requestor = openai.api_requestor.APIRequestor.__init__
prepare_request = openai.api_requestor.APIRequestor._prepare_request_raw
def patched_init(requestor, *args, **kwargs):
original_init(requestor, *args, **kwargs)
def patched_init_requestor(requestor, *args, **kwargs):
init_requestor(requestor, *args, **kwargs)
patch_api_base(requestor)
def patched_validate_headers(self, supplied_headers):
headers = original_validate_headers(self, supplied_headers)
headers["AGENT-MODE"] = os.environ.get("AGENT_MODE")
headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE")
return headers
def patched_prepare_request(self, *args, **kwargs):
url, headers, data = prepare_request(self, *args, **kwargs)
if PROXY:
headers["AGENT-MODE"] = os.environ.get("AGENT_MODE")
headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE")
# Add hash header for cheap & fast matching on cassette playback
headers["X-Content-Hash"] = sha256(
freeze_request_body(data), usedforsecurity=False
).hexdigest()
return url, headers, data
if PROXY:
mocker.patch("openai.api_requestor.APIRequestor.__init__", new=patched_init)
mocker.patch.object(
openai.api_requestor.APIRequestor,
"_validate_headers",
new=patched_validate_headers,
"__init__",
new=patched_init_requestor,
)
mocker.patch.object(
openai.api_requestor.APIRequestor,
"_prepare_request_raw",
new=patched_prepare_request,
)
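The X-Content-Hash header above gives the headers matcher something cheap and stable to compare, because the body passed to the hash has already been frozen into deterministic JSON. A short sketch of that hashing step, assuming a frozen body like the one freeze_request_body returns (the helper name content_hash is illustrative, not part of the repo):

import json
from hashlib import sha256

def content_hash(frozen_body: bytes) -> str:
    # The body is assumed to be deterministic already (sorted keys, dynamic
    # fields stripped), so identical requests always hash to the same value.
    return sha256(frozen_body, usedforsecurity=False).hexdigest()

frozen = json.dumps({"model": "gpt-3.5-turbo", "messages": []}, sort_keys=True).encode()
print(content_hash(frozen))  # stable across runs and processes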

View File

@@ -1,52 +0,0 @@
import json
import re
def replace_timestamp_in_request(request):
# Check if the request body contains a JSON object
try:
if not request or not request.body:
return request
body = json.loads(request.body)
except ValueError:
return request
if "messages" not in body:
return request
for message in body["messages"]:
if "content" in message and "role" in message and message["role"] == "system":
timestamp_regex = re.compile(r"\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} \d{4}")
message["content"] = timestamp_regex.sub(
"Tue Jan 01 00:00:00 2000", message["content"]
)
request.body = json.dumps(body)
return request
def before_record_response(response):
if "Transfer-Encoding" in response["headers"]:
del response["headers"]["Transfer-Encoding"]
return response
def before_record_request(request):
filtered_request = filter_hostnames(request)
filtered_request_without_dynamic_data = replace_timestamp_in_request(
filtered_request
)
return filtered_request_without_dynamic_data
def filter_hostnames(request):
allowed_hostnames = [
"api.openai.com",
"localhost:50337",
] # List of hostnames you want to allow
if any(hostname in request.url for hostname in allowed_hostnames):
return request
else:
return None

View File

@@ -1,8 +1,12 @@
import contextlib
import json
import os
import re
from io import BytesIO
from typing import Any, Dict, List
from vcr.request import Request
PROXY = os.environ.get("PROXY")
REPLACEMENTS: List[Dict[str, str]] = [
@@ -39,19 +43,20 @@ def replace_message_content(content: str, replacements: List[Dict[str, str]]) ->
return content
def replace_timestamp_in_request(request: Any) -> Any:
def freeze_request_body(json_body: str | bytes) -> bytes:
"""Remove any dynamic items from the request body"""
try:
if not request or not request.body:
return request
body = json.loads(request.body)
body = json.loads(json_body)
except ValueError:
return request
return json_body if type(json_body) == bytes else json_body.encode()
if "messages" not in body:
return request
body[
"max_tokens"
] = 0 # this field is inconsistent between requests and not used at the moment.
return json.dumps(body, sort_keys=True).encode()
if "max_tokens" in body:
del body["max_tokens"]
for message in body["messages"]:
if "content" in message and "role" in message:
if message["role"] == "system":
@@ -59,7 +64,20 @@ def replace_timestamp_in_request(request: Any) -> Any:
message["content"], REPLACEMENTS
)
request.body = json.dumps(body)
return json.dumps(body, sort_keys=True).encode()
def freeze_request(request: Request) -> Request:
if not request or not request.body:
return request
with contextlib.suppress(ValueError):
request.body = freeze_request_body(
request.body.getvalue()
if isinstance(request.body, BytesIO)
else request.body
)
return request
@@ -69,20 +87,23 @@ def before_record_response(response: Dict[str, Any]) -> Dict[str, Any]:
return response
def before_record_request(request: Any) -> Any:
def before_record_request(request: Request) -> Request | None:
request = replace_request_hostname(request, ORIGINAL_URL, NEW_URL)
filtered_request = filter_hostnames(request)
filtered_request_without_dynamic_data = replace_timestamp_in_request(
filtered_request
)
if not filtered_request:
return None
filtered_request_without_dynamic_data = freeze_request(filtered_request)
return filtered_request_without_dynamic_data
from urllib.parse import urlparse, urlunparse
def replace_request_hostname(request: Any, original_url: str, new_hostname: str) -> Any:
def replace_request_hostname(
request: Request, original_url: str, new_hostname: str
) -> Request:
parsed_url = urlparse(request.uri)
if parsed_url.hostname in original_url:
@@ -94,7 +115,7 @@ def replace_request_hostname(request: Any, original_url: str, new_hostname: str)
return request
def filter_hostnames(request: Any) -> Any:
def filter_hostnames(request: Request) -> Request | None:
# Add your implementation here for filtering hostnames
if any(hostname in request.url for hostname in ALLOWED_HOSTNAMES):
return request