diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index e40abf2f..195ebeff 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -27,8 +27,8 @@ jobs: with: ref: master - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - name: Set up Python ${{ matrix.config.python-version }} + uses: actions/setup-python@v4 with: python-version: ${{ matrix.config.python-version }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dde98cf9..109d2d5c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,7 @@ jobs: repository: ${{ github.event.pull_request.head.repo.full_name }} - name: Set up Python ${{ env.min-python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ env.min-python-version }} @@ -132,7 +132,7 @@ jobs: fi - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -153,14 +153,16 @@ jobs: - name: Run pytest with coverage run: | - pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \ + pytest -vv --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \ + --numprocesses=logical --durations=10 \ tests/unit tests/integration tests/challenges python tests/challenges/utils/build_current_score.py env: CI: true - PROXY: ${{ secrets.PROXY }} - AGENT_MODE: ${{ secrets.AGENT_MODE }} - AGENT_TYPE: ${{ secrets.AGENT_TYPE }} + PROXY: ${{ github.event_name == 'pull_request_target' && secrets.PROXY || '' }} + AGENT_MODE: ${{ github.event_name == 'pull_request_target' && secrets.AGENT_MODE || '' }} + AGENT_TYPE: ${{ github.event_name == 'pull_request_target' && secrets.AGENT_TYPE || '' }} + OPENAI_API_KEY: ${{ github.event_name == 'pull_request' && secrets.OPENAI_API_KEY || '' }} PLAIN_OUTPUT: True - name: Upload coverage reports to Codecov @@ -251,7 +253,7 @@ jobs: gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged." fi - - name: Upload logs as artifact + - name: Upload logs to artifact if: always() uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml index 3da88891..cbdd2f13 100644 --- a/.github/workflows/docker-ci.yml +++ b/.github/workflows/docker-ci.yml @@ -73,16 +73,13 @@ jobs: run: .github/workflows/scripts/docker-ci-summary.sh >> $GITHUB_STEP_SUMMARY continue-on-error: true - # Docker setup needs fixing before this is going to work: #1843 test: runs-on: ubuntu-latest - timeout-minutes: 30 - needs: build + timeout-minutes: 10 steps: - name: Check out repository uses: actions/checkout@v3 with: - fetch-depth: 0 submodules: true - name: Set up Docker Buildx @@ -102,14 +99,15 @@ jobs: - id: test name: Run tests env: - PLAIN_OUTPUT: True CI: true + PLAIN_OUTPUT: True OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | set +e test_output=$( docker run --env CI --env OPENAI_API_KEY --entrypoint python ${{ env.IMAGE_NAME }} -m \ - pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing \ + pytest -v --cov=autogpt --cov-branch --cov-report term-missing \ + --numprocesses=4 --durations=10 \ tests/unit tests/integration 2>&1 ) test_failure=$? 
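For local debugging it can help to mirror the CI test invocation above. A minimal sketch, assuming `pytest`, `pytest-cov`, and `pytest-xdist` (plus `psutil` for `--numprocesses=logical`) are installed in your environment — the flags are taken from the workflow, the wrapper script itself is not part of this diff:

```python
# Run the same test selection and flags as the CI job above, locally.
# Requires pytest, pytest-cov, pytest-xdist (and psutil for "--numprocesses=logical").
import sys

import pytest

if __name__ == "__main__":
    sys.exit(
        pytest.main(
            [
                "-vv",
                "--cov=autogpt",
                "--cov-branch",
                "--cov-report=term-missing",
                "--numprocesses=logical",
                "--durations=10",
                "tests/unit",
                "tests/integration",
            ]
        )
    )
```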
diff --git a/.gitignore b/.gitignore index 1376ba5d..9695cf4a 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,7 @@ last_run_ai_settings.yaml auto-gpt.json log.txt log-ingestion.txt -logs +/logs *.log *.mp3 mem.sqlite3 diff --git a/BULLETIN.md b/BULLETIN.md index 117a436a..a857a7ce 100644 --- a/BULLETIN.md +++ b/BULLETIN.md @@ -4,26 +4,23 @@ 📖 *User Guide*: https://docs.agpt.co. 👩 *Contributors Wiki*: https://github.com/Significant-Gravitas/Auto-GPT/wiki/Contributing. -# v0.4.4 RELEASE HIGHLIGHTS! 🚀 +# v0.4.5 RELEASE HIGHLIGHTS! 🚀 # ----------------------------- -## GPT-4 is back! -Following OpenAI's recent GPT-4 GA announcement, the SMART_LLM .env setting -now defaults to GPT-4, and Auto-GPT will use GPT-4 by default in its main loop. +This release includes under-the-hood improvements and bug fixes, such as more +accurate token counts for OpenAI functions, faster CI builds, improved plugin +handling, and refactoring of the Config class for better maintainability. -### !! High Costs Warning !! 💰💀🚨 -GPT-4 costs ~20x more than GPT-3.5-turbo. -Please take note of this before using SMART_LLM. You can use `--gpt3only` -or `--gpt4only` to force the use of GPT-3.5-turbo or GPT-4, respectively, -at runtime. +We have also released some documentation updates, including: -## Re-arch v1 preview release! -We've released a preview version of the re-arch code, under `autogpt/core`. -This is a major milestone for us, and we're excited to continue working on it. -We look forward to your feedback. Follow the process here: -https://github.com/Significant-Gravitas/Auto-GPT/issues/4770. +- *How to share your system logs* + Visit [docs/share-your-logs.md] to learn how to share logs with us + via a log analyzer graciously contributed by https://www.e2b.dev/ -## Other highlights -Other fixes include plugins regressions, Azure config and security patches. +- *Auto-GPT re-architecture documentation* + You can learn more about the inner workings of the Auto-GPT re-architecture + released last cycle, via these links: + * [autogpt/core/README.md] + * [autogpt/core/ARCHITECTURE_NOTES.md] Take a look at the Release Notes on Github for the full changelog! https://github.com/Significant-Gravitas/Auto-GPT/releases.
diff --git a/autogpt/agent/__init__.py b/autogpt/agent/__init__.py deleted file mode 100644 index e928af22..00000000 --- a/autogpt/agent/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from autogpt.agent.agent import Agent -from autogpt.agent.agent_manager import AgentManager - -__all__ = ["Agent", "AgentManager"] diff --git a/autogpt/agent/agent_manager.py b/autogpt/agent/agent_manager.py deleted file mode 100644 index eaecbf3b..00000000 --- a/autogpt/agent/agent_manager.py +++ /dev/null @@ -1,145 +0,0 @@ -"""Agent manager for managing GPT agents""" -from __future__ import annotations - -from autogpt.config import Config -from autogpt.llm.base import ChatSequence -from autogpt.llm.chat import Message, create_chat_completion -from autogpt.singleton import Singleton - - -class AgentManager(metaclass=Singleton): - """Agent manager for managing GPT agents""" - - def __init__(self, config: Config): - self.next_key = 0 - self.agents: dict[ - int, tuple[str, list[Message], str] - ] = {} # key, (task, full_message_history, model) - self.config = config - - # Create new GPT agent - # TODO: Centralise use of create_chat_completion() to globally enforce token limit - - def create_agent( - self, task: str, creation_prompt: str, model: str - ) -> tuple[int, str]: - """Create a new agent and return its key - - Args: - task: The task to perform - creation_prompt: Prompt passed to the LLM at creation - model: The model to use to run this agent - - Returns: - The key of the new agent - """ - messages = ChatSequence.for_model(model, [Message("user", creation_prompt)]) - - for plugin in self.config.plugins: - if not plugin.can_handle_pre_instruction(): - continue - if plugin_messages := plugin.pre_instruction(messages.raw()): - messages.extend([Message(**raw_msg) for raw_msg in plugin_messages]) - # Start GPT instance - agent_reply = create_chat_completion( - prompt=messages, config=self.config - ).content - - messages.add("assistant", agent_reply) - - plugins_reply = "" - for i, plugin in enumerate(self.config.plugins): - if not plugin.can_handle_on_instruction(): - continue - if plugin_result := plugin.on_instruction([m.raw() for m in messages]): - sep = "\n" if i else "" - plugins_reply = f"{plugins_reply}{sep}{plugin_result}" - - if plugins_reply and plugins_reply != "": - messages.add("assistant", plugins_reply) - key = self.next_key - # This is done instead of len(agents) to make keys unique even if agents - # are deleted - self.next_key += 1 - - self.agents[key] = (task, list(messages), model) - - for plugin in self.config.plugins: - if not plugin.can_handle_post_instruction(): - continue - agent_reply = plugin.post_instruction(agent_reply) - - return key, agent_reply - - def message_agent(self, key: str | int, message: str) -> str: - """Send a message to an agent and return its response - - Args: - key: The key of the agent to message - message: The message to send to the agent - - Returns: - The agent's response - """ - task, messages, model = self.agents[int(key)] - - # Add user message to message history before sending to agent - messages = ChatSequence.for_model(model, messages) - messages.add("user", message) - - for plugin in self.config.plugins: - if not plugin.can_handle_pre_instruction(): - continue - if plugin_messages := plugin.pre_instruction([m.raw() for m in messages]): - messages.extend([Message(**raw_msg) for raw_msg in plugin_messages]) - - # Start GPT instance - agent_reply = create_chat_completion( - prompt=messages, config=self.config - ).content - - messages.add("assistant", 
agent_reply) - - plugins_reply = agent_reply - for i, plugin in enumerate(self.config.plugins): - if not plugin.can_handle_on_instruction(): - continue - if plugin_result := plugin.on_instruction([m.raw() for m in messages]): - sep = "\n" if i else "" - plugins_reply = f"{plugins_reply}{sep}{plugin_result}" - # Update full message history - if plugins_reply and plugins_reply != "": - messages.add("assistant", plugins_reply) - - for plugin in self.config.plugins: - if not plugin.can_handle_post_instruction(): - continue - agent_reply = plugin.post_instruction(agent_reply) - - return agent_reply - - def list_agents(self) -> list[tuple[str | int, str]]: - """Return a list of all agents - - Returns: - A list of tuples of the form (key, task) - """ - - # Return a list of agent keys and their tasks - return [(key, task) for key, (task, _, _) in self.agents.items()] - - def delete_agent(self, key: str | int) -> bool: - """Delete an agent from the agent manager - - Args: - key: The key of the agent to delete - - Returns: - True if successful, False otherwise - """ - - try: - del self.agents[int(key)] - return True - except KeyError: - return False diff --git a/autogpt/agents/__init__.py b/autogpt/agents/__init__.py new file mode 100644 index 00000000..a6df24ad --- /dev/null +++ b/autogpt/agents/__init__.py @@ -0,0 +1,3 @@ +from .agent import Agent + +__all__ = ["Agent"] diff --git a/autogpt/agent/agent.py b/autogpt/agents/agent.py similarity index 79% rename from autogpt/agent/agent.py rename to autogpt/agents/agent.py index 01a1b133..316cc4d4 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agents/agent.py @@ -9,16 +9,19 @@ from colorama import Fore, Style from autogpt.config import Config from autogpt.config.ai_config import AIConfig from autogpt.json_utils.utilities import extract_json_from_response, validate_json +from autogpt.llm import ChatModelResponse from autogpt.llm.chat import chat_with_ai from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS from autogpt.llm.utils import count_string_tokens -from autogpt.log_cycle.log_cycle import ( +from autogpt.logs import ( FULL_MESSAGE_HISTORY_FILE_NAME, NEXT_ACTION_FILE_NAME, USER_INPUT_FILE_NAME, LogCycleHandler, + logger, + print_assistant_thoughts, + remove_ansi_escape, ) -from autogpt.logs import logger, print_assistant_thoughts, remove_ansi_escape from autogpt.memory.message_history import MessageHistory from autogpt.memory.vector import VectorMemory from autogpt.models.command_registry import CommandRegistry @@ -70,7 +73,7 @@ class Agent: ): self.ai_name = ai_name self.memory = memory - self.history = MessageHistory(self) + self.history = MessageHistory.for_model(config.smart_llm, agent=self) self.next_action_count = next_action_count self.command_registry = command_registry self.config = config @@ -84,9 +87,6 @@ class Agent: self.smart_token_limit = OPEN_AI_CHAT_MODELS.get(config.smart_llm).max_tokens def start_interaction_loop(self): - # Avoid circular imports - from autogpt.app import execute_command, extract_command - # Interaction Loop self.cycle_count = 0 command_name = None @@ -167,8 +167,6 @@ class Agent: if self.config.speak_mode: say_text(f"I want to execute {command_name}", self.config) - arguments = self._resolve_pathlike_command_args(arguments) - except Exception as e: logger.error("Error: \n", str(e)) self.log_cycle_handler.log_cycle( @@ -308,13 +306,93 @@ class Agent: "SYSTEM: ", Fore.YELLOW, "Unable to execute command" ) - def _resolve_pathlike_command_args(self, command_args): - if "directory" in command_args and 
command_args["directory"] in {"", "/"}: - command_args["directory"] = str(self.workspace.root) - else: - for pathlike in ["filename", "directory", "clone_path"]: - if pathlike in command_args: - command_args[pathlike] = str( - self.workspace.get_path(command_args[pathlike]) - ) - return command_args + +def extract_command( + assistant_reply_json: dict, assistant_reply: ChatModelResponse, config: Config +): + """Parse the response and return the command name and arguments + + Args: + assistant_reply_json (dict): The response object from the AI + assistant_reply (ChatModelResponse): The model response from the AI + config (Config): The config object + + Returns: + tuple: The command name and arguments + + Raises: + json.decoder.JSONDecodeError: If the response is not valid JSON + + Exception: If any other error occurs + """ + if config.openai_functions: + if assistant_reply.function_call is None: + return "Error:", "No 'function_call' in assistant reply" + assistant_reply_json["command"] = { + "name": assistant_reply.function_call.name, + "args": json.loads(assistant_reply.function_call.arguments), + } + try: + if "command" not in assistant_reply_json: + return "Error:", "Missing 'command' object in JSON" + + if not isinstance(assistant_reply_json, dict): + return ( + "Error:", + f"The previous message sent was not a dictionary {assistant_reply_json}", + ) + + command = assistant_reply_json["command"] + if not isinstance(command, dict): + return "Error:", "'command' object is not a dictionary" + + if "name" not in command: + return "Error:", "Missing 'name' field in 'command' object" + + command_name = command["name"] + + # Use an empty dictionary if 'args' field is not present in 'command' object + arguments = command.get("args", {}) + + return command_name, arguments + except json.decoder.JSONDecodeError: + return "Error:", "Invalid JSON" + # All other errors, return "Error: + error message" + except Exception as e: + return "Error:", str(e) + + +def execute_command( + command_name: str, + arguments: dict[str, str], + agent: Agent, +): + """Execute the command and return the result + + Args: + command_name (str): The name of the command to execute + arguments (dict): The arguments for the command + agent (Agent): The agent that is executing the command + + Returns: + str: The result of the command + """ + try: + # Execute a native command with the same name or alias, if it exists + if command := agent.command_registry.get_command(command_name): + return command(**arguments, agent=agent) + + # Handle non-native commands (e.g. from plugins) + for command in agent.ai_config.prompt_generator.commands: + if ( + command_name == command["label"].lower() + or command_name == command["name"].lower() + ): + return command["function"](**arguments) + + raise RuntimeError( + f"Cannot execute '{command_name}': unknown command." + " Do not try to use this command again." 
+ ) + except Exception as e: + return f"Error: {str(e)}" diff --git a/autogpt/app.py b/autogpt/app.py deleted file mode 100644 index ea5072f8..00000000 --- a/autogpt/app.py +++ /dev/null @@ -1,114 +0,0 @@ -""" Command and Control """ -import json -from typing import Dict - -from autogpt.agent.agent import Agent -from autogpt.config import Config -from autogpt.llm import ChatModelResponse - - -def is_valid_int(value: str) -> bool: - """Check if the value is a valid integer - - Args: - value (str): The value to check - - Returns: - bool: True if the value is a valid integer, False otherwise - """ - try: - int(value) - return True - except ValueError: - return False - - -def extract_command( - assistant_reply_json: Dict, assistant_reply: ChatModelResponse, config: Config -): - """Parse the response and return the command name and arguments - - Args: - assistant_reply_json (dict): The response object from the AI - assistant_reply (ChatModelResponse): The model response from the AI - config (Config): The config object - - Returns: - tuple: The command name and arguments - - Raises: - json.decoder.JSONDecodeError: If the response is not valid JSON - - Exception: If any other error occurs - """ - if config.openai_functions: - if assistant_reply.function_call is None: - return "Error:", "No 'function_call' in assistant reply" - assistant_reply_json["command"] = { - "name": assistant_reply.function_call.name, - "args": json.loads(assistant_reply.function_call.arguments), - } - try: - if "command" not in assistant_reply_json: - return "Error:", "Missing 'command' object in JSON" - - if not isinstance(assistant_reply_json, dict): - return ( - "Error:", - f"The previous message sent was not a dictionary {assistant_reply_json}", - ) - - command = assistant_reply_json["command"] - if not isinstance(command, dict): - return "Error:", "'command' object is not a dictionary" - - if "name" not in command: - return "Error:", "Missing 'name' field in 'command' object" - - command_name = command["name"] - - # Use an empty dictionary if 'args' field is not present in 'command' object - arguments = command.get("args", {}) - - return command_name, arguments - except json.decoder.JSONDecodeError: - return "Error:", "Invalid JSON" - # All other errors, return "Error: + error message" - except Exception as e: - return "Error:", str(e) - - -def execute_command( - command_name: str, - arguments: dict[str, str], - agent: Agent, -): - """Execute the command and return the result - - Args: - command_name (str): The name of the command to execute - arguments (dict): The arguments for the command - agent (Agent): The agent that is executing the command - - Returns: - str: The result of the command - """ - try: - # Execute a native command with the same name or alias, if it exists - if command := agent.command_registry.get_command(command_name): - return command(**arguments, agent=agent) - - # Handle non-native commands (e.g. from plugins) - for command in agent.ai_config.prompt_generator.commands: - if ( - command_name == command["label"].lower() - or command_name == command["name"].lower() - ): - return command["function"](**arguments) - - raise RuntimeError( - f"Cannot execute '{command_name}': unknown command." - " Do not try to use this command again." 
- ) - except Exception as e: - return f"Error: {str(e)}" diff --git a/autogpt/commands/decorators.py b/autogpt/commands/decorators.py new file mode 100644 index 00000000..b63c76d5 --- /dev/null +++ b/autogpt/commands/decorators.py @@ -0,0 +1,64 @@ +import functools +from pathlib import Path +from typing import Callable + +from autogpt.agents.agent import Agent +from autogpt.logs import logger + + +def sanitize_path_arg(arg_name: str): + def decorator(func: Callable): + # Get position of path parameter, in case it is passed as a positional argument + try: + arg_index = list(func.__annotations__.keys()).index(arg_name) + except ValueError: + raise TypeError( + f"Sanitized parameter '{arg_name}' absent or not annotated on function '{func.__name__}'" + ) + + # Get position of agent parameter, in case it is passed as a positional argument + try: + agent_arg_index = list(func.__annotations__.keys()).index("agent") + except ValueError: + raise TypeError( + f"Parameter 'agent' absent or not annotated on function '{func.__name__}'" + ) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + logger.debug(f"Sanitizing arg '{arg_name}' on function '{func.__name__}'") + logger.debug(f"Function annotations: {func.__annotations__}") + + # Get Agent from the called function's arguments + agent = kwargs.get( + "agent", len(args) > agent_arg_index and args[agent_arg_index] + ) + logger.debug(f"Args: {args}") + logger.debug(f"KWArgs: {kwargs}") + logger.debug(f"Agent argument lifted from function call: {agent}") + if not isinstance(agent, Agent): + raise RuntimeError("Could not get Agent from decorated command's args") + + # Sanitize the specified path argument, if one is given + given_path: str | Path | None = kwargs.get( + arg_name, len(args) > arg_index and args[arg_index] or None + ) + if given_path: + if given_path in {"", "/"}: + sanitized_path = str(agent.workspace.root) + else: + sanitized_path = str(agent.workspace.get_path(given_path)) + + if arg_name in kwargs: + kwargs[arg_name] = sanitized_path + else: + # args is an immutable tuple; must be converted to a list to update + arg_list = list(args) + arg_list[arg_index] = sanitized_path + args = tuple(arg_list) + + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index c11b56e2..2403b2ba 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -7,11 +7,13 @@ import docker from docker.errors import DockerException, ImageNotFound from docker.models.containers import Container as DockerContainer -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.command_decorator import command from autogpt.config import Config from autogpt.logs import logger +from .decorators import sanitize_path_arg + ALLOWLIST_CONTROL = "allowlist" DENYLIST_CONTROL = "denylist" @@ -43,14 +45,14 @@ def execute_python_code(code: str, name: str, agent: Agent) -> str: Returns: str: The STDOUT captured from the code when it ran """ - ai_name = agent.ai_name + ai_name = agent.ai_config.ai_name code_dir = agent.workspace.get_path(Path(ai_name, "executed_code")) os.makedirs(code_dir, exist_ok=True) if not name.endswith(".py"): name = name + ".py" - # The `name` arg is not covered by Agent._resolve_pathlike_command_args(), + # The `name` arg is not covered by @sanitize_path_arg, # so sanitization must be done here to prevent path traversal. 
file_path = agent.workspace.get_path(code_dir / name) if not file_path.is_relative_to(code_dir): @@ -76,6 +78,7 @@ def execute_python_code(code: str, name: str, agent: Agent) -> str: }, }, ) +@sanitize_path_arg("filename") def execute_python_file(filename: str, agent: Agent) -> str: """Execute a Python file in a Docker container and return the output @@ -100,6 +103,9 @@ def execute_python_file(filename: str, agent: Agent) -> str: ) if we_are_running_in_a_docker_container(): + logger.debug( + f"Auto-GPT is running in a Docker container; executing {file_path} directly..." + ) result = subprocess.run( ["python", str(file_path)], capture_output=True, @@ -111,6 +117,7 @@ def execute_python_file(filename: str, agent: Agent) -> str: else: return f"Error: {result.stderr}" + logger.debug("Auto-GPT is not running in a Docker container") try: client = docker.from_env() # You can replace this with the desired Python image/version @@ -119,10 +126,10 @@ def execute_python_file(filename: str, agent: Agent) -> str: image_name = "python:3-alpine" try: client.images.get(image_name) - logger.warn(f"Image '{image_name}' found locally") + logger.debug(f"Image '{image_name}' found locally") except ImageNotFound: logger.info( - f"Image '{image_name}' not found locally, pulling from Docker Hub" + f"Image '{image_name}' not found locally, pulling from Docker Hub..." ) # Use the low-level API to stream the pull response low_level_client = docker.APIClient() @@ -135,6 +142,7 @@ def execute_python_file(filename: str, agent: Agent) -> str: elif status: logger.info(status) + logger.debug(f"Running {file_path} in a {image_name} container...") container: DockerContainer = client.containers.run( image_name, ["python", str(file_path.relative_to(agent.workspace.root))], diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 1d044822..939b7dc1 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -1,20 +1,21 @@ """File operations for AutoGPT""" from __future__ import annotations +import contextlib import hashlib import os import os.path +from pathlib import Path from typing import Generator, Literal -from confection import Config - -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.command_decorator import command -from autogpt.commands.file_operations_utils import read_textual_file -from autogpt.config import Config from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, VectorMemory +from .decorators import sanitize_path_arg +from .file_operations_utils import read_textual_file + Operation = Literal["write", "append", "delete"] @@ -74,21 +75,26 @@ def file_operations_state(log_path: str) -> dict[str, str]: return state +@sanitize_path_arg("filename") def is_duplicate_operation( - operation: Operation, filename: str, config: Config, checksum: str | None = None + operation: Operation, filename: str, agent: Agent, checksum: str | None = None ) -> bool: """Check if the operation has already been performed Args: operation: The operation to check for filename: The name of the file to check for - config: The agent config + agent: The agent checksum: The checksum of the contents to be written Returns: True if the operation has already been performed on the file """ - state = file_operations_state(config.file_logger_path) + # Make the filename into a relative path if possible + with contextlib.suppress(ValueError): + filename = str(Path(filename).relative_to(agent.workspace.root)) + + state = 
file_operations_state(agent.config.file_logger_path) if operation == "delete" and filename not in state: return True if operation == "write" and state.get(filename) == checksum: @@ -96,8 +102,9 @@ def is_duplicate_operation( return False +@sanitize_path_arg("filename") def log_operation( - operation: str, filename: str, agent: Agent, checksum: str | None = None + operation: Operation, filename: str, agent: Agent, checksum: str | None = None ) -> None: """Log the file operation to the file_logger.txt @@ -106,6 +113,10 @@ def log_operation( filename: The name of the file the operation was performed on checksum: The checksum of the contents to be written """ + # Make the filename into a relative path if possible + with contextlib.suppress(ValueError): + filename = str(Path(filename).relative_to(agent.workspace.root)) + log_entry = f"{operation}: {filename}" if checksum is not None: log_entry += f" #{checksum}" @@ -126,6 +137,7 @@ def log_operation( } }, ) +@sanitize_path_arg("filename") def read_file(filename: str, agent: Agent) -> str: """Read a file and return the contents @@ -191,6 +203,7 @@ def ingest_file( }, aliases=["write_file", "create_file"], ) +@sanitize_path_arg("filename") def write_to_file(filename: str, text: str, agent: Agent) -> str: """Write text to a file @@ -202,7 +215,7 @@ def write_to_file(filename: str, text: str, agent: Agent) -> str: str: A message indicating success or failure """ checksum = text_checksum(text) - if is_duplicate_operation("write", filename, agent.config, checksum): + if is_duplicate_operation("write", filename, agent, checksum): return "Error: File has already been updated." try: directory = os.path.dirname(filename) @@ -231,6 +244,7 @@ def write_to_file(filename: str, text: str, agent: Agent) -> str: }, }, ) +@sanitize_path_arg("filename") def append_to_file( filename: str, text: str, agent: Agent, should_log: bool = True ) -> str: @@ -271,6 +285,7 @@ def append_to_file( } }, ) +@sanitize_path_arg("filename") def delete_file(filename: str, agent: Agent) -> str: """Delete a file @@ -280,7 +295,7 @@ def delete_file(filename: str, agent: Agent) -> str: Returns: str: A message indicating success or failure """ - if is_duplicate_operation("delete", filename, agent.config): + if is_duplicate_operation("delete", filename, agent): return "Error: File has already been deleted." try: os.remove(filename) @@ -301,6 +316,7 @@ def delete_file(filename: str, agent: Agent) -> str: } }, ) +@sanitize_path_arg("directory") def list_files(directory: str, agent: Agent) -> list[str]: """lists files in a directory recursively diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py index fc967e40..021157fb 100644 --- a/autogpt/commands/git_operations.py +++ b/autogpt/commands/git_operations.py @@ -2,10 +2,12 @@ from git.repo import Repo -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.command_decorator import command from autogpt.url_utils.validators import validate_url +from .decorators import sanitize_path_arg + @command( "clone_repository", @@ -22,9 +24,10 @@ from autogpt.url_utils.validators import validate_url "required": True, }, }, - lambda config: config.github_username and config.github_api_key, + lambda config: bool(config.github_username and config.github_api_key), "Configure github_username and github_api_key.", ) +@sanitize_path_arg("clone_path") @validate_url def clone_repository(url: str, clone_path: str, agent: Agent) -> str: """Clone a GitHub repository locally. 
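To illustrate how the new `@sanitize_path_arg` decorator (added in `autogpt/commands/decorators.py` above) replaces the old `Agent._resolve_pathlike_command_args`, here is a hedged sketch of a hypothetical command using it. The command name and body are made up for illustration; only the `@command` and `@sanitize_path_arg` usage mirrors the pattern in this diff:

```python
# Hypothetical command, for illustration only — not part of this diff.
from autogpt.agents.agent import Agent
from autogpt.command_decorator import command
from autogpt.commands.decorators import sanitize_path_arg


@command(
    "read_first_line",
    "Read the first line of a file in the workspace",
    {
        "filename": {
            "type": "string",
            "description": "The file to read",
            "required": True,
        }
    },
)
@sanitize_path_arg("filename")
def read_first_line(filename: str, agent: Agent) -> str:
    # By the time this body runs, `filename` has been rewritten by the decorator:
    # "" or "/" resolve to the workspace root, anything else goes through
    # agent.workspace.get_path(), so relative paths cannot escape the workspace.
    with open(filename) as f:
        return f.readline().strip()
```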
diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py index c295392c..abae6149 100644 --- a/autogpt/commands/image_gen.py +++ b/autogpt/commands/image_gen.py @@ -9,7 +9,7 @@ import openai import requests from PIL import Image -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.command_decorator import command from autogpt.logs import logger @@ -24,7 +24,7 @@ from autogpt.logs import logger "required": True, }, }, - lambda config: config.image_provider, + lambda config: bool(config.image_provider), "Requires a image provider to be set.", ) def generate_image(prompt: str, agent: Agent, size: int = 256) -> str: diff --git a/autogpt/commands/task_statuses.py b/autogpt/commands/task_statuses.py index 062ebe3a..34908928 100644 --- a/autogpt/commands/task_statuses.py +++ b/autogpt/commands/task_statuses.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import NoReturn -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.command_decorator import command from autogpt.logs import logger diff --git a/autogpt/commands/web_search.py b/autogpt/commands/web_search.py index d47d680b..9ea0d206 100644 --- a/autogpt/commands/web_search.py +++ b/autogpt/commands/web_search.py @@ -7,7 +7,7 @@ from itertools import islice from duckduckgo_search import DDGS -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.command_decorator import command DUCKDUCKGO_MAX_ATTEMPTS = 3 diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index 821957f3..948d799e 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -27,7 +27,7 @@ from webdriver_manager.chrome import ChromeDriverManager from webdriver_manager.firefox import GeckoDriverManager from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.command_decorator import command from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, get_memory diff --git a/autogpt/config/config.py b/autogpt/config/config.py index b1ff0a0a..b41ff68a 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -4,87 +4,145 @@ from __future__ import annotations import contextlib import os import re -from typing import Dict, Optional, Union +from typing import Any, Dict, Optional, Union import yaml +from auto_gpt_plugin_template import AutoGPTPluginTemplate from colorama import Fore +from pydantic import Field, validator from autogpt.core.configuration.schema import Configurable, SystemSettings from autogpt.plugins.plugins_config import PluginsConfig AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "../..", "azure.yaml") +PLUGINS_CONFIG_FILE = os.path.join( + os.path.dirname(__file__), "../..", "plugins_config.yaml" +) GPT_4_MODEL = "gpt-4" GPT_3_MODEL = "gpt-3.5-turbo" -class Config(SystemSettings): - fast_llm: str - smart_llm: str - continuous_mode: bool - skip_news: bool +class Config(SystemSettings, arbitrary_types_allowed=True): + name: str = "Auto-GPT configuration" + description: str = "Default configuration for the Auto-GPT application." 
+ ######################## + # Application Settings # + ######################## + skip_news: bool = False + skip_reprompt: bool = False + authorise_key: str = "y" + exit_key: str = "n" + debug_mode: bool = False + plain_output: bool = False + chat_messages_enabled: bool = True + # TTS configuration + speak_mode: bool = False + text_to_speech_provider: str = "gtts" + streamelements_voice: str = "Brian" + elevenlabs_voice_id: Optional[str] = None + + ########################## + # Agent Control Settings # + ########################## + # Paths + ai_settings_file: str = "ai_settings.yaml" + prompt_settings_file: str = "prompt_settings.yaml" workspace_path: Optional[str] = None file_logger_path: Optional[str] = None - debug_mode: bool - plugins_dir: str - plugins_config: PluginsConfig - continuous_limit: int - speak_mode: bool - skip_reprompt: bool - allow_downloads: bool - exit_key: str - plain_output: bool - disabled_command_categories: list[str] - shell_command_control: str - shell_denylist: list[str] - shell_allowlist: list[str] - ai_settings_file: str - prompt_settings_file: str - embedding_model: str - browse_spacy_language_model: str + # Model configuration + fast_llm: str = "gpt-3.5-turbo" + smart_llm: str = "gpt-4" + temperature: float = 0 + openai_functions: bool = False + embedding_model: str = "text-embedding-ada-002" + browse_spacy_language_model: str = "en_core_web_sm" + # Run loop configuration + continuous_mode: bool = False + continuous_limit: int = 0 + + ########## + # Memory # + ########## + memory_backend: str = "json_file" + memory_index: str = "auto-gpt-memory" + redis_host: str = "localhost" + redis_port: int = 6379 + redis_password: str = "" + wipe_redis_on_start: bool = True + + ############ + # Commands # + ############ + # General + disabled_command_categories: list[str] = Field(default_factory=list) + # File ops + restrict_to_workspace: bool = True + allow_downloads: bool = False + # Shell commands + shell_command_control: str = "denylist" + execute_local_commands: bool = False + shell_denylist: list[str] = Field(default_factory=lambda: ["sudo", "su"]) + shell_allowlist: list[str] = Field(default_factory=list) + # Text to image + image_provider: Optional[str] = None + huggingface_image_model: str = "CompVis/stable-diffusion-v1-4" + sd_webui_url: Optional[str] = "http://localhost:7860" + image_size: int = 256 + # Audio to text + audio_to_text_provider: str = "huggingface" + huggingface_audio_to_text_model: Optional[str] = None + # Web browsing + selenium_web_browser: str = "chrome" + selenium_headless: bool = True + user_agent: str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + + ################### + # Plugin Settings # + ################### + plugins_dir: str = "plugins" + plugins_config_file: str = PLUGINS_CONFIG_FILE + plugins_config: PluginsConfig = Field( + default_factory=lambda: PluginsConfig(plugins={}) + ) + plugins: list[AutoGPTPluginTemplate] = Field(default_factory=list, exclude=True) + plugins_allowlist: list[str] = Field(default_factory=list) + plugins_denylist: list[str] = Field(default_factory=list) + plugins_openai: list[str] = Field(default_factory=list) + + ############### + # Credentials # + ############### + # OpenAI openai_api_key: Optional[str] = None - openai_organization: Optional[str] = None - temperature: float - use_azure: bool - azure_config_file: Optional[str] = None - azure_model_to_deployment_id_map: Optional[Dict[str, str]] = None - 
execute_local_commands: bool - restrict_to_workspace: bool openai_api_type: Optional[str] = None openai_api_base: Optional[str] = None openai_api_version: Optional[str] = None - openai_functions: bool + openai_organization: Optional[str] = None + use_azure: bool = False + azure_config_file: Optional[str] = AZURE_CONFIG_FILE + azure_model_to_deployment_id_map: Optional[Dict[str, str]] = None + # Elevenlabs elevenlabs_api_key: Optional[str] = None - streamelements_voice: str - text_to_speech_provider: str + # Github github_api_key: Optional[str] = None github_username: Optional[str] = None + # Google google_api_key: Optional[str] = None google_custom_search_engine_id: Optional[str] = None - image_provider: Optional[str] = None - image_size: int + # Huggingface huggingface_api_token: Optional[str] = None - huggingface_image_model: str - audio_to_text_provider: str - huggingface_audio_to_text_model: Optional[str] = None - sd_webui_url: Optional[str] = None + # Stable Diffusion sd_webui_auth: Optional[str] = None - selenium_web_browser: str - selenium_headless: bool - user_agent: str - memory_backend: str - memory_index: str - redis_host: str - redis_port: int - redis_password: str - wipe_redis_on_start: bool - plugins_allowlist: list[str] - plugins_denylist: list[str] - plugins_openai: list[str] - plugins_config_file: str - chat_messages_enabled: bool - elevenlabs_voice_id: Optional[str] = None - plugins: list[str] - authorise_key: str + + @validator("plugins", each_item=True) + def validate_plugins(cls, p: AutoGPTPluginTemplate | Any): + assert issubclass( + p.__class__, AutoGPTPluginTemplate + ), f"{p} does not subclass AutoGPTPluginTemplate" + assert ( + p.__class__.__name__ != "AutoGPTPluginTemplate" + ), f"Plugins must subclass AutoGPTPluginTemplate; {p} is a template instance" + return p def get_openai_credentials(self, model: str) -> dict[str, str]: credentials = { @@ -149,73 +207,7 @@ class Config(SystemSettings): class ConfigBuilder(Configurable[Config]): - default_plugins_config_file = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "..", "..", "plugins_config.yaml" - ) - - elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY") - if os.getenv("USE_MAC_OS_TTS"): - default_tts_provider = "macos" - elif elevenlabs_api_key: - default_tts_provider = "elevenlabs" - elif os.getenv("USE_BRIAN_TTS"): - default_tts_provider = "streamelements" - else: - default_tts_provider = "gtts" - - default_settings = Config( - name="Default Server Config", - description="This is a default server configuration", - smart_llm="gpt-4", - fast_llm="gpt-3.5-turbo", - continuous_mode=False, - continuous_limit=0, - skip_news=False, - debug_mode=False, - plugins_dir="plugins", - plugins_config=PluginsConfig(plugins={}), - speak_mode=False, - skip_reprompt=False, - allow_downloads=False, - exit_key="n", - plain_output=False, - disabled_command_categories=[], - shell_command_control="denylist", - shell_denylist=["sudo", "su"], - shell_allowlist=[], - ai_settings_file="ai_settings.yaml", - prompt_settings_file="prompt_settings.yaml", - embedding_model="text-embedding-ada-002", - browse_spacy_language_model="en_core_web_sm", - temperature=0, - use_azure=False, - azure_config_file=AZURE_CONFIG_FILE, - execute_local_commands=False, - restrict_to_workspace=True, - openai_functions=False, - streamelements_voice="Brian", - text_to_speech_provider=default_tts_provider, - image_size=256, - huggingface_image_model="CompVis/stable-diffusion-v1-4", - audio_to_text_provider="huggingface", - 
sd_webui_url="http://localhost:7860", - selenium_web_browser="chrome", - selenium_headless=True, - user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", - memory_backend="json_file", - memory_index="auto-gpt-memory", - redis_host="localhost", - redis_port=6379, - wipe_redis_on_start=True, - plugins_allowlist=[], - plugins_denylist=[], - plugins_openai=[], - plugins_config_file=default_plugins_config_file, - chat_messages_enabled=True, - plugins=[], - authorise_key="y", - redis_password="", - ) + default_settings = Config() @classmethod def build_config_from_env(cls) -> Config: @@ -285,14 +277,19 @@ class ConfigBuilder(Configurable[Config]): config_dict["elevenlabs_voice_id"] = os.getenv( "ELEVENLABS_VOICE_ID", os.getenv("ELEVENLABS_VOICE_1_ID") ) + elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY") + if os.getenv("USE_MAC_OS_TTS"): + default_tts_provider = "macos" + elif elevenlabs_api_key: + default_tts_provider = "elevenlabs" + elif os.getenv("USE_BRIAN_TTS"): + default_tts_provider = "streamelements" + else: + default_tts_provider = "gtts" + config_dict["text_to_speech_provider"] = default_tts_provider config_dict["plugins_allowlist"] = _safe_split(os.getenv("ALLOWLISTED_PLUGINS")) config_dict["plugins_denylist"] = _safe_split(os.getenv("DENYLISTED_PLUGINS")) - config_dict["plugins_config"] = PluginsConfig.load_config( - config_dict["plugins_config_file"], - config_dict["plugins_denylist"], - config_dict["plugins_allowlist"], - ) with contextlib.suppress(TypeError): config_dict["image_size"] = int(os.getenv("IMAGE_SIZE")) @@ -316,7 +313,17 @@ class ConfigBuilder(Configurable[Config]): k: v for k, v in config_dict.items() if v is not None } - return cls.build_agent_configuration(config_dict_without_none_values) + config = cls.build_agent_configuration(config_dict_without_none_values) + + # Set secondary config variables (that depend on other config variables) + + config.plugins_config = PluginsConfig.load_config( + config.plugins_config_file, + config.plugins_denylist, + config.plugins_allowlist, + ) + + return config @classmethod def load_azure_config(cls, config_file: str = AZURE_CONFIG_FILE) -> Dict[str, str]: @@ -365,7 +372,7 @@ def check_openai_api_key(config: Config) -> None: print( Fore.GREEN + "OpenAI API key successfully set!\n" - + Fore.ORANGE + + Fore.YELLOW + "NOTE: The API key you've set is only temporary.\n" + "For longer sessions, please set it in .env file" + Fore.RESET diff --git a/autogpt/core/ARCHITECTURE_NOTES.md b/autogpt/core/ARCHITECTURE_NOTES.md new file mode 100644 index 00000000..b9fd2021 --- /dev/null +++ b/autogpt/core/ARCHITECTURE_NOTES.md @@ -0,0 +1,272 @@ +# Re-architecture Notes + +## Key Documents + +- [Planned Agent Workflow](https://whimsical.com/agent-workflow-v2-NmnTQ8R7sVo7M3S43XgXmZ) +- [Original Architecture Diagram](https://www.figma.com/file/fwdj44tPR7ArYtnGGUKknw/Modular-Architecture?type=whiteboard&node-id=0-1) - This is sadly well out of date at this point. +- [Kanban](https://github.com/orgs/Significant-Gravitas/projects/1/views/1?filterQuery=label%3Are-arch) + +## The Motivation + +The `master` branch of Auto-GPT is an organically grown amalgamation of many thoughts +and ideas about agent-driven autonomous systems. It lacks clear abstraction boundaries, +has issues of global state and poorly encapsulated state, and is generally just hard to +make effective changes to. Mainly it's just a system that's hard to make changes to. 
+And research in the field is moving fast, so we want to be able to try new ideas +quickly. + +## Initial Planning + +A large group of maintainers and contributors met to discuss the architectural +challenges associated with the existing codebase. Many much-desired features (building +new user interfaces, enabling project-specific agents, enabling multi-agent systems) +are bottlenecked by the global state in the system. We discussed the tradeoffs between +an incremental system transition and a big breaking version change and decided to go +for the breaking version change. We justified this by saying: + +- We can maintain, in essence, the same user experience as now even with a radical + restructuring of the codebase +- Our developer audience is struggling to use the existing codebase to build + applications and libraries of their own, so this breaking change will largely be + welcome. + +## Primary Goals + +- Separate the AutoGPT application code from the library code. +- Remove global state from the system +- Allow for multiple agents per user (with facilities for running simultaneously) +- Create a serializable representation of an Agent +- Encapsulate the core systems in abstractions with clear boundaries. + +## Secondary goals + +- Use existing tools to ditch any unnecessary cruft in the codebase (document loading, + json parsing, anything easier to replace than to port). +- Bring in the [core agent loop updates](https://whimsical.com/agent-workflow-v2-NmnTQ8R7sVo7M3S43XgXmZ) + being developed simultaneously by @Pwuts + +# The Agent Subsystems + +## Configuration + +We want a lot of things from a configuration system. We lean heavily on it in the +`master` branch to allow several parts of the system to communicate with each other. +[Recent work](https://github.com/Significant-Gravitas/Auto-GPT/pull/4737) has made it +so that the config is no longer a singleton object that is materialized from the import +state, but it's still treated as a +[god object](https://en.wikipedia.org/wiki/God_object) containing all information about +the system and _critically_ allowing any system to reference configuration information +about other parts of the system. + +### What we want + +- It should still be reasonable to collate the entire system configuration in a + sensible way. +- The configuration should be validatable and validated. +- The system configuration should be a _serializable_ representation of an `Agent`. +- The configuration system should provide a clear (albeit very low-level) contract + about user-configurable aspects of the system. +- The configuration should reasonably manage default values and user-provided overrides. +- The configuration system needs to handle credentials in a reasonable way. +- The configuration should be the representation of some amount of system state, like + api budgets and resource usage. These aspects are recorded in the configuration and + updated by the system itself. +- Agent systems should have encapsulated views of the configuration. E.g. the memory + system should know about memory configuration but nothing about command configuration. + +## Workspace + +There are two ways to think about the workspace: + +- The workspace is a scratch space for an agent where it can store files, write code, + and do pretty much whatever else it likes. +- The workspace is, at any given point in time, the single source of truth for what an + agent is.
It contains the serializable state (the configuration) as well as all + other working state (stored files, databases, memories, custom code). + +In the existing system there is **one** workspace. And because the workspace holds so +much agent state, that means a user can only work with one agent at a time. + +## Memory + +The memory system has been under extremely active development. +See [#3536](https://github.com/Significant-Gravitas/Auto-GPT/issues/3536) and +[#4208](https://github.com/Significant-Gravitas/Auto-GPT/pull/4208) for discussion and +work in the `master` branch. The TL;DR is +that we noticed a couple of months ago that the `Agent` performed **worse** with +permanent memory than without it. Since then the knowledge storage and retrieval +system has been [redesigned](https://whimsical.com/memory-system-8Ae6x6QkjDwQAUe9eVJ6w1) +and partially implemented in the `master` branch. + +## Planning/Prompt-Engineering + +The planning system is the system that translates user desires/agent intentions into +language model prompts. In the course of development, it has become pretty clear +that `Planning` is the wrong name for this system. + +### What we want + +- It should be incredibly obvious what's being passed to a language model, when it's + being passed, and what the language model response is. The landscape of language + model research is developing very rapidly, so building complex abstractions between + users/contributors and the language model interactions is going to make it very + difficult for us to nimbly respond to new research developments. +- Prompt-engineering should ideally be exposed in a parameterizable way to users. +- We should, where possible, leverage OpenAI's new + [function calling api](https://openai.com/blog/function-calling-and-other-api-updates) + to get outputs in a standard machine-readable format and avoid the deep pit of + parsing json (and fixing unparsable json). + +### Planning Strategies + +The [new agent workflow](https://whimsical.com/agent-workflow-v2-NmnTQ8R7sVo7M3S43XgXmZ) +has many, many interaction points for language models. We really would like to not +distribute prompt templates and raw strings all through the system. The re-arch solution +is to encapsulate language model interactions into planning strategies. +These strategies are defined by: + +- The `LanguageModelClassification` they use (`FAST` or `SMART`) +- A function `build_prompt` that takes strategy-specific arguments and constructs a + `LanguageModelPrompt` (a simple container for lists of messages and functions to + pass to the language model) +- A function `parse_content` that parses the response content (a dict) into a better + formatted dict. Contracts here are intentionally loose and will tighten once we have + at least one other language model provider. + +## Resources + +Resources are kinds of services we consume from external APIs. They may have associated +credentials and costs we need to manage. Management of those credentials is implemented +as manipulation of the resource configuration. We have two categories of resources +currently: + +- AI/ML model providers (including language model providers and embedding model providers, i.e. OpenAI) +- Memory providers (e.g. Pinecone, Weaviate, ChromaDB, etc.) + +### What we want + +- Resource abstractions should provide a common interface to different service providers + for a particular kind of service. +- Resource abstractions should manipulate the configuration to manage their credentials + and budget/accounting.
+- Resource abstractions should be composable over an API (e.g. I should be able to make + an OpenAI provider that is both a LanguageModelProvider and an EmbeddingModelProvider + and use it wherever I need those services). + +## Abilities + +Along with planning and memory usage, abilities are one of the major augmentations of +augmented language models. They allow us to expand the scope of what language models +can do by hooking them up to code they can execute to obtain new knowledge or influence +the world. + +### What we want + +- Abilities should have an extremely clear interface that users can write to. +- Abilities should have an extremely clear interface that a language model can + understand. +- Abilities should be declarative about their dependencies so the system can inject them +- Abilities should be executable (where sensible) in an async run loop. +- Abilities should not have side effects unless those side effects are clear in + their representation to an agent (e.g. the BrowseWeb ability shouldn't write a file, + but the WriteFile ability can). + +## Plugins + +Users want to add lots of features that we don't want to support as first-party. +Our solution to this is a plugin system to allow users to plug in their functionality or +to construct their agent from a public plugin marketplace. Our primary concern in the +re-arch is to build a stateless plugin service interface and a simple implementation +that can load plugins from installed packages or from zip files. Future efforts will +expand this system to allow plugins to load from a marketplace or some other kind +of service. + +### What is a Plugin + +"Plugin" is a catch-all term. It can refer to a number of things: + +- New commands for the agent to execute. This is the most common usage. +- Replacements for entire subsystems like memory or language model providers +- Application plugins that do things like send emails or communicate via WhatsApp +- The repositories contributors create that may themselves have multiple plugins in them. + +### Usage in the existing system + +The current plugin system is _hook-based_. This means plugins don't correspond to +kinds of objects in the system, but rather to times in the system at which we defer +execution to them. The main advantage of this setup is that user code can hijack +pretty much any behavior of the agent by injecting code that supersedes the normal +agent execution. The disadvantages to this approach are numerous: + +- We have absolutely no mechanisms to enforce any security measures because the threat + surface is everything. +- We cannot reason about agent behavior in a cohesive way because control flow can be + ceded to user code at pretty much any point and arbitrarily change or break the + agent behavior +- The interface for designing a plugin is kind of terrible and difficult to standardize +- The hook-based implementation means we couple ourselves to a particular flow of + control (or otherwise risk breaking plugin behavior). E.g. many of the hook targets + in the [old workflow](https://whimsical.com/agent-workflow-VAzeKcup3SR7awpNZJKTyK) + are not present or mean something entirely different in the + [new workflow](https://whimsical.com/agent-workflow-v2-NmnTQ8R7sVo7M3S43XgXmZ). +- Etc. + +### What we want + +- A concrete definition of a plugin that is narrow enough in scope that we can define + it well and reason about how it will work in the system.
+- A set of abstractions that let us define a plugin by its storage format and location +- A service interface that knows how to parse the plugin abstractions and turn them + into concrete classes and objects. + + +## Some Notes on how and why we'll use OO in this project + +First and foremost, Python itself is an object-oriented language. Its +underlying [data model](https://docs.python.org/3/reference/datamodel.html) is built +with object-oriented programming in mind. It offers useful tools like abstract base +classes to communicate interfaces to developers who want to, e.g., write plugins, or +help work on implementations. If we were working in a different language that offered +different tools, we'd use a different paradigm. + +While many things are classes in the re-arch, they are not classes in the same way. +There are three kinds of things (roughly) that are written as classes in the re-arch: +1. **Configuration**: Auto-GPT has *a lot* of configuration. This configuration + is *data* and we use **[Pydantic](https://docs.pydantic.dev/latest/)** to manage it, as + pydantic is basically industry standard for this stuff. It provides runtime validation + for all the configuration and allows us to easily serialize configuration to both basic + python types (dicts, lists, and primitives) as well as serialize to json, which is + important for us being able to put representations of agents + [on the wire](https://en.wikipedia.org/wiki/Wire_protocol) for web applications and + agent-to-agent communication. *These are essentially + [structs](https://en.wikipedia.org/wiki/Struct_(C_programming_language)) rather than + traditional classes.* +2. **Internal Data**: Very similar to configuration, Auto-GPT passes around boatloads + of internal data. We are interacting with language models and language model APIs + which means we are handling lots of *structured* but *raw* text. Here we also + leverage **pydantic** to both *parse* and *validate* the internal data and also to + give us concrete types which we can use static type checkers to validate against + and discover problems before they show up as bugs at runtime. *These are + essentially [structs](https://en.wikipedia.org/wiki/Struct_(C_programming_language)) + rather than traditional classes.* +3. **System Interfaces**: This is our primary traditional use of classes in the + re-arch. We have a bunch of systems. We want many of those systems to have + alternative implementations (e.g. via plugins). We use abstract base classes to + define interfaces to communicate with people who might want to provide those + plugins. We provide a single concrete implementation of most of those systems as a + subclass of the interface. This should not be controversial. + +The approach is consistent with +[prior](https://github.com/Significant-Gravitas/Auto-GPT/issues/2458) +[work](https://github.com/Significant-Gravitas/Auto-GPT/pull/2442) done by other +maintainers in this direction. + +From an organizational standpoint, OO programming is by far the most popular programming +paradigm (especially for Python). It's the one most often taught in programming classes +and the one with the most available online training for people interested in +contributing. + +Finally, and importantly, we scoped the plan and initial design of the re-arch as a +large group of maintainers and collaborators early on. This is consistent with the +design we chose and no-one offered alternatives.
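As a purely illustrative reading of the "Planning Strategies" contract described in the architecture notes above: `LanguageModelClassification`, `build_prompt`, and `parse_content` are the names used in the notes, but everything else in this sketch (class names, fields, prompt text) is an assumption, not the actual `autogpt.core` API.

```python
from __future__ import annotations

import enum
from dataclasses import dataclass, field


class LanguageModelClassification(enum.Enum):
    FAST = "fast_model"
    SMART = "smart_model"


@dataclass
class LanguageModelPrompt:
    # "a simple container for lists of messages and functions to pass to the language model"
    messages: list[dict] = field(default_factory=list)
    functions: list[dict] = field(default_factory=list)


class InitialPlanStrategy:
    """Hypothetical strategy: turn a user objective into an initial task list."""

    model_classification = LanguageModelClassification.SMART

    def build_prompt(self, user_objective: str) -> LanguageModelPrompt:
        # Strategy-specific arguments in, a prompt container out.
        return LanguageModelPrompt(
            messages=[
                {"role": "system", "content": "You are a planning assistant."},
                {"role": "user", "content": f"Break this objective into tasks: {user_objective}"},
            ]
        )

    def parse_content(self, response_content: dict) -> dict:
        # Intentionally loose contract: reshape the raw response dict into a better-formatted dict.
        return {"tasks": response_content.get("tasks", [])}
```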
+ \ No newline at end of file diff --git a/autogpt/core/README.md b/autogpt/core/README.md index f7bdf2d7..e5bbc108 100644 --- a/autogpt/core/README.md +++ b/autogpt/core/README.md @@ -1,7 +1,33 @@ -# Run instructions +# Auto-GPT Core + +This subpackage contains the ongoing work for the +[Auto-GPT Re-arch](https://github.com/Significant-Gravitas/Auto-GPT/issues/4770). It is +a work in progress and is not yet feature complete. In particular, it does not yet +have many of the Auto-GPT commands implemented and is pending ongoing work to +[re-incorporate vector-based memory and knowledge retrieval](https://github.com/Significant-Gravitas/Auto-GPT/issues/3536). + +## [Overview](ARCHITECTURE_NOTES.md) + +The Auto-GPT Re-arch is a re-implementation of the Auto-GPT agent that is designed to be more modular, +more extensible, and more maintainable than the original Auto-GPT agent. It is also designed to be +more accessible to new developers and to be easier to contribute to. It is not yet ready for production use. + +## Running the Re-arch Code There are two client applications for Auto-GPT included. +Unlike the main version of Auto-GPT, the re-arch requires you to actually install Auto-GPT in your python +environment to run this application. To do so, run + +``` +pip install -e REPOSITORY_ROOT +``` + +where `REPOSITORY_ROOT` is the root of the Auto-GPT repository on your machine. The `REPOSITORY_ROOT` +is the directory that contains the `setup.py` file and is the main, top-level directory of the repository +when you clone it. + ## CLI Application :star2: **This is the reference application I'm working with for now** :star2: @@ -11,21 +37,23 @@ The first app is a straight CLI application. I have not done anything yet to po - [Entry Point](https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/core/runner/cli_app/cli.py) - [Client Application](https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/core/runner/cli_app/main.py) -Auto-GPT must be installed in your python environment to run this application. To do so, run - -``` -pip install -e REPOSITORY_ROOT -``` - -where `REPOSITORY_ROOT` is the root of the Auto-GPT repository on your machine. - You'll then need a settings file. Run ``` python REPOSITORY_ROOT/autogpt/core/runner/cli_app/cli.py make-settings ``` -This will write a file called `default_agent_settings.yaml` with all the user-modifiable configuration keys to `~/auto-gpt/default_agent_settings.yml` and make the `auto-gpt` directory in your user directory if it doesn't exist). At a bare minimum, you'll need to set `openai.credentials.api_key` to your OpenAI API Key to run the model. +This will write a file called `default_agent_settings.yaml` with all the user-modifiable +configuration keys to `~/auto-gpt/default_agent_settings.yml` (and make the `auto-gpt` directory +in your user directory if it doesn't exist). Your user directory is located in different places +depending on your operating system: + +- On Linux, it's `/home/USERNAME` +- On Windows, it's `C:\Users\USERNAME` +- On Mac, it's `/Users/USERNAME` + +At a bare minimum, you'll need to set `openai.credentials.api_key` to your OpenAI API Key to run +the model (an example snippet is shown below). You can then run Auto-GPT with @@ -35,9 +63,15 @@ python REPOSITORY_ROOT/autogpt/core/runner/cli_app/cli.py run to launch the interaction loop.
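The exact contents of the generated settings file may vary between versions, but going by the dotted `openai.credentials.api_key` key mentioned above, the relevant portion of `~/auto-gpt/default_agent_settings.yml` should look roughly like the sketch below (an excerpt, not the full file):

```yaml
openai:
  credentials:
    api_key: "sk-..."  # replace with your own OpenAI API key
```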
-## CLI Web App +### CLI Web App -The second app is still a CLI, but it sets up a local webserver that the client application talks to rather than invoking calls to the Agent library code directly. This application is essentially a sketch at this point as the folks who were driving it have had less time (and likely not enough clarity) to proceed. +:warning: I am not actively developing this application. I am primarily working with the traditional CLI app +described above. It is a very good place to get involved if you have web application design experience and are +looking to get involved in the re-arch. + +The second app is still a CLI, but it sets up a local webserver that the client application talks to +rather than invoking calls to the Agent library code directly. This application is essentially a sketch +at this point as the folks who were driving it have had less time (and likely not enough clarity) to proceed. - [Entry Point](https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/core/runner/cli_web_app/cli.py) - [Client Application](https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/core/runner/cli_web_app/client/client.py) @@ -58,5 +92,3 @@ python REPOSITORY_ROOT/autogpt/core/runner/cli_web_app/cli.py client ``` This will launch a webserver and then start the client cli application to communicate with it. - -:warning: I am not actively developing this application. It is a very good place to get involved if you have web application design experience and are looking to get involved in the re-arch. \ No newline at end of file diff --git a/autogpt/llm/base.py b/autogpt/llm/base.py index 4ff80dc7..14a146b3 100644 --- a/autogpt/llm/base.py +++ b/autogpt/llm/base.py @@ -1,13 +1,14 @@ from __future__ import annotations +from copy import deepcopy from dataclasses import dataclass, field from math import ceil, floor -from typing import TYPE_CHECKING, List, Literal, Optional, TypedDict +from typing import TYPE_CHECKING, Literal, Optional, Type, TypedDict, TypeVar, overload if TYPE_CHECKING: from autogpt.llm.providers.openai import OpenAIFunctionCall -MessageRole = Literal["system", "user", "assistant"] +MessageRole = Literal["system", "user", "assistant", "function"] MessageType = Literal["ai_response", "action_result"] TText = list[int] @@ -19,6 +20,17 @@ class MessageDict(TypedDict): content: str +class ResponseMessageDict(TypedDict): + role: Literal["assistant"] + content: Optional[str] + function_call: Optional[FunctionCallDict] + + +class FunctionCallDict(TypedDict): + name: str + arguments: str + + @dataclass class Message: """OpenAI Message object containing a role and the message content""" @@ -68,15 +80,31 @@ class EmbeddingModelInfo(ModelInfo): embedding_dimensions: int +# Can be replaced by Self in Python 3.11 +TChatSequence = TypeVar("TChatSequence", bound="ChatSequence") + + @dataclass class ChatSequence: """Utility container for a chat sequence""" model: ChatModelInfo - messages: list[Message] = field(default_factory=list) + messages: list[Message] = field(default_factory=list[Message]) - def __getitem__(self, i: int): - return self.messages[i] + @overload + def __getitem__(self, key: int) -> Message: + ... + + @overload + def __getitem__(self: TChatSequence, key: slice) -> TChatSequence: + ... 
+ + def __getitem__(self: TChatSequence, key: int | slice) -> Message | TChatSequence: + if isinstance(key, slice): + copy = deepcopy(self) + copy.messages = self.messages[key] + return copy + return self.messages[key] def __iter__(self): return iter(self.messages) @@ -84,6 +112,14 @@ class ChatSequence: def __len__(self): return len(self.messages) + def add( + self, + message_role: MessageRole, + content: str, + type: MessageType | None = None, + ) -> None: + self.append(Message(message_role, content, type)) + def append(self, message: Message): return self.messages.append(message) @@ -95,21 +131,23 @@ class ChatSequence: self.messages.insert(index, message) @classmethod - def for_model(cls, model_name: str, messages: list[Message] | ChatSequence = []): + def for_model( + cls: Type[TChatSequence], + model_name: str, + messages: list[Message] | ChatSequence = [], + **kwargs, + ) -> TChatSequence: from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS if not model_name in OPEN_AI_CHAT_MODELS: raise ValueError(f"Unknown chat model '{model_name}'") - return ChatSequence( - model=OPEN_AI_CHAT_MODELS[model_name], messages=list(messages) + return cls( + model=OPEN_AI_CHAT_MODELS[model_name], messages=list(messages), **kwargs ) - def add(self, message_role: MessageRole, content: str): - self.messages.append(Message(message_role, content)) - @property - def token_length(self): + def token_length(self) -> int: from autogpt.llm.utils import count_message_tokens return count_message_tokens(self.messages, self.model.name) @@ -128,7 +166,7 @@ class ChatSequence: [f"{separator(m.role)}\n{m.content}" for m in self.messages] ) return f""" -============== ChatSequence ============== +============== {__class__.__name__} ============== Length: {self.token_length} tokens; {len(self.messages)} messages {formatted_messages} ========================================== @@ -140,24 +178,18 @@ class LLMResponse: """Standard response struct for a response from an LLM model.""" model_info: ModelInfo - prompt_tokens_used: int = 0 - completion_tokens_used: int = 0 @dataclass class EmbeddingModelResponse(LLMResponse): """Standard response struct for a response from an embedding model.""" - embedding: List[float] = field(default_factory=list) - - def __post_init__(self): - if self.completion_tokens_used: - raise ValueError("Embeddings should not have completion tokens used.") + embedding: list[float] = field(default_factory=list) @dataclass class ChatModelResponse(LLMResponse): - """Standard response struct for a response from an LLM model.""" + """Standard response struct for a response from a chat LLM.""" - content: Optional[str] = None - function_call: Optional[OpenAIFunctionCall] = None + content: Optional[str] + function_call: Optional[OpenAIFunctionCall] diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py index e97b7936..f08fdab4 100644 --- a/autogpt/llm/chat.py +++ b/autogpt/llm/chat.py @@ -3,17 +3,18 @@ from __future__ import annotations import time from typing import TYPE_CHECKING -from autogpt.llm.providers.openai import get_openai_command_specs - if TYPE_CHECKING: - from autogpt.agent.agent import Agent + from autogpt.agents.agent import Agent from autogpt.config import Config from autogpt.llm.api_manager import ApiManager from autogpt.llm.base import ChatSequence, Message +from autogpt.llm.providers.openai import ( + count_openai_functions_tokens, + get_openai_command_specs, +) from autogpt.llm.utils import count_message_tokens, create_chat_completion -from autogpt.log_cycle.log_cycle import 
CURRENT_CONTEXT_FILE_NAME -from autogpt.logs import logger +from autogpt.logs import CURRENT_CONTEXT_FILE_NAME, logger # TODO: Change debug from hardcode to argument @@ -73,33 +74,28 @@ def chat_with_ai( ], ) - # Add messages from the full message history until we reach the token limit - next_message_to_add_index = len(agent.history) - 1 - insertion_index = len(message_sequence) # Count the currently used tokens current_tokens_used = message_sequence.token_length + insertion_index = len(message_sequence) - # while current_tokens_used > 2500: - # # remove memories until we are under 2500 tokens - # relevant_memory = relevant_memory[:-1] - # ( - # next_message_to_add_index, - # current_tokens_used, - # insertion_index, - # current_context, - # ) = generate_context( - # prompt, relevant_memory, agent.history, model - # ) + # Account for tokens used by OpenAI functions + openai_functions = None + if agent.config.openai_functions: + openai_functions = get_openai_command_specs(agent.command_registry) + functions_tlength = count_openai_functions_tokens(openai_functions, model) + current_tokens_used += functions_tlength + logger.debug(f"OpenAI Functions take up {functions_tlength} tokens in API call") # Account for user input (appended later) user_input_msg = Message("user", triggering_prompt) - current_tokens_used += count_message_tokens([user_input_msg], model) + current_tokens_used += count_message_tokens(user_input_msg, model) - current_tokens_used += 500 # Reserve space for new_summary_message + current_tokens_used += agent.history.max_summary_tlength # Reserve space current_tokens_used += 500 # Reserve space for the openai functions TODO improve - # Add Messages until the token limit is reached or there are no more messages to add. - for cycle in reversed(list(agent.history.per_cycle(agent.config))): + # Add historical Messages until the token limit is reached + # or there are no more messages to add. 
+ for cycle in reversed(list(agent.history.per_cycle())): messages_to_add = [msg for msg in cycle if msg is not None] tokens_to_add = count_message_tokens(messages_to_add, model) if current_tokens_used + tokens_to_add > send_token_limit: @@ -115,9 +111,9 @@ def chat_with_ai( new_summary_message, trimmed_messages = agent.history.trim_messages( current_message_chain=list(message_sequence), config=agent.config ) - tokens_to_add = count_message_tokens([new_summary_message], model) + tokens_to_add = count_message_tokens(new_summary_message, model) message_sequence.insert(insertion_index, new_summary_message) - current_tokens_used += tokens_to_add - 500 + current_tokens_used += tokens_to_add - agent.history.max_summary_tlength # FIXME: uncomment when memory is back in use # memory_store = get_memory(config) @@ -143,7 +139,7 @@ def chat_with_ai( ) logger.debug(budget_message) message_sequence.add("system", budget_message) - current_tokens_used += count_message_tokens([message_sequence[-1]], model) + current_tokens_used += count_message_tokens(message_sequence[-1], model) # Append user input, the length of this is accounted for above message_sequence.append(user_input_msg) @@ -157,14 +153,14 @@ def chat_with_ai( ) if not plugin_response or plugin_response == "": continue - tokens_to_add = count_message_tokens( - [Message("system", plugin_response)], model - ) + tokens_to_add = count_message_tokens(Message("system", plugin_response), model) if current_tokens_used + tokens_to_add > send_token_limit: logger.debug(f"Plugin response too long, skipping: {plugin_response}") logger.debug(f"Plugins remaining at stop: {plugin_count - i}") break message_sequence.add("system", plugin_response) + current_tokens_used += tokens_to_add + # Calculate remaining tokens tokens_remaining = token_limit - current_tokens_used # assert tokens_remaining >= 0, "Tokens remaining is negative. 
@@ -196,7 +192,7 @@ def chat_with_ai( assistant_reply = create_chat_completion( prompt=message_sequence, config=agent.config, - functions=get_openai_command_specs(agent), + functions=openai_functions, max_tokens=tokens_remaining, ) diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index baf7ab87..f00a1f28 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -3,7 +3,7 @@ from __future__ import annotations import functools import time from dataclasses import dataclass -from typing import TYPE_CHECKING, List, Optional +from typing import Callable, List, Optional from unittest.mock import patch import openai @@ -12,9 +12,6 @@ from colorama import Fore, Style from openai.error import APIError, RateLimitError, ServiceUnavailableError, Timeout from openai.openai_object import OpenAIObject -if TYPE_CHECKING: - from autogpt.agent.agent import Agent - from autogpt.llm.base import ( ChatModelInfo, EmbeddingModelInfo, @@ -23,6 +20,7 @@ from autogpt.llm.base import ( TText, ) from autogpt.logs import logger +from autogpt.models.command_registry import CommandRegistry OPEN_AI_CHAT_MODELS = { info.name: info @@ -114,7 +112,7 @@ OPEN_AI_MODELS: dict[str, ChatModelInfo | EmbeddingModelInfo | TextModelInfo] = } -def meter_api(func): +def meter_api(func: Callable): """Adds ApiManager metering to functions which make OpenAI API calls""" from autogpt.llm.api_manager import ApiManager @@ -152,7 +150,7 @@ def meter_api(func): def retry_api( - num_retries: int = 10, + max_retries: int = 10, backoff_base: float = 2.0, warn_user: bool = True, ): @@ -164,43 +162,49 @@ def retry_api( warn_user bool: Whether to warn the user. Defaults to True. """ error_messages = { - ServiceUnavailableError: f"{Fore.RED}Error: The OpenAI API engine is currently overloaded, passing...{Fore.RESET}", - RateLimitError: f"{Fore.RED}Error: Reached rate limit, passing...{Fore.RESET}", + ServiceUnavailableError: f"{Fore.RED}Error: The OpenAI API engine is currently overloaded{Fore.RESET}", + RateLimitError: f"{Fore.RED}Error: Reached rate limit{Fore.RESET}", } api_key_error_msg = ( f"Please double check that you have setup a " f"{Fore.CYAN + Style.BRIGHT}PAID{Style.RESET_ALL} OpenAI API Account. You can " f"read more here: {Fore.CYAN}https://docs.agpt.co/setup/#getting-an-api-key{Fore.RESET}" ) - backoff_msg = ( - f"{Fore.RED}Error: API Bad gateway. 
Waiting {{backoff}} seconds...{Fore.RESET}" - ) + backoff_msg = f"{Fore.RED}Waiting {{backoff}} seconds...{Fore.RESET}" - def _wrapper(func): + def _wrapper(func: Callable): @functools.wraps(func) def _wrapped(*args, **kwargs): user_warned = not warn_user - num_attempts = num_retries + 1 # +1 for the first attempt - for attempt in range(1, num_attempts + 1): + max_attempts = max_retries + 1 # +1 for the first attempt + for attempt in range(1, max_attempts + 1): try: return func(*args, **kwargs) except (RateLimitError, ServiceUnavailableError) as e: - if attempt == num_attempts: + if attempt >= max_attempts or ( + # User's API quota exceeded + isinstance(e, RateLimitError) + and (err := getattr(e, "error", {})) + and err.get("code") == "insufficient_quota" + ): raise error_msg = error_messages[type(e)] - logger.debug(error_msg) + logger.warn(error_msg) if not user_warned: logger.double_check(api_key_error_msg) + logger.debug(f"Status: {e.http_status}") + logger.debug(f"Response body: {e.json_body}") + logger.debug(f"Response headers: {e.headers}") user_warned = True except (APIError, Timeout) as e: - if (e.http_status not in [429, 502]) or (attempt == num_attempts): + if (e.http_status not in [429, 502]) or (attempt == max_attempts): raise backoff = backoff_base ** (attempt + 2) - logger.debug(backoff_msg.format(backoff=backoff)) + logger.warn(backoff_msg.format(backoff=backoff)) time.sleep(backoff) return _wrapped @@ -301,13 +305,13 @@ class OpenAIFunctionSpec: @dataclass class ParameterSpec: name: str - type: str + type: str # TODO: add enum support description: Optional[str] required: bool = False @property - def __dict__(self): - """Output an OpenAI-consumable function specification""" + def schema(self) -> dict[str, str | dict | list]: + """Returns an OpenAI-consumable function specification""" return { "name": self.name, "description": self.description, @@ -326,14 +330,44 @@ class OpenAIFunctionSpec: }, } + @property + def prompt_format(self) -> str: + """Returns the function formatted similarly to the way OpenAI does it internally: + https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18 -def get_openai_command_specs(agent: Agent) -> list[OpenAIFunctionSpec]: + Example: + ```ts + // Get the current weather in a given location + type get_current_weather = (_: { + // The city and state, e.g. San Francisco, CA + location: string, + unit?: "celsius" | "fahrenheit", + }) => any; + ``` + """ + + def param_signature(p_spec: OpenAIFunctionSpec.ParameterSpec) -> str: + # TODO: enum type support + return ( + f"// {p_spec.description}\n" if p_spec.description else "" + ) + f"{p_spec.name}{'' if p_spec.required else '?'}: {p_spec.type}," + + return "\n".join( + [ + f"// {self.description}", + f"type {self.name} = (_ :{{", + *[param_signature(p) for p in self.parameters.values()], + "}) => any;", + ] + ) + + +def get_openai_command_specs( + command_registry: CommandRegistry, +) -> list[OpenAIFunctionSpec]: """Get OpenAI-consumable function specs for the agent's available commands. 
see https://platform.openai.com/docs/guides/gpt/function-calling """ - if not agent.config.openai_functions: - return [] - return [ OpenAIFunctionSpec( name=command.name, @@ -348,5 +382,48 @@ def get_openai_command_specs(agent: Agent) -> list[OpenAIFunctionSpec]: for param in command.parameters }, ) - for command in agent.command_registry.commands.values() + for command in command_registry.commands.values() ] + + +def count_openai_functions_tokens( + functions: list[OpenAIFunctionSpec], for_model: str +) -> int: + """Returns the number of tokens taken up by a set of function definitions + + Reference: https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18 + """ + from autogpt.llm.utils import count_string_tokens + + return count_string_tokens( + f"# Tools\n\n## functions\n\n{format_function_specs_as_typescript_ns(functions)}", + for_model, + ) + + +def format_function_specs_as_typescript_ns(functions: list[OpenAIFunctionSpec]) -> str: + """Returns a function signature block in the format used by OpenAI internally: + https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18 + + For use with `count_string_tokens` to determine token usage of provided functions. + + Example: + ```ts + namespace functions { + + // Get the current weather in a given location + type get_current_weather = (_: { + // The city and state, e.g. San Francisco, CA + location: string, + unit?: "celsius" | "fahrenheit", + }) => any; + + } // namespace functions + ``` + """ + + return ( + "namespace functions {\n\n" + + "\n\n".join(f.prompt_format for f in functions) + + "\n\n} // namespace functions" + ) diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py index e0ff1473..ff485260 100644 --- a/autogpt/llm/utils/__init__.py +++ b/autogpt/llm/utils/__init__.py @@ -7,12 +7,19 @@ from colorama import Fore from autogpt.config import Config from ..api_manager import ApiManager -from ..base import ChatModelResponse, ChatSequence, Message +from ..base import ( + ChatModelResponse, + ChatSequence, + FunctionCallDict, + Message, + ResponseMessageDict, +) from ..providers import openai as iopenai from ..providers.openai import ( OPEN_AI_CHAT_MODELS, OpenAIFunctionCall, OpenAIFunctionSpec, + count_openai_functions_tokens, ) from .token_counter import * @@ -111,7 +118,13 @@ def create_chat_completion( if temperature is None: temperature = config.temperature if max_tokens is None: - max_tokens = OPEN_AI_CHAT_MODELS[model].max_tokens - prompt.token_length + prompt_tlength = prompt.token_length + max_tokens = OPEN_AI_CHAT_MODELS[model].max_tokens - prompt_tlength + logger.debug(f"Prompt length: {prompt_tlength} tokens") + if functions: + functions_tlength = count_openai_functions_tokens(functions, model) + max_tokens -= functions_tlength + logger.debug(f"Functions take up {functions_tlength} tokens in API call") logger.debug( f"{Fore.GREEN}Creating chat completion with model {model}, temperature {temperature}, max_tokens {max_tokens}{Fore.RESET}" @@ -138,9 +151,8 @@ def create_chat_completion( if functions: chat_completion_kwargs["functions"] = [ - function.__dict__ for function in functions + function.schema for function in functions ] - logger.debug(f"Function dicts: {chat_completion_kwargs['functions']}") response = iopenai.create_chat_completion( messages=prompt.raw(), @@ -152,19 +164,24 @@ def create_chat_completion( logger.error(response.error) raise RuntimeError(response.error) - first_message = response.choices[0].message + 
first_message: ResponseMessageDict = response.choices[0].message content: str | None = first_message.get("content") - function_call: OpenAIFunctionCall | None = first_message.get("function_call") + function_call: FunctionCallDict | None = first_message.get("function_call") for plugin in config.plugins: if not plugin.can_handle_on_response(): continue + # TODO: function call support in plugin.on_response() content = plugin.on_response(content) return ChatModelResponse( model_info=OPEN_AI_CHAT_MODELS[model], content=content, - function_call=function_call, + function_call=OpenAIFunctionCall( + name=function_call["name"], arguments=function_call["arguments"] + ) + if function_call + else None, ) diff --git a/autogpt/llm/utils/token_counter.py b/autogpt/llm/utils/token_counter.py index e34dbd1c..b0791e65 100644 --- a/autogpt/llm/utils/token_counter.py +++ b/autogpt/llm/utils/token_counter.py @@ -1,7 +1,7 @@ """Functions for counting the number of tokens in a message or string.""" from __future__ import annotations -from typing import List +from typing import List, overload import tiktoken @@ -9,8 +9,18 @@ from autogpt.llm.base import Message from autogpt.logs import logger +@overload +def count_message_tokens(messages: Message, model: str = "gpt-3.5-turbo") -> int: + ... + + +@overload +def count_message_tokens(messages: List[Message], model: str = "gpt-3.5-turbo") -> int: + ... + + def count_message_tokens( - messages: List[Message], model: str = "gpt-3.5-turbo-0301" + messages: Message | List[Message], model: str = "gpt-3.5-turbo" ) -> int: """ Returns the number of tokens used by a list of messages. @@ -24,6 +34,9 @@ def count_message_tokens( Returns: int: The number of tokens used by the list of messages. """ + if isinstance(messages, Message): + messages = [messages] + if model.startswith("gpt-3.5-turbo"): tokens_per_message = ( 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n diff --git a/autogpt/log_cycle/__init__.py b/autogpt/log_cycle/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/autogpt/log_cycle/json_handler.py b/autogpt/log_cycle/json_handler.py deleted file mode 100644 index 51ae9ae0..00000000 --- a/autogpt/log_cycle/json_handler.py +++ /dev/null @@ -1,20 +0,0 @@ -import json -import logging - - -class JsonFileHandler(logging.FileHandler): - def __init__(self, filename, mode="a", encoding=None, delay=False): - super().__init__(filename, mode, encoding, delay) - - def emit(self, record): - json_data = json.loads(self.format(record)) - with open(self.baseFilename, "w", encoding="utf-8") as f: - json.dump(json_data, f, ensure_ascii=False, indent=4) - - -import logging - - -class JsonFormatter(logging.Formatter): - def format(self, record): - return record.msg diff --git a/autogpt/logs/__init__.py b/autogpt/logs/__init__.py new file mode 100644 index 00000000..40df21cb --- /dev/null +++ b/autogpt/logs/__init__.py @@ -0,0 +1,15 @@ +from .formatters import AutoGptFormatter, JsonFormatter, remove_color_codes +from .handlers import ConsoleHandler, JsonFileHandler, TypingConsoleHandler +from .log_cycle import ( + CURRENT_CONTEXT_FILE_NAME, + FULL_MESSAGE_HISTORY_FILE_NAME, + NEXT_ACTION_FILE_NAME, + PROMPT_SUMMARY_FILE_NAME, + PROMPT_SUPERVISOR_FEEDBACK_FILE_NAME, + SUMMARY_FILE_NAME, + SUPERVISOR_FEEDBACK_FILE_NAME, + USER_INPUT_FILE_NAME, + LogCycleHandler, +) +from .logger import Logger, logger +from .utils import print_assistant_thoughts, remove_ansi_escape diff --git a/autogpt/logs/formatters.py b/autogpt/logs/formatters.py new file mode 
100644 index 00000000..50e7c333 --- /dev/null +++ b/autogpt/logs/formatters.py @@ -0,0 +1,41 @@ +import logging +import re + +from colorama import Style + + +class AutoGptFormatter(logging.Formatter): + """ + Allows to handle custom placeholders 'title_color' and 'message_no_color'. + To use this formatter, make sure to pass 'color', 'title' as log extras. + """ + + def format(self, record: logging.LogRecord) -> str: + if hasattr(record, "color"): + record.title_color = ( + getattr(record, "color") + + getattr(record, "title", "") + + " " + + Style.RESET_ALL + ) + else: + record.title_color = getattr(record, "title", "") + + # Add this line to set 'title' to an empty string if it doesn't exist + record.title = getattr(record, "title", "") + + if hasattr(record, "msg"): + record.message_no_color = remove_color_codes(getattr(record, "msg")) + else: + record.message_no_color = "" + return super().format(record) + + +def remove_color_codes(s: str) -> str: + ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + return ansi_escape.sub("", s) + + +class JsonFormatter(logging.Formatter): + def format(self, record: logging.LogRecord): + return record.msg diff --git a/autogpt/logs/handlers.py b/autogpt/logs/handlers.py new file mode 100644 index 00000000..c60b0575 --- /dev/null +++ b/autogpt/logs/handlers.py @@ -0,0 +1,47 @@ +import json +import logging +import random +import time + + +class ConsoleHandler(logging.StreamHandler): + def emit(self, record: logging.LogRecord) -> None: + msg = self.format(record) + try: + print(msg) + except Exception: + self.handleError(record) + + +class TypingConsoleHandler(logging.StreamHandler): + """Output stream to console using simulated typing""" + + def emit(self, record: logging.LogRecord): + min_typing_speed = 0.05 + max_typing_speed = 0.01 + + msg = self.format(record) + try: + words = msg.split() + for i, word in enumerate(words): + print(word, end="", flush=True) + if i < len(words) - 1: + print(" ", end="", flush=True) + typing_speed = random.uniform(min_typing_speed, max_typing_speed) + time.sleep(typing_speed) + # type faster after each word + min_typing_speed = min_typing_speed * 0.95 + max_typing_speed = max_typing_speed * 0.95 + print() + except Exception: + self.handleError(record) + + +class JsonFileHandler(logging.FileHandler): + def __init__(self, filename: str, mode="a", encoding=None, delay=False): + super().__init__(filename, mode, encoding, delay) + + def emit(self, record: logging.LogRecord): + json_data = json.loads(self.format(record)) + with open(self.baseFilename, "w", encoding="utf-8") as f: + json.dump(json_data, f, ensure_ascii=False, indent=4) diff --git a/autogpt/log_cycle/log_cycle.py b/autogpt/logs/log_cycle.py similarity index 97% rename from autogpt/log_cycle/log_cycle.py rename to autogpt/logs/log_cycle.py index ebceb57e..f3cbf166 100644 --- a/autogpt/log_cycle/log_cycle.py +++ b/autogpt/logs/log_cycle.py @@ -2,7 +2,7 @@ import json import os from typing import Any, Dict, Union -from autogpt.logs import logger +from .logger import logger DEFAULT_PREFIX = "agent" FULL_MESSAGE_HISTORY_FILE_NAME = "full_message_history.json" @@ -42,7 +42,7 @@ class LogCycleHandler: return outer_folder_path - def get_agent_short_name(self, ai_name): + def get_agent_short_name(self, ai_name: str) -> str: return ai_name[:15].rstrip() if ai_name else DEFAULT_PREFIX def create_inner_directory(self, outer_folder_path: str, cycle_count: int) -> str: diff --git a/autogpt/logs.py b/autogpt/logs/logger.py similarity index 53% rename from 
autogpt/logs.py rename to autogpt/logs/logger.py index 7ff80542..e4cedc36 100644 --- a/autogpt/logs.py +++ b/autogpt/logs/logger.py @@ -3,20 +3,18 @@ from __future__ import annotations import logging import os -import random -import re -import time -from logging import LogRecord from typing import TYPE_CHECKING, Any, Optional -from colorama import Fore, Style +from colorama import Fore if TYPE_CHECKING: from autogpt.config import Config -from autogpt.log_cycle.json_handler import JsonFileHandler, JsonFormatter from autogpt.singleton import Singleton +from .formatters import AutoGptFormatter, JsonFormatter +from .handlers import ConsoleHandler, JsonFileHandler, TypingConsoleHandler + class Logger(metaclass=Singleton): """ @@ -100,8 +98,13 @@ class Logger(metaclass=Singleton): self.typing_logger.addHandler(self.console_handler) def typewriter_log( - self, title="", title_color="", content="", speak_text=False, level=logging.INFO - ): + self, + title: str = "", + title_color: str = "", + content: str = "", + speak_text: bool = False, + level: int = logging.INFO, + ) -> None: from autogpt.speech import say_text if speak_text and self.config and self.config.speak_mode: @@ -122,29 +125,29 @@ class Logger(metaclass=Singleton): def debug( self, - message, - title="", - title_color="", - ): + message: str, + title: str = "", + title_color: str = "", + ) -> None: self._log(title, title_color, message, logging.DEBUG) def info( self, - message, - title="", - title_color="", - ): + message: str, + title: str = "", + title_color: str = "", + ) -> None: self._log(title, title_color, message, logging.INFO) def warn( self, - message, - title="", - title_color="", - ): + message: str, + title: str = "", + title_color: str = "", + ) -> None: self._log(title, title_color, message, logging.WARN) - def error(self, title, message=""): + def error(self, title: str, message: str = "") -> None: self._log(title, Fore.RED, message, logging.ERROR) def _log( @@ -152,8 +155,8 @@ class Logger(metaclass=Singleton): title: str = "", title_color: str = "", message: str = "", - level=logging.INFO, - ): + level: int = logging.INFO, + ) -> None: if message: if isinstance(message, list): message = " ".join(message) @@ -161,11 +164,11 @@ class Logger(metaclass=Singleton): level, message, extra={"title": str(title), "color": str(title_color)} ) - def set_level(self, level): + def set_level(self, level: logging._Level) -> None: self.logger.setLevel(level) self.typing_logger.setLevel(level) - def double_check(self, additionalText=None): + def double_check(self, additionalText: Optional[str] = None) -> None: if not additionalText: additionalText = ( "Please ensure you've setup and configured everything" @@ -191,131 +194,10 @@ class Logger(metaclass=Singleton): self.json_logger.debug(data) self.json_logger.removeHandler(json_data_handler) - def get_log_directory(self): + def get_log_directory(self) -> str: this_files_dir_path = os.path.dirname(__file__) - log_dir = os.path.join(this_files_dir_path, "../logs") + log_dir = os.path.join(this_files_dir_path, "../../logs") return os.path.abspath(log_dir) -""" -Output stream to console using simulated typing -""" - - -class TypingConsoleHandler(logging.StreamHandler): - def emit(self, record): - min_typing_speed = 0.05 - max_typing_speed = 0.01 - - msg = self.format(record) - try: - words = msg.split() - for i, word in enumerate(words): - print(word, end="", flush=True) - if i < len(words) - 1: - print(" ", end="", flush=True) - typing_speed = random.uniform(min_typing_speed, 
max_typing_speed) - time.sleep(typing_speed) - # type faster after each word - min_typing_speed = min_typing_speed * 0.95 - max_typing_speed = max_typing_speed * 0.95 - print() - except Exception: - self.handleError(record) - - -class ConsoleHandler(logging.StreamHandler): - def emit(self, record) -> None: - msg = self.format(record) - try: - print(msg) - except Exception: - self.handleError(record) - - -class AutoGptFormatter(logging.Formatter): - """ - Allows to handle custom placeholders 'title_color' and 'message_no_color'. - To use this formatter, make sure to pass 'color', 'title' as log extras. - """ - - def format(self, record: LogRecord) -> str: - if hasattr(record, "color"): - record.title_color = ( - getattr(record, "color") - + getattr(record, "title", "") - + " " - + Style.RESET_ALL - ) - else: - record.title_color = getattr(record, "title", "") - - # Add this line to set 'title' to an empty string if it doesn't exist - record.title = getattr(record, "title", "") - - if hasattr(record, "msg"): - record.message_no_color = remove_color_codes(getattr(record, "msg")) - else: - record.message_no_color = "" - return super().format(record) - - -def remove_color_codes(s: str) -> str: - ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") - return ansi_escape.sub("", s) - - -def remove_ansi_escape(s: str) -> str: - return s.replace("\x1B", "") - - logger = Logger() - - -def print_assistant_thoughts( - ai_name: object, - assistant_reply_json_valid: object, - config: Config, -) -> None: - from autogpt.speech import say_text - - assistant_thoughts_reasoning = None - assistant_thoughts_plan = None - assistant_thoughts_speak = None - assistant_thoughts_criticism = None - - assistant_thoughts = assistant_reply_json_valid.get("thoughts", {}) - assistant_thoughts_text = remove_ansi_escape(assistant_thoughts.get("text", "")) - if assistant_thoughts: - assistant_thoughts_reasoning = remove_ansi_escape( - assistant_thoughts.get("reasoning") - ) - assistant_thoughts_plan = remove_ansi_escape(assistant_thoughts.get("plan")) - assistant_thoughts_criticism = remove_ansi_escape( - assistant_thoughts.get("criticism") - ) - assistant_thoughts_speak = remove_ansi_escape(assistant_thoughts.get("speak")) - logger.typewriter_log( - f"{ai_name.upper()} THOUGHTS:", Fore.YELLOW, f"{assistant_thoughts_text}" - ) - logger.typewriter_log("REASONING:", Fore.YELLOW, f"{assistant_thoughts_reasoning}") - if assistant_thoughts_plan: - logger.typewriter_log("PLAN:", Fore.YELLOW, "") - # If it's a list, join it into a string - if isinstance(assistant_thoughts_plan, list): - assistant_thoughts_plan = "\n".join(assistant_thoughts_plan) - elif isinstance(assistant_thoughts_plan, dict): - assistant_thoughts_plan = str(assistant_thoughts_plan) - - # Split the input_string using the newline character and dashes - lines = assistant_thoughts_plan.split("\n") - for line in lines: - line = line.lstrip("- ") - logger.typewriter_log("- ", Fore.GREEN, line.strip()) - logger.typewriter_log("CRITICISM:", Fore.YELLOW, f"{assistant_thoughts_criticism}") - # Speak the assistant's thoughts - if assistant_thoughts_speak: - if config.speak_mode: - say_text(assistant_thoughts_speak, config) - else: - logger.typewriter_log("SPEAK:", Fore.YELLOW, f"{assistant_thoughts_speak}") diff --git a/autogpt/logs/utils.py b/autogpt/logs/utils.py new file mode 100644 index 00000000..637c917f --- /dev/null +++ b/autogpt/logs/utils.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from colorama 
import Fore + +if TYPE_CHECKING: + from autogpt.config import Config + +from .logger import logger + + +def print_assistant_thoughts( + ai_name: str, + assistant_reply_json_valid: dict, + config: Config, +) -> None: + from autogpt.speech import say_text + + assistant_thoughts_reasoning = None + assistant_thoughts_plan = None + assistant_thoughts_speak = None + assistant_thoughts_criticism = None + + assistant_thoughts = assistant_reply_json_valid.get("thoughts", {}) + assistant_thoughts_text = remove_ansi_escape(assistant_thoughts.get("text", "")) + if assistant_thoughts: + assistant_thoughts_reasoning = remove_ansi_escape( + assistant_thoughts.get("reasoning", "") + ) + assistant_thoughts_plan = remove_ansi_escape(assistant_thoughts.get("plan", "")) + assistant_thoughts_criticism = remove_ansi_escape( + assistant_thoughts.get("criticism", "") + ) + assistant_thoughts_speak = remove_ansi_escape( + assistant_thoughts.get("speak", "") + ) + logger.typewriter_log( + f"{ai_name.upper()} THOUGHTS:", Fore.YELLOW, assistant_thoughts_text + ) + logger.typewriter_log("REASONING:", Fore.YELLOW, str(assistant_thoughts_reasoning)) + if assistant_thoughts_plan: + logger.typewriter_log("PLAN:", Fore.YELLOW, "") + # If it's a list, join it into a string + if isinstance(assistant_thoughts_plan, list): + assistant_thoughts_plan = "\n".join(assistant_thoughts_plan) + elif isinstance(assistant_thoughts_plan, dict): + assistant_thoughts_plan = str(assistant_thoughts_plan) + + # Split the input_string using the newline character and dashes + lines = assistant_thoughts_plan.split("\n") + for line in lines: + line = line.lstrip("- ") + logger.typewriter_log("- ", Fore.GREEN, line.strip()) + logger.typewriter_log("CRITICISM:", Fore.YELLOW, f"{assistant_thoughts_criticism}") + # Speak the assistant's thoughts + if assistant_thoughts_speak: + if config.speak_mode: + say_text(assistant_thoughts_speak, config) + else: + logger.typewriter_log("SPEAK:", Fore.YELLOW, f"{assistant_thoughts_speak}") + + +def remove_ansi_escape(s: str) -> str: + return s.replace("\x1B", "") diff --git a/autogpt/main.py b/autogpt/main.py index 08ac4b40..0da2d193 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -6,7 +6,7 @@ from typing import Optional from colorama import Fore, Style -from autogpt.agent import Agent +from autogpt.agents import Agent from autogpt.config.config import ConfigBuilder, check_openai_api_key from autogpt.configurator import create_config from autogpt.logs import logger @@ -28,7 +28,6 @@ COMMAND_CATEGORIES = [ "autogpt.commands.file_operations", "autogpt.commands.web_search", "autogpt.commands.web_selenium", - "autogpt.app", "autogpt.commands.task_statuses", ] diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index c7e8b73a..c718f2ed 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -2,49 +2,45 @@ from __future__ import annotations import copy import json -from dataclasses import dataclass, field -from typing import TYPE_CHECKING +from dataclasses import dataclass +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: - from autogpt.agent import Agent + from autogpt.agents import Agent from autogpt.config import Config from autogpt.json_utils.utilities import extract_json_from_response -from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType +from autogpt.llm.base import ChatSequence, Message from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS -from autogpt.llm.utils import count_string_tokens, 
create_chat_completion -from autogpt.log_cycle.log_cycle import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME -from autogpt.logs import logger +from autogpt.llm.utils import ( + count_message_tokens, + count_string_tokens, + create_chat_completion, +) +from autogpt.logs import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME, logger @dataclass -class MessageHistory: - agent: Agent - - messages: list[Message] = field(default_factory=list) +class MessageHistory(ChatSequence): + max_summary_tlength: int = 500 + agent: Optional[Agent] = None summary: str = "I was created" - last_trimmed_index: int = 0 - def __getitem__(self, i: int): - return self.messages[i] + SUMMARIZATION_PROMPT = '''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember. - def __iter__(self): - return iter(self.messages) +You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. - def __len__(self): - return len(self.messages) +Summary So Far: +""" +{summary} +""" - def add( - self, - role: MessageRole, - content: str, - type: MessageType | None = None, - ): - return self.append(Message(role, content, type)) - - def append(self, message: Message): - return self.messages.append(message) +Latest Development: +""" +{new_events} +""" +''' def trim_messages( self, current_message_chain: list[Message], config: Config @@ -84,7 +80,7 @@ class MessageHistory: return new_summary_message, new_messages_not_in_chain - def per_cycle(self, config: Config, messages: list[Message] | None = None): + def per_cycle(self, messages: list[Message] | None = None): """ Yields: Message: a message containing user input @@ -119,26 +115,33 @@ class MessageHistory: ) def update_running_summary( - self, new_events: list[Message], config: Config + self, + new_events: list[Message], + config: Config, + max_summary_length: Optional[int] = None, ) -> Message: """ - This function takes a list of dictionaries representing new events and combines them with the current summary, - focusing on key and potentially important information to remember. The updated summary is returned in a message - formatted in the 1st person past tense. + This function takes a list of Message objects and updates the running summary + to include the events they describe. The updated summary is returned + in a Message formatted in the 1st person past tense. Args: - new_events (List[Dict]): A list of dictionaries containing the latest events to be added to the summary. + new_events: A list of Messages containing the latest events to be added to the summary. Returns: - str: A message containing the updated summary of actions, formatted in the 1st person past tense. + Message: a Message containing the updated running summary. Example: + ```py new_events = [{"event": "entered the kitchen."}, {"event": "found a scrawled note with the number 7"}] update_running_summary(new_events) # Returns: "This reminds you of these events from your past: \nI entered the kitchen and found a scrawled note saying 7." 
+ ``` """ if not new_events: return self.summary_message() + if not max_summary_length: + max_summary_length = self.max_summary_tlength # Create a copy of the new_events list to prevent modifying the original list new_events = copy.deepcopy(new_events) @@ -166,29 +169,29 @@ class MessageHistory: elif event.role == "user": new_events.remove(event) - # Summarize events and current summary in batch to a new running summary + summ_model = OPEN_AI_CHAT_MODELS[config.fast_llm] - # Assume an upper bound length for the summary prompt template, i.e. Your task is to create a concise running summary...., in summarize_batch func - # TODO make this default dynamic - prompt_template_length = 100 - max_tokens = OPEN_AI_CHAT_MODELS.get(config.fast_llm).max_tokens - summary_tlength = count_string_tokens(str(self.summary), config.fast_llm) + # Determine token lengths for use in batching + prompt_template_length = len( + MessageHistory.SUMMARIZATION_PROMPT.format(summary="", new_events="") + ) + max_input_tokens = summ_model.max_tokens - max_summary_length + summary_tlength = count_string_tokens(self.summary, summ_model.name) batch = [] batch_tlength = 0 - # TODO Can put a cap on length of total new events and drop some previous events to save API cost, but need to think thru more how to do it without losing the context + # TODO: Put a cap on length of total new events and drop some previous events to + # save API cost. Need to think thru more how to do it without losing the context. for event in new_events: - event_tlength = count_string_tokens(str(event), config.fast_llm) + event_tlength = count_message_tokens(event, summ_model.name) if ( batch_tlength + event_tlength - > max_tokens - prompt_template_length - summary_tlength + > max_input_tokens - prompt_template_length - summary_tlength ): # The batch is full. Summarize it and start a new one. - self.summarize_batch(batch, config) - summary_tlength = count_string_tokens( - str(self.summary), config.fast_llm - ) + self.summarize_batch(batch, config, max_summary_length) + summary_tlength = count_string_tokens(self.summary, summ_model.name) batch = [event] batch_tlength = event_tlength else: @@ -197,41 +200,36 @@ class MessageHistory: if batch: # There's an unprocessed batch. Summarize it. - self.summarize_batch(batch, config) + self.summarize_batch(batch, config, max_summary_length) return self.summary_message() - def summarize_batch(self, new_events_batch, config): - prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember. - -You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. 
- -Summary So Far: -""" -{self.summary} -""" - -Latest Development: -""" -{new_events_batch or "Nothing new happened."} -""" -''' + def summarize_batch( + self, new_events_batch: list[Message], config: Config, max_output_length: int + ): + prompt = MessageHistory.SUMMARIZATION_PROMPT.format( + summary=self.summary, new_events=new_events_batch + ) prompt = ChatSequence.for_model(config.fast_llm, [Message("user", prompt)]) - self.agent.log_cycle_handler.log_cycle( - self.agent.ai_name, - self.agent.created_at, - self.agent.cycle_count, - prompt.raw(), - PROMPT_SUMMARY_FILE_NAME, - ) + if self.agent: + self.agent.log_cycle_handler.log_cycle( + self.agent.ai_config.ai_name, + self.agent.created_at, + self.agent.cycle_count, + prompt.raw(), + PROMPT_SUMMARY_FILE_NAME, + ) - self.summary = create_chat_completion(prompt, config).content + self.summary = create_chat_completion( + prompt, config, max_tokens=max_output_length + ).content - self.agent.log_cycle_handler.log_cycle( - self.agent.ai_name, - self.agent.created_at, - self.agent.cycle_count, - self.summary, - SUMMARY_FILE_NAME, - ) + if self.agent: + self.agent.log_cycle_handler.log_cycle( + self.agent.ai_config.ai_name, + self.agent.created_at, + self.agent.cycle_count, + self.summary, + SUMMARY_FILE_NAME, + ) diff --git a/autogpt/memory/vector/providers/base.py b/autogpt/memory/vector/providers/base.py index dc4dbf3c..aa233228 100644 --- a/autogpt/memory/vector/providers/base.py +++ b/autogpt/memory/vector/providers/base.py @@ -6,13 +6,12 @@ import numpy as np from autogpt.config.config import Config from autogpt.logs import logger -from autogpt.singleton import AbstractSingleton from .. import MemoryItem, MemoryItemRelevance from ..utils import Embedding, get_embedding -class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton): +class VectorMemoryProvider(MutableSet[MemoryItem]): @abc.abstractmethod def __init__(self, config: Config): pass diff --git a/autogpt/models/command_registry.py b/autogpt/models/command_registry.py index 59d3ae77..f54f4adb 100644 --- a/autogpt/models/command_registry.py +++ b/autogpt/models/command_registry.py @@ -15,8 +15,12 @@ class CommandRegistry: directory. """ - commands: dict[str, Command] = {} - commands_aliases: dict[str, Command] = {} + commands: dict[str, Command] + commands_aliases: dict[str, Command] + + def __init__(self): + self.commands = {} + self.commands_aliases = {} def __contains__(self, command_name: str): return command_name in self.commands or command_name in self.commands_aliases diff --git a/autogpt/workspace/workspace.py b/autogpt/workspace/workspace.py index 6d90f854..07186e73 100644 --- a/autogpt/workspace/workspace.py +++ b/autogpt/workspace/workspace.py @@ -123,7 +123,11 @@ class Workspace: logger.debug(f"Resolved root as '{root}'") # Allow exception for absolute paths if they are contained in your workspace directory. - if relative_path.is_absolute() and not relative_path.is_relative_to(root): + if ( + relative_path.is_absolute() + and restrict_to_root + and not relative_path.is_relative_to(root) + ): raise ValueError( f"Attempted to access absolute path '{relative_path}' in workspace '{root}'." 
) diff --git a/benchmarks.py b/benchmarks.py index cb592be8..2e143f9d 100644 --- a/benchmarks.py +++ b/benchmarks.py @@ -1,4 +1,4 @@ -from autogpt.agent import Agent +from autogpt.agents import Agent from autogpt.config import AIConfig, Config, ConfigBuilder from autogpt.main import COMMAND_CATEGORIES from autogpt.memory.vector import get_memory diff --git a/docs/imgs/e2b-dashboard.png b/docs/imgs/e2b-dashboard.png new file mode 100644 index 00000000..456f1490 Binary files /dev/null and b/docs/imgs/e2b-dashboard.png differ diff --git a/docs/imgs/e2b-log-url.png b/docs/imgs/e2b-log-url.png new file mode 100644 index 00000000..3f1c189e Binary files /dev/null and b/docs/imgs/e2b-log-url.png differ diff --git a/docs/imgs/e2b-new-tag.png b/docs/imgs/e2b-new-tag.png new file mode 100644 index 00000000..65a0a767 Binary files /dev/null and b/docs/imgs/e2b-new-tag.png differ diff --git a/docs/imgs/e2b-tag-button.png b/docs/imgs/e2b-tag-button.png new file mode 100644 index 00000000..741a6bac Binary files /dev/null and b/docs/imgs/e2b-tag-button.png differ diff --git a/docs/share-your-logs.md b/docs/share-your-logs.md new file mode 100644 index 00000000..f673e375 --- /dev/null +++ b/docs/share-your-logs.md @@ -0,0 +1,52 @@ +## Share your logs with us to help improve Auto-GPT + +Do you notice weird behavior with your agent? Do you have an interesting use case? Do you have a bug you want to report? +Follow the steps below to enable your logs and upload them. You can include these logs when making an issue report or discussing an issue with us. + +### Enable Debug Logs +Activity, Error, and Debug logs are located in `./logs` + +To print out debug logs: + +``` shell +./run.sh --debug # on Linux / macOS + +.\run.bat --debug # on Windows + +docker-compose run --rm auto-gpt --debug # in Docker +``` + +### Inspect and share logs +You can inspect and share logs via [e2b](https://e2b.dev). +![E2b logs dashboard](./imgs/e2b-dashboard.png) + + + +1. Go to [autogpt.e2b.dev](https://autogpt.e2b.dev) and sign in. +2. You'll see logs from other members of the AutoGPT team that you can inspect. +3. Or you can upload your own logs: click the "Upload log folder" button and select the debug logs directory that you generated. Wait 1-2 seconds and the page will reload. +4. You can share logs by sharing the URL from your browser. +![E2b log URL](./imgs/e2b-log-url.png) + + +### Add tags to logs +You can add custom tags to logs for other members of your team. This is useful if you want to indicate, for example, that the agent is having issues with challenges. + +E2b offers 3 severity levels: + +- Success +- Warning +- Error + +You can name your tag any way you want. + +#### How to add a tag +1. Click on the "plus" button to the left of the logs folder name. + +![E2b tag button](./imgs/e2b-tag-button.png) + +2. Type the name of a new tag. + +3. Select the severity. + +![E2b new tag](./imgs/e2b-new-tag.png) diff --git a/docs/usage.md b/docs/usage.md index a9ef2883..cb74ef7f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -89,12 +89,20 @@ This may give your bot increased intelligence. ## Logs -Activity and error logs are located in the `./output/logs` +Activity, Error, and Debug logs are located in `./logs` + +!!! tip + Do you notice weird behavior with your agent? Do you have an interesting use case? Do you have a bug you want to report? + Follow the steps below to enable your logs. You can include these logs when making an issue report or discussing an issue with us.
To print out debug logs: ``` shell -./run.sh --debug +./run.sh --debug # on Linux / macOS + +.\run.bat --debug # on Windows + +docker-compose run --rm auto-gpt --debug # in Docker ``` ## Disabling Command Categories diff --git a/mkdocs.yml b/mkdocs.yml index 50e06257..a8500445 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,7 +12,8 @@ nav: - Voice: configuration/voice.md - Image Generation: configuration/imagegen.md - - Contributing: + - Help us improve Auto-GPT: + - Share your debug logs with us: share-your-logs.md - Contribution guide: contributing.md - Running tests: testing.md - Code of Conduct: code-of-conduct.md diff --git a/netlify.toml b/netlify.toml index 43e79f0f..de261908 100644 --- a/netlify.toml +++ b/netlify.toml @@ -3,4 +3,4 @@ [build] publish = "public/" command = "mkdocs build -d public" - ignore = "git diff --quiet HEAD^ HEAD docs mkdocs.yml CONTRIBUTING.md CODE_OF_CONDUCT.md LICENSE" + ignore = "git diff --quiet $CACHED_COMMIT_REF $COMMIT_REF docs mkdocs.yml CONTRIBUTING.md CODE_OF_CONDUCT.md LICENSE" diff --git a/prompt_settings.yaml b/prompt_settings.yaml index 244886b5..342d67b9 100644 --- a/prompt_settings.yaml +++ b/prompt_settings.yaml @@ -7,7 +7,6 @@ constraints: [ resources: [ 'Internet access for searches and information gathering.', 'Long Term memory management.', - 'GPT-3.5 powered Agents for delegation of simple tasks.', 'File output.' ] performance_evaluations: [ diff --git a/pyproject.toml b/pyproject.toml index 06b2f87f..f16ee501 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agpt" -version = "0.4.4" +version = "0.4.5" authors = [ { name="Torantulino", email="support@agpt.co" }, ] diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 4485d191..d5848722 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 4485d191a4c989053af99d56470d9e805e4d3d47 +Subproject commit d584872257a8a440da594c5fb83cce66095ecf0b diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py index 90a7084d..c846f9ce 100644 --- a/tests/challenges/debug_code/test_debug_code_challenge_a.py +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -3,7 +3,7 @@ from pathlib import Path import pytest from pytest_mock import MockerFixture -from autogpt.agent import Agent +from autogpt.agents import Agent from autogpt.commands.execute_code import execute_python_file from autogpt.workspace import Workspace from tests.challenges.challenge_decorator.challenge_decorator import challenge diff --git a/tests/challenges/utils.py b/tests/challenges/utils.py index 130c5bd7..64523b81 100644 --- a/tests/challenges/utils.py +++ b/tests/challenges/utils.py @@ -6,7 +6,7 @@ from typing import Any, Generator import pytest -from autogpt.log_cycle.log_cycle import LogCycleHandler +from autogpt.logs import LogCycleHandler from autogpt.workspace import Workspace from benchmarks import run_task from tests.challenges.schema import Task diff --git a/tests/conftest.py b/tests/conftest.py index 14f6af78..09d358e6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,10 +6,11 @@ import pytest import yaml from pytest_mock import MockerFixture -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.config import AIConfig, Config, ConfigBuilder from autogpt.config.ai_config import AIConfig from autogpt.llm.api_manager import ApiManager +from 
autogpt.logs import logger from autogpt.memory.vector import get_memory from autogpt.models.command_registry import CommandRegistry from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT @@ -52,6 +53,9 @@ def config( if not os.environ.get("OPENAI_API_KEY"): os.environ["OPENAI_API_KEY"] = "sk-dummy" + # HACK: this is necessary to ensure PLAIN_OUTPUT takes effect + logger.config = config + config.plugins_dir = "tests/unit/data/test_plugins" config.plugins_config_file = temp_plugins_config_file diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index 664c6cbb..d3832c27 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -1,6 +1,6 @@ import pytest -from autogpt.agent import Agent +from autogpt.agents import Agent from autogpt.config import AIConfig, Config from autogpt.memory.vector import get_memory from autogpt.models.command_registry import CommandRegistry diff --git a/tests/integration/memory/test_json_file_memory.py b/tests/integration/memory/test_json_file_memory.py index e60a0766..38cd26c8 100644 --- a/tests/integration/memory/test_json_file_memory.py +++ b/tests/integration/memory/test_json_file_memory.py @@ -8,12 +8,6 @@ from autogpt.memory.vector import JSONFileMemory, MemoryItem from autogpt.workspace import Workspace -@pytest.fixture(autouse=True) -def cleanup_sut_singleton(): - if JSONFileMemory in JSONFileMemory._instances: - del JSONFileMemory._instances[JSONFileMemory] - - def test_json_memory_init_without_backing_file(config: Config, workspace: Workspace): index_file = workspace.root / f"{config.memory_index}.json" diff --git a/tests/integration/test_execute_code.py b/tests/integration/test_execute_code.py index 6ba4a753..80010c6f 100644 --- a/tests/integration/test_execute_code.py +++ b/tests/integration/test_execute_code.py @@ -1,12 +1,13 @@ import os import random +import re import string import tempfile import pytest import autogpt.commands.execute_code as sut # system under testing -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.config import Config @@ -88,13 +89,9 @@ def test_execute_python_file_invalid(agent: Agent): def test_execute_python_file_not_found(agent: Agent): - assert all( - s in sut.execute_python_file("notexist.py", agent).lower() - for s in [ - "python: can't open file 'notexist.py'", - "[errno 2] no such file or directory", - ] - ) + result = sut.execute_python_file("notexist.py", agent).lower() + assert re.match(r"python: can't open file '([A-Z]:)?[/\\\-\w]*notexist.py'", result) + assert "[errno 2] no such file or directory" in result def test_execute_shell(random_string: str, agent: Agent): diff --git a/tests/integration/test_image_gen.py b/tests/integration/test_image_gen.py index 8cdcfd98..0a9f6897 100644 --- a/tests/integration/test_image_gen.py +++ b/tests/integration/test_image_gen.py @@ -6,7 +6,7 @@ from unittest.mock import patch import pytest from PIL import Image -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.commands.image_gen import generate_image, generate_image_with_sd_webui diff --git a/tests/integration/test_web_selenium.py b/tests/integration/test_web_selenium.py index f98b2971..43de2860 100644 --- a/tests/integration/test_web_selenium.py +++ b/tests/integration/test_web_selenium.py @@ -1,7 +1,7 @@ import pytest from pytest_mock import MockerFixture -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.commands.web_selenium import 
browse_website @@ -12,6 +12,6 @@ def test_browse_website(agent: Agent, patched_api_requestor: MockerFixture): question = "How to execute a barrel roll" response = browse_website(url, question, agent) - assert "Error" in response + assert "error" in response.lower() # Sanity check that the response is not too long assert len(response) < 200 diff --git a/tests/unit/test_agent.py b/tests/unit/test_agent.py index 3fb896ba..7baeeb64 100644 --- a/tests/unit/test_agent.py +++ b/tests/unit/test_agent.py @@ -1,46 +1,27 @@ -from unittest.mock import MagicMock - -import pytest - -from autogpt.agent import Agent -from autogpt.config import AIConfig -from autogpt.config.config import Config - - -@pytest.fixture -def agent(config: Config): - ai_name = "Test AI" - memory = MagicMock() - next_action_count = 0 - command_registry = MagicMock() - ai_config = AIConfig(ai_name=ai_name) - system_prompt = "System prompt" - triggering_prompt = "Triggering prompt" - workspace_directory = "workspace_directory" - - agent = Agent( - ai_name=ai_name, - memory=memory, - next_action_count=next_action_count, - command_registry=command_registry, - ai_config=ai_config, - config=config, - system_prompt=system_prompt, - triggering_prompt=triggering_prompt, - workspace_directory=workspace_directory, - ) - return agent +from autogpt.agents.agent import Agent, execute_command def test_agent_initialization(agent: Agent): - assert agent.ai_name == "Test AI" - assert agent.memory == agent.memory + assert agent.ai_name == "Base" assert agent.history.messages == [] assert agent.next_action_count == 0 - assert agent.command_registry == agent.command_registry - assert agent.ai_config == agent.ai_config - assert agent.system_prompt == "System prompt" - assert agent.triggering_prompt == "Triggering prompt" + + +def test_execute_command_plugin(agent: Agent): + """Test that executing a command that came from a plugin works as expected""" + command_name = "check_plan" + agent.ai_config.prompt_generator.add_command( + command_name, + "Read the plan.md with the next goals to achieve", + {}, + lambda: "hi", + ) + command_result = execute_command( + command_name=command_name, + arguments={}, + agent=agent, + ) + assert command_result == "hi" # More test methods can be added for specific agent interactions diff --git a/tests/unit/test_agent_manager.py b/tests/unit/test_agent_manager.py deleted file mode 100644 index 113771f7..00000000 --- a/tests/unit/test_agent_manager.py +++ /dev/null @@ -1,70 +0,0 @@ -import pytest - -from autogpt.agent.agent_manager import AgentManager -from autogpt.llm import ChatModelResponse -from autogpt.llm.chat import create_chat_completion -from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS - - -@pytest.fixture -def agent_manager(config): - # Hack, real gross. Singletons are not good times. 
- yield AgentManager(config) - del AgentManager._instances[AgentManager] - - -@pytest.fixture -def task(): - return "translate English to French" - - -@pytest.fixture -def prompt(): - return "Translate the following English text to French: 'Hello, how are you?'" - - -@pytest.fixture -def model(): - return "gpt-3.5-turbo" - - -@pytest.fixture(autouse=True) -def mock_create_chat_completion(mocker, config): - mock_create_chat_completion = mocker.patch( - "autogpt.agent.agent_manager.create_chat_completion", - wraps=create_chat_completion, - ) - mock_create_chat_completion.return_value = ChatModelResponse( - model_info=OPEN_AI_CHAT_MODELS[config.fast_llm], - content="irrelevant", - function_call={}, - ) - return mock_create_chat_completion - - -def test_create_agent(agent_manager: AgentManager, task, prompt, model): - key, agent_reply = agent_manager.create_agent(task, prompt, model) - assert isinstance(key, int) - assert isinstance(agent_reply, str) - assert key in agent_manager.agents - - -def test_message_agent(agent_manager: AgentManager, task, prompt, model): - key, _ = agent_manager.create_agent(task, prompt, model) - user_message = "Please translate 'Good morning' to French." - agent_reply = agent_manager.message_agent(key, user_message) - assert isinstance(agent_reply, str) - - -def test_list_agents(agent_manager: AgentManager, task, prompt, model): - key, _ = agent_manager.create_agent(task, prompt, model) - agents_list = agent_manager.list_agents() - assert isinstance(agents_list, list) - assert (key, task) in agents_list - - -def test_delete_agent(agent_manager: AgentManager, task, prompt, model): - key, _ = agent_manager.create_agent(task, prompt, model) - success = agent_manager.delete_agent(key) - assert success - assert key not in agent_manager.agents diff --git a/tests/unit/test_execute_command.py b/tests/unit/test_execute_command.py deleted file mode 100644 index 21fb0b66..00000000 --- a/tests/unit/test_execute_command.py +++ /dev/null @@ -1,23 +0,0 @@ -from autogpt.agent import Agent -from autogpt.app import execute_command - - -def check_plan(): - return "hi" - - -def test_execute_command_plugin(agent: Agent): - """Test that executing a command that came from a plugin works as expected""" - command_name = "check_plan" - agent.ai_config.prompt_generator.add_command( - command_name, - "Read the plan.md with the next goals to achieve", - {}, - check_plan, - ) - command_result = execute_command( - command_name=command_name, - arguments={}, - agent=agent, - ) - assert command_result == "hi" diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index b8283111..d7d870a5 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -12,7 +12,7 @@ import pytest from pytest_mock import MockerFixture import autogpt.commands.file_operations as file_ops -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.config import Config from autogpt.memory.vector.memory_item import MemoryItem from autogpt.memory.vector.utils import Embedding @@ -44,8 +44,13 @@ def mock_MemoryItem_from_text( @pytest.fixture() -def test_file_path(workspace: Workspace): - return workspace.get_path("test_file.txt") +def test_file_name(): + return Path("test_file.txt") + + +@pytest.fixture +def test_file_path(test_file_name: Path, workspace: Workspace): + return workspace.get_path(test_file_name) @pytest.fixture() @@ -130,42 +135,34 @@ def test_is_duplicate_operation(agent: Agent, mocker: MockerFixture): # Test cases with 
write operations assert ( file_ops.is_duplicate_operation( - "write", "path/to/file1.txt", agent.config, "checksum1" + "write", "path/to/file1.txt", agent, "checksum1" ) is True ) assert ( file_ops.is_duplicate_operation( - "write", "path/to/file1.txt", agent.config, "checksum2" + "write", "path/to/file1.txt", agent, "checksum2" ) is False ) assert ( file_ops.is_duplicate_operation( - "write", "path/to/file3.txt", agent.config, "checksum3" + "write", "path/to/file3.txt", agent, "checksum3" ) is False ) # Test cases with append operations assert ( file_ops.is_duplicate_operation( - "append", "path/to/file1.txt", agent.config, "checksum1" + "append", "path/to/file1.txt", agent, "checksum1" ) is False ) # Test cases with delete operations assert ( - file_ops.is_duplicate_operation( - "delete", "path/to/file1.txt", config=agent.config - ) - is False - ) - assert ( - file_ops.is_duplicate_operation( - "delete", "path/to/file3.txt", config=agent.config - ) - is True + file_ops.is_duplicate_operation("delete", "path/to/file1.txt", agent) is False ) + assert file_ops.is_duplicate_operation("delete", "path/to/file3.txt", agent) is True # Test logging a file operation @@ -206,7 +203,15 @@ def test_read_file_not_found(agent: Agent): assert "Error:" in content and filename in content and "no such file" in content -def test_write_to_file(test_file_path: Path, agent: Agent): +def test_write_to_file_relative_path(test_file_name: Path, agent: Agent): + new_content = "This is new content.\n" + file_ops.write_to_file(str(test_file_name), new_content, agent=agent) + with open(agent.workspace.get_path(test_file_name), "r", encoding="utf-8") as f: + content = f.read() + assert content == new_content + + +def test_write_to_file_absolute_path(test_file_path: Path, agent: Agent): new_content = "This is new content.\n" file_ops.write_to_file(str(test_file_path), new_content, agent=agent) with open(test_file_path, "r", encoding="utf-8") as f: @@ -214,24 +219,24 @@ def test_write_to_file(test_file_path: Path, agent: Agent): assert content == new_content -def test_write_file_logs_checksum(test_file_path: Path, agent: Agent): +def test_write_file_logs_checksum(test_file_name: Path, agent: Agent): new_content = "This is new content.\n" new_checksum = file_ops.text_checksum(new_content) - file_ops.write_to_file(str(test_file_path), new_content, agent=agent) + file_ops.write_to_file(str(test_file_name), new_content, agent=agent) with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: log_entry = f.read() - assert log_entry == f"write: {test_file_path} #{new_checksum}\n" + assert log_entry == f"write: {test_file_name} #{new_checksum}\n" -def test_write_file_fails_if_content_exists(test_file_path: Path, agent: Agent): +def test_write_file_fails_if_content_exists(test_file_name: Path, agent: Agent): new_content = "This is new content.\n" file_ops.log_operation( "write", - str(test_file_path), + str(test_file_name), agent=agent, checksum=file_ops.text_checksum(new_content), ) - result = file_ops.write_to_file(str(test_file_path), new_content, agent=agent) + result = file_ops.write_to_file(str(test_file_name), new_content, agent=agent) assert result == "Error: File has already been updated." 
@@ -258,11 +263,11 @@ def test_append_to_file(test_nested_file: Path, agent: Agent): def test_append_to_file_uses_checksum_from_appended_file( - test_file_path: Path, agent: Agent + test_file_name: Path, agent: Agent ): append_text = "This is appended text.\n" - file_ops.append_to_file(test_file_path, append_text, agent=agent) - file_ops.append_to_file(test_file_path, append_text, agent=agent) + file_ops.append_to_file(test_file_name, append_text, agent=agent) + file_ops.append_to_file(test_file_name, append_text, agent=agent) with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: log_contents = f.read() @@ -272,8 +277,8 @@ def test_append_to_file_uses_checksum_from_appended_file( digest.update(append_text.encode("utf-8")) checksum2 = digest.hexdigest() assert log_contents == ( - f"append: {test_file_path} #{checksum1}\n" - f"append: {test_file_path} #{checksum2}\n" + f"append: {test_file_name} #{checksum1}\n" + f"append: {test_file_name} #{checksum2}\n" ) @@ -288,7 +293,7 @@ def test_delete_missing_file(agent: Agent): # confuse the log file_ops.log_operation("write", filename, agent=agent, checksum="fake") try: - os.remove(filename) + os.remove(agent.workspace.get_path(filename)) except FileNotFoundError as err: assert str(err) in file_ops.delete_file(filename, agent=agent) return diff --git a/tests/unit/test_git_commands.py b/tests/unit/test_git_commands.py index a6defdfc..9f56a384 100644 --- a/tests/unit/test_git_commands.py +++ b/tests/unit/test_git_commands.py @@ -2,7 +2,7 @@ import pytest from git.exc import GitCommandError from git.repo.base import Repo -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.commands.git_operations import clone_repository diff --git a/tests/unit/test_message_history.py b/tests/unit/test_message_history.py index 8ceee63f..ec01cd55 100644 --- a/tests/unit/test_message_history.py +++ b/tests/unit/test_message_history.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock import pytest -from autogpt.agent import Agent +from autogpt.agents import Agent from autogpt.config import AIConfig from autogpt.config.config import Config from autogpt.llm.base import ChatModelResponse, ChatSequence, Message @@ -38,8 +38,8 @@ def agent(config: Config): return agent -def test_message_history_batch_summary(mocker, agent, config): - history = MessageHistory(agent) +def test_message_history_batch_summary(mocker, agent: Agent, config: Config): + history = MessageHistory.for_model(agent.config.smart_llm, agent=agent) model = config.fast_llm message_tlength = 0 message_count = 0 @@ -48,7 +48,7 @@ def test_message_history_batch_summary(mocker, agent, config): mock_summary_response = ChatModelResponse( model_info=OPEN_AI_CHAT_MODELS[model], content="I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings.", - function_call={}, + function_call=None, ) mock_summary = mocker.patch( "autogpt.memory.message_history.create_chat_completion", @@ -105,7 +105,7 @@ def test_message_history_batch_summary(mocker, agent, config): result = ( "Command browse_website returned: Answer gathered from website: The text in job" + str(i) - + " does not provide information on specific job requirements or a job URL.]", + + " does not provide information on specific job requirements or a job URL.]" ) msg = Message("system", result, "action_result") history.append(msg) @@ -117,7 +117,7 @@ def test_message_history_batch_summary(mocker, agent, config): history.append(user_input_msg) 
# only take the last cycle of the message history, trim the rest of previous messages, and generate a summary for them - for cycle in reversed(list(history.per_cycle(config))): + for cycle in reversed(list(history.per_cycle())): messages_to_add = [msg for msg in cycle if msg is not None] message_sequence.insert(insertion_index, *messages_to_add) break @@ -134,7 +134,7 @@ def test_message_history_batch_summary(mocker, agent, config): ) expected_call_count = math.ceil( - message_tlength / (OPEN_AI_CHAT_MODELS.get(config.fast_llm).max_tokens) + message_tlength / (OPEN_AI_CHAT_MODELS[config.fast_llm].max_tokens) ) # Expecting 2 batches because of over max token assert mock_summary.call_count == expected_call_count # 2 at the time of writing diff --git a/tests/unit/test_retry_provider_openai.py b/tests/unit/test_retry_provider_openai.py index b2c2d04a..1b23f5d2 100644 --- a/tests/unit/test_retry_provider_openai.py +++ b/tests/unit/test_retry_provider_openai.py @@ -20,7 +20,7 @@ def error_factory(error_instance, error_count, retry_count, warn_user=True): self.count = 0 @openai.retry_api( - num_retries=retry_count, backoff_base=0.001, warn_user=warn_user + max_retries=retry_count, backoff_base=0.001, warn_user=warn_user ) def __call__(self): self.count += 1 @@ -69,16 +69,11 @@ def test_retry_open_api_passing(capsys, error, error_count, retry_count, failure if error_count and retry_count: if type(error) == RateLimitError: - assert "Reached rate limit, passing..." in output.out + assert "Reached rate limit" in output.out assert "Please double check" in output.out if type(error) == ServiceUnavailableError: - assert ( - "The OpenAI API engine is currently overloaded, passing..." - in output.out - ) + assert "The OpenAI API engine is currently overloaded" in output.out assert "Please double check" in output.out - if type(error) == APIError: - assert "API Bad gateway" in output.out else: assert output.out == "" @@ -96,7 +91,7 @@ def test_retry_open_api_rate_limit_no_warn(capsys): output = capsys.readouterr() - assert "Reached rate limit, passing..." in output.out + assert "Reached rate limit" in output.out assert "Please double check" not in output.out @@ -115,7 +110,7 @@ def test_retry_open_api_service_unavairable_no_warn(capsys): output = capsys.readouterr() - assert "The OpenAI API engine is currently overloaded, passing..." 
in output.out + assert "The OpenAI API engine is currently overloaded" in output.out assert "Please double check" not in output.out diff --git a/tests/unit/test_web_search.py b/tests/unit/test_web_search.py index 4f514306..790b1c2f 100644 --- a/tests/unit/test_web_search.py +++ b/tests/unit/test_web_search.py @@ -3,7 +3,7 @@ import json import pytest from googleapiclient.errors import HttpError -from autogpt.agent.agent import Agent +from autogpt.agents.agent import Agent from autogpt.commands.web_search import google, safe_google_results, web_search diff --git a/tests/vcr/__init__.py b/tests/vcr/__init__.py index 04ce79fc..ffd4fa35 100644 --- a/tests/vcr/__init__.py +++ b/tests/vcr/__init__.py @@ -1,10 +1,16 @@ import os +from hashlib import sha256 import openai.api_requestor import pytest from pytest_mock import MockerFixture -from .vcr_filter import PROXY, before_record_request, before_record_response +from .vcr_filter import ( + PROXY, + before_record_request, + before_record_response, + freeze_request_body, +) DEFAULT_RECORD_MODE = "new_episodes" BASE_VCR_CONFIG = { @@ -12,10 +18,13 @@ BASE_VCR_CONFIG = { "before_record_response": before_record_response, "filter_headers": [ "Authorization", + "AGENT-MODE", + "AGENT-TYPE", + "OpenAI-Organization", "X-OpenAI-Client-User-Agent", "User-Agent", ], - "match_on": ["method", "body"], + "match_on": ["method", "headers"], } @@ -41,7 +50,7 @@ def vcr_cassette_dir(request): return os.path.join("tests/Auto-GPT-test-cassettes", test_name) -def patch_api_base(requestor): +def patch_api_base(requestor: openai.api_requestor.APIRequestor): new_api_base = f"{PROXY}/v1" requestor.api_base = new_api_base return requestor @@ -49,23 +58,35 @@ def patch_api_base(requestor): @pytest.fixture def patched_api_requestor(mocker: MockerFixture): - original_init = openai.api_requestor.APIRequestor.__init__ - original_validate_headers = openai.api_requestor.APIRequestor._validate_headers + init_requestor = openai.api_requestor.APIRequestor.__init__ + prepare_request = openai.api_requestor.APIRequestor._prepare_request_raw - def patched_init(requestor, *args, **kwargs): - original_init(requestor, *args, **kwargs) + def patched_init_requestor(requestor, *args, **kwargs): + init_requestor(requestor, *args, **kwargs) patch_api_base(requestor) - def patched_validate_headers(self, supplied_headers): - headers = original_validate_headers(self, supplied_headers) - headers["AGENT-MODE"] = os.environ.get("AGENT_MODE") - headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE") - return headers + def patched_prepare_request(self, *args, **kwargs): + url, headers, data = prepare_request(self, *args, **kwargs) + + if PROXY: + headers["AGENT-MODE"] = os.environ.get("AGENT_MODE") + headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE") + + # Add hash header for cheap & fast matching on cassette playback + headers["X-Content-Hash"] = sha256( + freeze_request_body(data), usedforsecurity=False + ).hexdigest() + + return url, headers, data if PROXY: - mocker.patch("openai.api_requestor.APIRequestor.__init__", new=patched_init) mocker.patch.object( openai.api_requestor.APIRequestor, - "_validate_headers", - new=patched_validate_headers, + "__init__", + new=patched_init_requestor, ) + mocker.patch.object( + openai.api_requestor.APIRequestor, + "_prepare_request_raw", + new=patched_prepare_request, + ) diff --git a/tests/vcr/openai_filter.py b/tests/vcr/openai_filter.py deleted file mode 100644 index 15a4b862..00000000 --- a/tests/vcr/openai_filter.py +++ /dev/null @@ -1,52 +0,0 @@ -import json 
-import re - - -def replace_timestamp_in_request(request): - # Check if the request body contains a JSON object - - try: - if not request or not request.body: - return request - body = json.loads(request.body) - except ValueError: - return request - - if "messages" not in body: - return request - - for message in body["messages"]: - if "content" in message and "role" in message and message["role"] == "system": - timestamp_regex = re.compile(r"\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} \d{4}") - message["content"] = timestamp_regex.sub( - "Tue Jan 01 00:00:00 2000", message["content"] - ) - - request.body = json.dumps(body) - return request - - -def before_record_response(response): - if "Transfer-Encoding" in response["headers"]: - del response["headers"]["Transfer-Encoding"] - return response - - -def before_record_request(request): - filtered_request = filter_hostnames(request) - filtered_request_without_dynamic_data = replace_timestamp_in_request( - filtered_request - ) - return filtered_request_without_dynamic_data - - -def filter_hostnames(request): - allowed_hostnames = [ - "api.openai.com", - "localhost:50337", - ] # List of hostnames you want to allow - - if any(hostname in request.url for hostname in allowed_hostnames): - return request - else: - return None diff --git a/tests/vcr/vcr_filter.py b/tests/vcr/vcr_filter.py index 1ba433a7..b1eb6461 100644 --- a/tests/vcr/vcr_filter.py +++ b/tests/vcr/vcr_filter.py @@ -1,8 +1,12 @@ +import contextlib import json import os import re +from io import BytesIO from typing import Any, Dict, List +from vcr.request import Request + PROXY = os.environ.get("PROXY") REPLACEMENTS: List[Dict[str, str]] = [ @@ -39,19 +43,20 @@ def replace_message_content(content: str, replacements: List[Dict[str, str]]) -> return content -def replace_timestamp_in_request(request: Any) -> Any: +def freeze_request_body(json_body: str | bytes) -> bytes: + """Remove any dynamic items from the request body""" + try: - if not request or not request.body: - return request - body = json.loads(request.body) + body = json.loads(json_body) except ValueError: - return request + return json_body if type(json_body) == bytes else json_body.encode() if "messages" not in body: - return request - body[ - "max_tokens" - ] = 0 # this field is inconsistent between requests and not used at the moment. 
+ return json.dumps(body, sort_keys=True).encode() + + if "max_tokens" in body: + del body["max_tokens"] + for message in body["messages"]: if "content" in message and "role" in message: if message["role"] == "system": @@ -59,7 +64,20 @@ def replace_timestamp_in_request(request: Any) -> Any: message["content"], REPLACEMENTS ) - request.body = json.dumps(body) + return json.dumps(body, sort_keys=True).encode() + + +def freeze_request(request: Request) -> Request: + if not request or not request.body: + return request + + with contextlib.suppress(ValueError): + request.body = freeze_request_body( + request.body.getvalue() + if isinstance(request.body, BytesIO) + else request.body + ) + return request @@ -69,20 +87,23 @@ def before_record_response(response: Dict[str, Any]) -> Dict[str, Any]: return response -def before_record_request(request: Any) -> Any: +def before_record_request(request: Request) -> Request | None: request = replace_request_hostname(request, ORIGINAL_URL, NEW_URL) filtered_request = filter_hostnames(request) - filtered_request_without_dynamic_data = replace_timestamp_in_request( - filtered_request - ) + if not filtered_request: + return None + + filtered_request_without_dynamic_data = freeze_request(filtered_request) return filtered_request_without_dynamic_data from urllib.parse import urlparse, urlunparse -def replace_request_hostname(request: Any, original_url: str, new_hostname: str) -> Any: +def replace_request_hostname( + request: Request, original_url: str, new_hostname: str +) -> Request: parsed_url = urlparse(request.uri) if parsed_url.hostname in original_url: @@ -94,7 +115,7 @@ def replace_request_hostname(request: Any, original_url: str, new_hostname: str) return request -def filter_hostnames(request: Any) -> Any: +def filter_hostnames(request: Request) -> Request | None: # Add your implementation here for filtering hostnames if any(hostname in request.url for hostname in ALLOWED_HOSTNAMES): return request
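For context on the cassette-matching change above: `freeze_request_body` canonicalizes the JSON request body (dropping the inconsistent `max_tokens` field and sorting keys), the patched `_prepare_request_raw` adds an `X-Content-Hash` header with the SHA-256 of that frozen body, and VCR now matches on `["method", "headers"]` instead of the raw body. The sketch below is illustrative only: the helper names `freeze_body` and `content_hash` are invented here, and the real filter also rewrites dynamic content such as timestamps in system messages, which this sketch omits.

```python
from __future__ import annotations

import json
from hashlib import sha256


def freeze_body(raw: str | bytes) -> bytes:
    """Canonicalize a chat request body so equivalent requests hash identically."""
    try:
        body = json.loads(raw)
    except ValueError:
        # Not JSON: hash the raw bytes as-is
        return raw if isinstance(raw, bytes) else raw.encode()
    # max_tokens varies between otherwise-identical requests, so drop it
    body.pop("max_tokens", None)
    # sort_keys gives a stable byte representation regardless of key order
    return json.dumps(body, sort_keys=True).encode()


def content_hash(raw: str | bytes) -> str:
    """The kind of value that ends up in the X-Content-Hash header."""
    return sha256(freeze_body(raw)).hexdigest()


# Two requests that differ only in key order and max_tokens hash the same,
# so a cassette recorded for one will still match the other on playback.
a = json.dumps({"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}], "max_tokens": 100})
b = json.dumps({"max_tokens": 0, "messages": [{"role": "user", "content": "hi"}], "model": "gpt-3.5-turbo"})
assert content_hash(a) == content_hash(b)
```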