From 7720f6af24520f346d80c07baddc8a16a9310446 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Thu, 21 Sep 2023 16:32:19 +0200
Subject: [PATCH] AutoGPT: replace `autogpt.llm.*` with LLM infrastructure of `autogpt.core`;

* Removed `autogpt.llm.base` and `autogpt.llm.utils`
* `core` does things async, so `Agent.think()` and `Agent.execute()` are now also async
* Renamed `dump()` and `parse()` on `JSONSchema` to `to_dict()` and `from_dict()`
* Removed `MessageHistory`
* Also, some typos and linting fixes here and there
---
 .../autogpt/agbenchmark_config/benchmarks.py | 6 +-
 autogpts/autogpt/autogpt/agents/agent.py | 92 ++++--
 autogpts/autogpt/autogpt/agents/base.py | 97 +++---
 .../autogpt/agents/features/context.py | 11 +-
 .../autogpt/agents/features/watchdog.py | 6 +-
 autogpts/autogpt/autogpt/app/main.py | 67 +++-
 autogpts/autogpt/autogpt/app/setup.py | 56 ++--
 autogpts/autogpt/autogpt/app/utils.py | 4 +-
 .../autogpt/autogpt/commands/execute_code.py | 3 +-
 .../autogpt/autogpt/commands/web_selenium.py | 19 +-
 autogpts/autogpt/autogpt/config/ai_config.py | 7 +-
 autogpts/autogpt/autogpt/config/config.py | 5 +-
 .../autogpt/autogpt/core/ability/simple.py | 4 +-
 .../prompt_strategies/next_ability.py | 2 +-
 .../core/resource/model_providers/schema.py | 2 +-
 .../autogpt/autogpt/core/utils/json_schema.py | 6 +-
 .../json_utils/llm_response_format_1.json | 45 ---
 .../autogpt/autogpt/json_utils/utilities.py | 57 +---
 autogpts/autogpt/autogpt/llm/__init__.py | 21 --
 autogpts/autogpt/autogpt/llm/api_manager.py | 7 +-
 autogpts/autogpt/autogpt/llm/base.py | 219 --------------
 .../autogpt/autogpt/llm/providers/openai.py | 286 +++---------------
 .../autogpt/autogpt/llm/utils/__init__.py | 189 ------------
 .../autogpt/llm/utils/token_counter.py | 87 ------
 autogpts/autogpt/autogpt/logs/__init__.py | 1 -
 autogpts/autogpt/autogpt/logs/formatters.py | 5 +-
 .../autogpt/autogpt/memory/message_history.py | 250 ---------------
 .../autogpt/memory/vector/memory_item.py | 12 +-
 .../memory/vector/providers/json_file.py | 8 +-
 .../autogpt/autogpt/memory/vector/utils.py | 11 +-
 .../autogpt/models/command_parameter.py | 5 +-
 .../autogpt/autogpt/models/context_item.py | 2 +-
 autogpts/autogpt/autogpt/processing/text.py | 141 +++++----
 .../autogpt/scripts/check_requirements.py | 4 +-
 autogpts/autogpt/tests/conftest.py | 10 +-
 .../tests/integration/agent_factory.py | 3 +-
 .../memory/test_json_file_memory.py | 2 +-
 .../autogpt/tests/integration/memory/utils.py | 8 +-
 .../tests/integration/test_execute_code.py | 23 +-
 .../tests/integration/test_provider_openai.py | 33 --
 .../autogpt/tests/integration/test_setup.py | 22 +-
 .../tests/integration/test_web_selenium.py | 6 +-
 .../autogpt/tests/unit/test_api_manager.py | 5 +-
 .../tests/unit/test_file_operations.py | 4 +-
 .../tests/unit/test_message_history.py | 140 ---------
 .../tests/unit/test_retry_provider_openai.py | 13 +-
 .../autogpt/tests/unit/test_token_counter.py | 55 ----
 autogpts/autogpt/tests/unit/test_utils.py | 15 +-
 autogpts/autogpt/tests/vcr/__init__.py | 5 +-
 49 files changed, 460 insertions(+), 1621 deletions(-)
 delete mode 100644 autogpts/autogpt/autogpt/json_utils/llm_response_format_1.json
 delete mode 100644 autogpts/autogpt/autogpt/llm/__init__.py
 delete mode 100644 autogpts/autogpt/autogpt/llm/base.py
 delete mode 100644 autogpts/autogpt/autogpt/llm/utils/__init__.py
 delete mode 100644 autogpts/autogpt/autogpt/llm/utils/token_counter.py
 delete mode 100644 autogpts/autogpt/autogpt/memory/message_history.py
 delete mode 100644 
autogpts/autogpt/tests/integration/test_provider_openai.py delete mode 100644 autogpts/autogpt/tests/unit/test_message_history.py delete mode 100644 autogpts/autogpt/tests/unit/test_token_counter.py diff --git a/autogpts/autogpt/agbenchmark_config/benchmarks.py b/autogpts/autogpt/agbenchmark_config/benchmarks.py index 5635f491..2aaec63e 100644 --- a/autogpts/autogpt/agbenchmark_config/benchmarks.py +++ b/autogpts/autogpt/agbenchmark_config/benchmarks.py @@ -1,8 +1,9 @@ +import asyncio import sys from pathlib import Path from autogpt.agents import Agent -from autogpt.app.main import run_interaction_loop +from autogpt.app.main import _configure_openai_provider, run_interaction_loop from autogpt.commands import COMMAND_CATEGORIES from autogpt.config import AIConfig, ConfigBuilder from autogpt.logs.config import configure_logging @@ -17,7 +18,7 @@ LOG_DIR = Path(__file__).parent / "logs" def run_specific_agent(task: str, continuous_mode: bool = False) -> None: agent = bootstrap_agent(task, continuous_mode) - run_interaction_loop(agent) + asyncio.run(run_interaction_loop(agent)) def bootstrap_agent(task: str, continuous_mode: bool) -> Agent: @@ -43,6 +44,7 @@ def bootstrap_agent(task: str, continuous_mode: bool) -> Agent: ) return Agent( memory=get_memory(config), + llm_provider=_configure_openai_provider(config), command_registry=command_registry, ai_config=ai_config, config=config, diff --git a/autogpts/autogpt/autogpt/agents/agent.py b/autogpts/autogpt/autogpt/agents/agent.py index 187a996c..3071254a 100644 --- a/autogpts/autogpt/autogpt/agents/agent.py +++ b/autogpts/autogpt/autogpt/agents/agent.py @@ -8,14 +8,18 @@ from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from autogpt.config import AIConfig, Config - from autogpt.llm.base import ChatModelResponse, ChatSequence from autogpt.memory.vector import VectorMemory from autogpt.models.command_registry import CommandRegistry -from autogpt.json_utils.utilities import extract_dict_from_response, validate_dict +from autogpt.core.prompting import ChatPrompt +from autogpt.core.resource.model_providers import ( + ChatMessage, + ChatModelProvider, + ChatModelResponse, +) +from autogpt.core.utils.json_schema import JSONSchema +from autogpt.json_utils.utilities import extract_dict_from_response from autogpt.llm.api_manager import ApiManager -from autogpt.llm.base import Message -from autogpt.llm.utils import count_string_tokens from autogpt.logs.log_cycle import ( CURRENT_CONTEXT_FILE_NAME, NEXT_ACTION_FILE_NAME, @@ -52,6 +56,7 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): def __init__( self, ai_config: AIConfig, + llm_provider: ChatModelProvider, command_registry: CommandRegistry, memory: VectorMemory, triggering_prompt: str, @@ -60,6 +65,7 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): ): super().__init__( ai_config=ai_config, + llm_provider=llm_provider, command_registry=command_registry, config=config, default_cycle_instruction=triggering_prompt, @@ -75,13 +81,13 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): self.log_cycle_handler = LogCycleHandler() """LogCycleHandler for structured debug logging.""" - def construct_base_prompt(self, *args, **kwargs) -> ChatSequence: + def construct_base_prompt(self, *args, **kwargs) -> ChatPrompt: if kwargs.get("prepend_messages") is None: kwargs["prepend_messages"] = [] # Clock kwargs["prepend_messages"].append( - Message("system", f"The current time and date is {time.strftime('%c')}"), + ChatMessage.system(f"The current 
time and date is {time.strftime('%c')}"), ) # Add budget information (if any) to prompt @@ -93,8 +99,7 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): if remaining_budget < 0: remaining_budget = 0 - budget_msg = Message( - "system", + budget_msg = ChatMessage.system( f"Your remaining API budget is ${remaining_budget:.3f}" + ( " BUDGET EXCEEDED! SHUT DOWN!\n\n" @@ -114,7 +119,7 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): return super().construct_base_prompt(*args, **kwargs) - def on_before_think(self, *args, **kwargs) -> ChatSequence: + def on_before_think(self, *args, **kwargs) -> ChatPrompt: prompt = super().on_before_think(*args, **kwargs) self.log_cycle_handler.log_count_within_cycle = 0 @@ -127,7 +132,7 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): ) return prompt - def execute( + async def execute( self, command_name: str, command_args: dict[str, str] = {}, @@ -173,8 +178,8 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): except AgentException as e: result = ActionErrorResult(reason=e.message, error=e) - result_tlength = count_string_tokens(str(result), self.llm.name) - history_tlength = count_string_tokens( + result_tlength = self.llm_provider.count_tokens(str(result), self.llm.name) + history_tlength = self.llm_provider.count_tokens( self.event_history.fmt_paragraph(), self.llm.name ) if result_tlength + history_tlength > self.send_token_limit: @@ -199,10 +204,10 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): def parse_and_process_response( self, llm_response: ChatModelResponse, *args, **kwargs ) -> Agent.ThoughtProcessOutput: - if not llm_response.content: + if "content" not in llm_response.response: raise InvalidAgentResponseError("Assistant response has no text content") - response_content = llm_response.content + response_content = llm_response.response["content"] for plugin in self.config.plugins: if not plugin.can_handle_post_planning(): @@ -211,7 +216,7 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): assistant_reply_dict = extract_dict_from_response(response_content) - _, errors = validate_dict(assistant_reply_dict, self.config) + _, errors = RESPONSE_SCHEMA.validate_object(assistant_reply_dict, logger) if errors: raise InvalidAgentResponseError( "Validation of response failed:\n " @@ -243,6 +248,57 @@ class Agent(ContextMixin, WorkspaceMixin, WatchdogMixin, BaseAgent): return response +RESPONSE_SCHEMA = JSONSchema( + type=JSONSchema.Type.OBJECT, + properties={ + "thoughts": JSONSchema( + type=JSONSchema.Type.OBJECT, + required=True, + properties={ + "text": JSONSchema( + description="thoughts", + type=JSONSchema.Type.STRING, + required=True, + ), + "reasoning": JSONSchema( + type=JSONSchema.Type.STRING, + required=True, + ), + "plan": JSONSchema( + description="- short bulleted\n- list that conveys\n- long-term plan", + type=JSONSchema.Type.STRING, + required=True, + ), + "criticism": JSONSchema( + description="constructive self-criticism", + type=JSONSchema.Type.STRING, + required=True, + ), + "speak": JSONSchema( + description="thoughts summary to say to user", + type=JSONSchema.Type.STRING, + required=True, + ), + }, + ), + "command": JSONSchema( + type=JSONSchema.Type.OBJECT, + required=True, + properties={ + "name": JSONSchema( + type=JSONSchema.Type.STRING, + required=True, + ), + "args": JSONSchema( + type=JSONSchema.Type.OBJECT, + required=True, + ), + }, + ), + }, +) + + def extract_command( assistant_reply_json: dict, 
assistant_reply: ChatModelResponse, config: Config ) -> tuple[str, dict[str, str]]: @@ -262,11 +318,11 @@ def extract_command( Exception: If any other error occurs """ if config.openai_functions: - if assistant_reply.function_call is None: + if "function_call" not in assistant_reply.response: raise InvalidAgentResponseError("No 'function_call' in assistant reply") assistant_reply_json["command"] = { - "name": assistant_reply.function_call.name, - "args": json.loads(assistant_reply.function_call.arguments), + "name": assistant_reply.response["function_call"]["name"], + "args": json.loads(assistant_reply.response["function_call"]["arguments"]), } try: if not isinstance(assistant_reply_json, dict): diff --git a/autogpts/autogpt/autogpt/agents/base.py b/autogpts/autogpt/autogpt/agents/base.py index 780331c4..122e614f 100644 --- a/autogpts/autogpt/autogpt/agents/base.py +++ b/autogpts/autogpt/autogpt/agents/base.py @@ -2,20 +2,24 @@ from __future__ import annotations import logging import re -from abc import ABCMeta, abstractmethod +from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any, Literal, Optional if TYPE_CHECKING: from autogpt.config import AIConfig, Config - from autogpt.llm.base import ChatModelInfo, ChatModelResponse + from autogpt.core.resource.model_providers.schema import ( + ChatModelInfo, + ChatModelProvider, + ChatModelResponse, + ) from autogpt.models.command_registry import CommandRegistry -from autogpt.agents.utils.exceptions import InvalidAgentResponseError from autogpt.config.ai_directives import AIDirectives -from autogpt.llm.base import ChatSequence, Message -from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS, get_openai_command_specs -from autogpt.llm.utils import count_message_tokens, create_chat_completion -from autogpt.models.action_history import EpisodicActionHistory, ActionResult +from autogpt.core.prompting.schema import ChatMessage, ChatPrompt +from autogpt.core.resource.model_providers.openai import OPEN_AI_CHAT_MODELS +from autogpt.core.runner.client_lib.logging.helpers import dump_prompt +from autogpt.llm.providers.openai import get_openai_command_specs +from autogpt.models.action_history import ActionResult, EpisodicActionHistory from autogpt.prompts.generator import PromptGenerator from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT @@ -26,7 +30,7 @@ CommandArgs = dict[str, str] AgentThoughts = dict[str, Any] -class BaseAgent(metaclass=ABCMeta): +class BaseAgent(ABC): """Base class for all Auto-GPT agents.""" ThoughtProcessID = Literal["one-shot"] @@ -35,6 +39,7 @@ class BaseAgent(metaclass=ABCMeta): def __init__( self, ai_config: AIConfig, + llm_provider: ChatModelProvider, command_registry: CommandRegistry, config: Config, big_brain: bool = True, @@ -46,6 +51,8 @@ class BaseAgent(metaclass=ABCMeta): self.ai_config = ai_config """The AIConfig or "personality" object associated with this agent.""" + self.llm_provider = llm_provider + self.command_registry = command_registry """The registry containing all commands available to the agent.""" @@ -108,7 +115,7 @@ class BaseAgent(metaclass=ABCMeta): llm_name = self.config.smart_llm if self.big_brain else self.config.fast_llm return OPEN_AI_CHAT_MODELS[llm_name] - def think( + async def think( self, instruction: Optional[str] = None, thought_process_id: ThoughtProcessID = "one-shot", @@ -124,21 +131,23 @@ class BaseAgent(metaclass=ABCMeta): instruction = instruction or self.default_cycle_instruction - prompt: ChatSequence = self.construct_prompt(instruction, 
thought_process_id) + prompt: ChatPrompt = self.construct_prompt(instruction, thought_process_id) prompt = self.on_before_think(prompt, thought_process_id, instruction) - raw_response = create_chat_completion( - prompt, - self.config, + + logger.debug(f"Executing prompt:\n{dump_prompt(prompt)}") + raw_response = await self.llm_provider.create_chat_completion( + prompt.messages, functions=get_openai_command_specs(self.command_registry) if self.config.openai_functions - else None, + else [], + model_name=self.llm.name, ) self.cycle_count += 1 return self.on_response(raw_response, thought_process_id, prompt, instruction) @abstractmethod - def execute( + async def execute( self, command_name: str, command_args: dict[str, str] = {}, @@ -159,15 +168,14 @@ class BaseAgent(metaclass=ABCMeta): def construct_base_prompt( self, thought_process_id: ThoughtProcessID, - prepend_messages: list[Message] = [], - append_messages: list[Message] = [], + prepend_messages: list[ChatMessage] = [], + append_messages: list[ChatMessage] = [], reserve_tokens: int = 0, - ) -> ChatSequence: + ) -> ChatPrompt: """Constructs and returns a prompt with the following structure: 1. System prompt 2. `prepend_messages` - 3. Message history of the agent, truncated & prepended with running summary as needed - 4. `append_messages` + 3. `append_messages` Params: prepend_messages: Messages to insert between the system prompt and message history @@ -178,27 +186,26 @@ class BaseAgent(metaclass=ABCMeta): if self.event_history: prepend_messages.insert( 0, - Message( - "system", - "## Progress\n\n" f"{self.event_history.fmt_paragraph()}", + ChatMessage.system( + "## Progress\n\n" f"{self.event_history.fmt_paragraph()}" ), ) - prompt = ChatSequence.for_model( - self.llm.name, - [Message("system", self.system_prompt)] + prepend_messages, + prompt = ChatPrompt( + messages=[ + ChatMessage.system(self.system_prompt), + ] + + prepend_messages + + (append_messages or []), ) - if append_messages: - prompt.extend(append_messages) - return prompt def construct_prompt( self, cycle_instruction: str, thought_process_id: ThoughtProcessID, - ) -> ChatSequence: + ) -> ChatPrompt: """Constructs and returns a prompt with the following structure: 1. System prompt 2. 
Message history of the agent, truncated & prepended with running summary as needed @@ -211,16 +218,16 @@ class BaseAgent(metaclass=ABCMeta): if not cycle_instruction: raise ValueError("No instruction given") - cycle_instruction_msg = Message("user", cycle_instruction) - cycle_instruction_tlength = count_message_tokens( + cycle_instruction_msg = ChatMessage.user(cycle_instruction) + cycle_instruction_tlength = self.llm_provider.count_message_tokens( cycle_instruction_msg, self.llm.name ) - append_messages: list[Message] = [] + append_messages: list[ChatMessage] = [] response_format_instr = self.response_format_instruction(thought_process_id) if response_format_instr: - append_messages.append(Message("system", response_format_instr)) + append_messages.append(ChatMessage.system(response_format_instr)) prompt = self.construct_base_prompt( thought_process_id, @@ -229,7 +236,7 @@ class BaseAgent(metaclass=ABCMeta): ) # ADD user input message ("triggering prompt") - prompt.append(cycle_instruction_msg) + prompt.messages.append(cycle_instruction_msg) return prompt @@ -291,10 +298,10 @@ class BaseAgent(metaclass=ABCMeta): def on_before_think( self, - prompt: ChatSequence, + prompt: ChatPrompt, thought_process_id: ThoughtProcessID, instruction: str, - ) -> ChatSequence: + ) -> ChatPrompt: """Called after constructing the prompt but before executing it. Calls the `on_planning` hook of any enabled and capable plugins, adding their @@ -306,7 +313,9 @@ class BaseAgent(metaclass=ABCMeta): Returns: The prompt to execute """ - current_tokens_used = prompt.token_length + current_tokens_used = self.llm_provider.count_message_tokens( + prompt.messages, self.llm.name + ) plugin_count = len(self.config.plugins) for i, plugin in enumerate(self.config.plugins): if not plugin.can_handle_on_planning(): @@ -314,13 +323,15 @@ class BaseAgent(metaclass=ABCMeta): plugin_response = plugin.on_planning(self.prompt_generator, prompt.raw()) if not plugin_response or plugin_response == "": continue - message_to_add = Message("system", plugin_response) - tokens_to_add = count_message_tokens(message_to_add, self.llm.name) + message_to_add = ChatMessage.system(plugin_response) + tokens_to_add = self.llm_provider.count_message_tokens( + message_to_add, self.llm.name + ) if current_tokens_used + tokens_to_add > self.send_token_limit: logger.debug(f"Plugin response too long, skipping: {plugin_response}") logger.debug(f"Plugins remaining at stop: {plugin_count - i}") break - prompt.insert( + prompt.messages.insert( -1, message_to_add ) # HACK: assumes cycle instruction to be at the end current_tokens_used += tokens_to_add @@ -330,7 +341,7 @@ class BaseAgent(metaclass=ABCMeta): self, llm_response: ChatModelResponse, thought_process_id: ThoughtProcessID, - prompt: ChatSequence, + prompt: ChatPrompt, instruction: str, ) -> ThoughtProcessOutput: """Called upon receiving a response from the chat model. @@ -358,7 +369,7 @@ class BaseAgent(metaclass=ABCMeta): self, llm_response: ChatModelResponse, thought_process_id: ThoughtProcessID, - prompt: ChatSequence, + prompt: ChatPrompt, instruction: str, ) -> ThoughtProcessOutput: """Validate, parse & process the LLM's response. 
diff --git a/autogpts/autogpt/autogpt/agents/features/context.py b/autogpts/autogpt/autogpt/agents/features/context.py index 23158ff9..c9889aba 100644 --- a/autogpts/autogpt/autogpt/agents/features/context.py +++ b/autogpts/autogpt/autogpt/agents/features/context.py @@ -3,12 +3,12 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from autogpt.llm.base import ChatSequence + from autogpt.core.prompting import ChatPrompt from autogpt.models.context_item import ContextItem from ..base import BaseAgent -from autogpt.llm.base import Message +from autogpt.core.resource.model_providers import ChatMessage class AgentContext: @@ -33,7 +33,7 @@ class AgentContext: self.items.clear() def format_numbered(self) -> str: - return "\n\n".join([f"{i}. {c}" for i, c in enumerate(self.items, 1)]) + return "\n\n".join([f"{i}. {c.fmt()}" for i, c in enumerate(self.items, 1)]) class ContextMixin: @@ -46,7 +46,7 @@ class ContextMixin: super(ContextMixin, self).__init__(**kwargs) - def construct_base_prompt(self, *args: Any, **kwargs: Any) -> ChatSequence: + def construct_base_prompt(self, *args: Any, **kwargs: Any) -> ChatPrompt: if kwargs.get("append_messages") is None: kwargs["append_messages"] = [] @@ -54,8 +54,7 @@ class ContextMixin: if self.context: kwargs["append_messages"].insert( 0, - Message( - "system", + ChatMessage.system( "## Context\n" + self.context.format_numbered() + "\n\nWhen a context item is no longer needed and you are not done yet," diff --git a/autogpts/autogpt/autogpt/agents/features/watchdog.py b/autogpts/autogpt/autogpt/agents/features/watchdog.py index e7310e2a..4166acb4 100644 --- a/autogpts/autogpt/autogpt/agents/features/watchdog.py +++ b/autogpts/autogpt/autogpt/agents/features/watchdog.py @@ -27,8 +27,8 @@ class WatchdogMixin: f"{__class__.__name__} can only be applied to BaseAgent derivatives" ) - def think(self, *args, **kwargs) -> BaseAgent.ThoughtProcessOutput: - command_name, command_args, thoughts = super(WatchdogMixin, self).think( + async def think(self, *args, **kwargs) -> BaseAgent.ThoughtProcessOutput: + command_name, command_args, thoughts = await super(WatchdogMixin, self).think( *args, **kwargs ) @@ -58,6 +58,6 @@ class WatchdogMixin: # Switch to SMART_LLM and re-think self.big_brain = True - return self.think(*args, **kwargs) + return await self.think(*args, **kwargs) return command_name, command_args, thoughts diff --git a/autogpts/autogpt/autogpt/app/main.py b/autogpts/autogpt/autogpt/app/main.py index 0b6d4b10..c967c9d9 100644 --- a/autogpts/autogpt/autogpt/app/main.py +++ b/autogpts/autogpt/autogpt/app/main.py @@ -9,6 +9,7 @@ from types import FrameType from typing import Optional from colorama import Fore, Style +from pydantic import SecretStr from autogpt.agents import Agent, AgentThoughts, CommandArgs, CommandName from autogpt.agents.utils.exceptions import InvalidAgentResponseError @@ -24,6 +25,12 @@ from autogpt.app.utils import ( ) from autogpt.commands import COMMAND_CATEGORIES from autogpt.config import AIConfig, Config, ConfigBuilder, check_openai_api_key +from autogpt.core.resource.model_providers import ( + ChatModelProvider, + ModelProviderCredentials, +) +from autogpt.core.resource.model_providers.openai import OpenAIProvider +from autogpt.core.runner.client_lib.utils import coroutine from autogpt.llm.api_manager import ApiManager from autogpt.logs.config import configure_chat_plugins, configure_logging from autogpt.logs.helpers import print_attribute, speak @@ -35,7 +42,8 @@ from autogpt.workspace 
import Workspace from scripts.install_plugin_deps import install_plugin_dependencies -def run_auto_gpt( +@coroutine +async def run_auto_gpt( continuous: bool, continuous_limit: int, ai_settings: str, @@ -81,6 +89,8 @@ def run_auto_gpt( # Set up logging module configure_logging(config) + llm_provider = _configure_openai_provider(config) + logger = logging.getLogger(__name__) if config.continuous_mode: @@ -148,8 +158,9 @@ def run_auto_gpt( # Create a CommandRegistry instance and scan default folder command_registry = CommandRegistry.with_command_modules(COMMAND_CATEGORIES, config) - ai_config = construct_main_ai_config( + ai_config = await construct_main_ai_config( config, + llm_provider=llm_provider, name=ai_name, role=ai_role, goals=ai_goals, @@ -166,13 +177,42 @@ def run_auto_gpt( agent = Agent( memory=memory, + llm_provider=llm_provider, command_registry=command_registry, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, ai_config=ai_config, config=config, ) - run_interaction_loop(agent) + await run_interaction_loop(agent) + + +def _configure_openai_provider(config: Config) -> OpenAIProvider: + """Create a configured OpenAIProvider object. + + Args: + config: The program's configuration. + + Returns: + A configured OpenAIProvider object. + """ + if config.openai_api_key is None: + raise RuntimeError("OpenAI key is not configured") + + openai_settings = OpenAIProvider.default_settings.copy(deep=True) + openai_settings.credentials = ModelProviderCredentials( + api_key=SecretStr(config.openai_api_key), + # TODO: support OpenAI Azure credentials + api_base=SecretStr(config.openai_api_base) if config.openai_api_base else None, + api_type=SecretStr(config.openai_api_type) if config.openai_api_type else None, + api_version=SecretStr(config.openai_api_version) + if config.openai_api_version + else None, + ) + return OpenAIProvider( + settings=openai_settings, + logger=logging.getLogger("OpenAIProvider"), + ) def _get_cycle_budget(continuous_mode: bool, continuous_limit: int) -> int | float: @@ -195,7 +235,7 @@ class UserFeedback(str, enum.Enum): TEXT = "TEXT" -def run_interaction_loop( +async def run_interaction_loop( agent: Agent, ) -> None: """Run the main interaction loop for the agent. @@ -254,7 +294,7 @@ def run_interaction_loop( # Have the agent determine the next action to take. 
with spinner: try: - command_name, command_args, assistant_reply_dict = agent.think() + command_name, command_args, assistant_reply_dict = await agent.think() except InvalidAgentResponseError as e: logger.warn(f"The agent's thoughts could not be parsed: {e}") consecutive_failures += 1 @@ -278,7 +318,7 @@ def run_interaction_loop( # Get user input # ################## if cycles_remaining == 1: # Last cycle - user_feedback, user_input, new_cycles_remaining = get_user_feedback( + user_feedback, user_input, new_cycles_remaining = await get_user_feedback( config, ai_config, ) @@ -334,7 +374,7 @@ def run_interaction_loop( if not command_name: continue - result = agent.execute(command_name, command_args, user_input) + result = await agent.execute(command_name, command_args, user_input) if result.status == "success": logger.info(result, extra={"title": "SYSTEM:", "title_color": Fore.YELLOW}) @@ -380,7 +420,7 @@ def update_user( ) -def get_user_feedback( +async def get_user_feedback( config: Config, ai_config: AIConfig, ) -> tuple[UserFeedback, str, int | None]: @@ -413,9 +453,9 @@ def get_user_feedback( while user_feedback is None: # Get input from user if config.chat_messages_enabled: - console_input = clean_input(config, "Waiting for your response...") + console_input = await clean_input(config, "Waiting for your response...") else: - console_input = clean_input( + console_input = await clean_input( config, Fore.MAGENTA + "Input: " + Style.RESET_ALL ) @@ -443,8 +483,9 @@ def get_user_feedback( return user_feedback, user_input, new_cycles_remaining -def construct_main_ai_config( +async def construct_main_ai_config( config: Config, + llm_provider: ChatModelProvider, name: Optional[str] = None, role: Optional[str] = None, goals: tuple[str] = tuple(), @@ -483,7 +524,7 @@ def construct_main_ai_config( extra={"title": f"{Fore.GREEN}Welcome back!{Fore.RESET}"}, msg=f"Would you like me to return to being {ai_config.ai_name}?", ) - should_continue = clean_input( + should_continue = await clean_input( config, f"""Continue with the last settings? 
Name: {ai_config.ai_name} @@ -496,7 +537,7 @@ Continue ({config.authorise_key}/{config.exit_key}): """, ai_config = AIConfig() if any([not ai_config.ai_name, not ai_config.ai_role, not ai_config.ai_goals]): - ai_config = interactive_ai_config_setup(config) + ai_config = await interactive_ai_config_setup(config, llm_provider) ai_config.save(config.workdir / config.ai_settings_file) if config.restrict_to_workspace: diff --git a/autogpts/autogpt/autogpt/app/setup.py b/autogpts/autogpt/autogpt/app/setup.py index 74f04bb8..56c2591a 100644 --- a/autogpts/autogpt/autogpt/app/setup.py +++ b/autogpts/autogpt/autogpt/app/setup.py @@ -9,8 +9,7 @@ from jinja2 import Template from autogpt.app import utils from autogpt.config import Config from autogpt.config.ai_config import AIConfig -from autogpt.llm.base import ChatSequence, Message -from autogpt.llm.utils import create_chat_completion +from autogpt.core.resource.model_providers import ChatMessage, ChatModelProvider from autogpt.logs.helpers import user_friendly_output from autogpt.prompts.default_prompts import ( DEFAULT_SYSTEM_PROMPT_AICONFIG_AUTOMATIC, @@ -21,8 +20,10 @@ from autogpt.prompts.default_prompts import ( logger = logging.getLogger(__name__) -def interactive_ai_config_setup( - config: Config, ai_config_template: Optional[AIConfig] = None +async def interactive_ai_config_setup( + config: Config, + llm_provider: ChatModelProvider, + ai_config_template: Optional[AIConfig] = None, ) -> AIConfig: """Prompt the user for input @@ -58,7 +59,7 @@ def interactive_ai_config_setup( title_color=Fore.GREEN, ) - user_desire = utils.clean_input( + user_desire = await utils.clean_input( config, f"{Fore.LIGHTBLUE_EX}I want Auto-GPT to{Style.RESET_ALL}: " ) @@ -72,11 +73,11 @@ def interactive_ai_config_setup( title="Manual Mode Selected", title_color=Fore.GREEN, ) - return generate_aiconfig_manual(config, ai_config_template) + return await generate_aiconfig_manual(config, ai_config_template) else: try: - return generate_aiconfig_automatic(user_desire, config) + return await generate_aiconfig_automatic(user_desire, config, llm_provider) except Exception as e: user_friendly_output( title="Unable to automatically generate AI Config based on user desire.", @@ -85,10 +86,10 @@ def interactive_ai_config_setup( ) logger.debug(f"Error during AIConfig generation: {e}") - return generate_aiconfig_manual(config) + return await generate_aiconfig_manual(config) -def generate_aiconfig_manual( +async def generate_aiconfig_manual( config: Config, ai_config_template: Optional[AIConfig] = None ) -> AIConfig: """ @@ -124,7 +125,7 @@ def generate_aiconfig_manual( message="For example, 'Entrepreneur-GPT'", title_color=Fore.GREEN, ) - ai_name = utils.clean_input(config, "AI Name: ") + ai_name = await utils.clean_input(config, "AI Name: ") if ai_name == "": ai_name = "Entrepreneur-GPT" @@ -144,7 +145,7 @@ def generate_aiconfig_manual( " the sole goal of increasing your net worth.'", title_color=Fore.GREEN, ) - ai_role = utils.clean_input(config, f"{ai_name} is: ") + ai_role = await utils.clean_input(config, f"{ai_name} is: ") if ai_role == "": ai_role = "an AI designed to autonomously develop and run businesses with the" " sole goal of increasing your net worth." 
@@ -162,7 +163,7 @@ def generate_aiconfig_manual( logger.info("Enter nothing to load defaults, enter nothing when finished.") ai_goals = [] for i in range(5): - ai_goal = utils.clean_input( + ai_goal = await utils.clean_input( config, f"{Fore.LIGHTBLUE_EX}Goal{Style.RESET_ALL} {i+1}: " ) if ai_goal == "": @@ -182,7 +183,7 @@ def generate_aiconfig_manual( title_color=Fore.GREEN, ) logger.info("Enter nothing to let the AI run without monetary limit") - api_budget_input = utils.clean_input( + api_budget_input = await utils.clean_input( config, f"{Fore.LIGHTBLUE_EX}Budget{Style.RESET_ALL}: $" ) if api_budget_input == "": @@ -199,10 +200,16 @@ def generate_aiconfig_manual( ) api_budget = 0.0 - return AIConfig(ai_name, ai_role, ai_goals, api_budget) + return AIConfig( + ai_name=ai_name, ai_role=ai_role, ai_goals=ai_goals, api_budget=api_budget + ) -def generate_aiconfig_automatic(user_prompt: str, config: Config) -> AIConfig: +async def generate_aiconfig_automatic( + user_prompt: str, + config: Config, + llm_provider: ChatModelProvider, +) -> AIConfig: """Generates an AIConfig object from the given string. Returns: @@ -214,16 +221,15 @@ def generate_aiconfig_automatic(user_prompt: str, config: Config) -> AIConfig: DEFAULT_TASK_PROMPT_AICONFIG_AUTOMATIC ).render(user_prompt=user_prompt) # Call LLM with the string as user input - output = create_chat_completion( - ChatSequence.for_model( - config.smart_llm, + output = ( + await llm_provider.create_chat_completion( [ - Message("system", system_prompt), - Message("user", prompt_ai_config_automatic), + ChatMessage.system(system_prompt), + ChatMessage.user(prompt_ai_config_automatic), ], - ), - config, - ).content + config.smart_llm, + ) + ).response["content"] # Debug LLM Output logger.debug(f"AI Config Generator Raw Output: {output}") @@ -242,4 +248,6 @@ def generate_aiconfig_automatic(user_prompt: str, config: Config) -> AIConfig: ai_goals = re.findall(r"(?<=\n)-\s*(.*)", output) api_budget = 0.0 # TODO: parse api budget using a regular expression - return AIConfig(ai_name, ai_role, ai_goals, api_budget) + return AIConfig( + ai_name=ai_name, ai_role=ai_role, ai_goals=ai_goals, api_budget=api_budget + ) diff --git a/autogpts/autogpt/autogpt/app/utils.py b/autogpts/autogpt/autogpt/app/utils.py index 921ba7b5..032dffb2 100644 --- a/autogpts/autogpt/autogpt/app/utils.py +++ b/autogpts/autogpt/autogpt/app/utils.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) session = PromptSession(history=InMemoryHistory()) -def clean_input(config: Config, prompt: str = ""): +async def clean_input(config: Config, prompt: str = ""): try: if config.chat_messages_enabled: for plugin in config.plugins: @@ -50,7 +50,7 @@ def clean_input(config: Config, prompt: str = ""): # handle_sigint must be set to False, so the signal handler in the # autogpt/main.py could be employed properly. 
This referes to # https://github.com/Significant-Gravitas/Auto-GPT/pull/4799/files/3966cdfd694c2a80c0333823c3bc3da090f85ed3#r1264278776 - answer = session.prompt(ANSI(prompt), handle_sigint=False) + answer = await session.prompt_async(ANSI(prompt), handle_sigint=False) return answer except KeyboardInterrupt: logger.info("You interrupted Auto-GPT") diff --git a/autogpts/autogpt/autogpt/commands/execute_code.py b/autogpts/autogpt/autogpt/commands/execute_code.py index 9ac2a344..2bbfcf32 100644 --- a/autogpts/autogpt/autogpt/commands/execute_code.py +++ b/autogpts/autogpt/autogpt/commands/execute_code.py @@ -163,7 +163,8 @@ def execute_python_file( "python", "-B", file_path.relative_to(agent.workspace.root).as_posix(), - ] + args, + ] + + args, volumes={ str(agent.workspace.root): { "bind": "/workspace", diff --git a/autogpts/autogpt/autogpt/commands/web_selenium.py b/autogpts/autogpt/autogpt/commands/web_selenium.py index c47ab360..0d072fac 100644 --- a/autogpts/autogpt/autogpt/commands/web_selenium.py +++ b/autogpts/autogpt/autogpt/commands/web_selenium.py @@ -39,7 +39,6 @@ if TYPE_CHECKING: from autogpt.agents.utils.exceptions import CommandExecutionError from autogpt.command_decorator import command -from autogpt.llm.utils import count_string_tokens from autogpt.processing.html import extract_hyperlinks, format_hyperlinks from autogpt.processing.text import summarize_text from autogpt.url_utils.validators import validate_url @@ -70,7 +69,7 @@ class BrowsingError(CommandExecutionError): }, ) @validate_url -def read_webpage(url: str, agent: Agent, question: str = "") -> str: +async def read_webpage(url: str, agent: Agent, question: str = "") -> str: """Browse a website and return the answer and links to the user Args: @@ -91,8 +90,11 @@ def read_webpage(url: str, agent: Agent, question: str = "") -> str: summarized = False if not text: return f"Website did not contain any text.\n\nLinks: {links}" - elif count_string_tokens(text, agent.llm.name) > TOKENS_TO_TRIGGER_SUMMARY: - text = summarize_memorize_webpage( + elif ( + agent.llm_provider.count_tokens(text, agent.llm.name) + > TOKENS_TO_TRIGGER_SUMMARY + ): + text = await summarize_memorize_webpage( url, text, question or None, agent, driver ) return_literal_content = bool(question) @@ -247,7 +249,7 @@ def close_browser(driver: WebDriver) -> None: driver.quit() -def summarize_memorize_webpage( +async def summarize_memorize_webpage( url: str, text: str, question: str | None, @@ -276,5 +278,10 @@ def summarize_memorize_webpage( # new_memory = MemoryItem.from_webpage(text, url, agent.config, question=question) # memory.add(new_memory) - summary, _ = summarize_text(text, question=question, config=agent.config) + summary, _ = await summarize_text( + text, + question=question, + llm_provider=agent.llm_provider, + config=agent.config, # FIXME + ) return summary diff --git a/autogpts/autogpt/autogpt/config/ai_config.py b/autogpts/autogpt/autogpt/config/ai_config.py index 392a0198..1cf16687 100644 --- a/autogpts/autogpt/autogpt/config/ai_config.py +++ b/autogpts/autogpt/autogpt/config/ai_config.py @@ -2,9 +2,9 @@ from __future__ import annotations from pathlib import Path -from pydantic import BaseModel, Field import yaml +from pydantic import BaseModel, Field class AIConfig(BaseModel): @@ -53,10 +53,7 @@ class AIConfig(BaseModel): api_budget = config_params.get("api_budget", 0.0) return AIConfig( - ai_name=ai_name, - ai_role=ai_role, - ai_goals=ai_goals, - api_budget=api_budget + ai_name=ai_name, ai_role=ai_role, ai_goals=ai_goals, 
api_budget=api_budget ) def save(self, ai_settings_file: str | Path) -> None: diff --git a/autogpts/autogpt/autogpt/config/config.py b/autogpts/autogpt/autogpt/config/config.py index 3dc2fc00..3fb371da 100644 --- a/autogpts/autogpt/autogpt/config/config.py +++ b/autogpts/autogpt/autogpt/config/config.py @@ -13,7 +13,7 @@ from colorama import Fore from pydantic import Field, validator from autogpt.core.configuration.schema import Configurable, SystemSettings -from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS +from autogpt.core.resource.model_providers.openai import OPEN_AI_CHAT_MODELS from autogpt.plugins.plugins_config import PluginsConfig AI_SETTINGS_FILE = "ai_settings.yaml" @@ -153,10 +153,11 @@ class Config(SystemSettings, arbitrary_types_allowed=True): def validate_openai_functions(cls, v: bool, values: dict[str, Any]): if v: smart_llm = values["smart_llm"] - assert OPEN_AI_CHAT_MODELS[smart_llm].supports_functions, ( + assert OPEN_AI_CHAT_MODELS[smart_llm].has_function_call_api, ( f"Model {smart_llm} does not support OpenAI Functions. " "Please disable OPENAI_FUNCTIONS or choose a suitable model." ) + return v def get_openai_credentials(self, model: str) -> dict[str, str]: credentials = { diff --git a/autogpts/autogpt/autogpt/core/ability/simple.py b/autogpts/autogpt/autogpt/core/ability/simple.py index 94d443e9..96241318 100644 --- a/autogpts/autogpt/autogpt/core/ability/simple.py +++ b/autogpts/autogpt/autogpt/core/ability/simple.py @@ -79,7 +79,9 @@ class SimpleAbilityRegistry(AbilityRegistry, Configurable): self._abilities.append(ability) def list_abilities(self) -> list[str]: - return [f"{ability.name()}: {ability.description}" for ability in self._abilities] + return [ + f"{ability.name()}: {ability.description}" for ability in self._abilities + ] def dump_abilities(self) -> list[CompletionModelFunction]: return [ability.spec for ability in self._abilities] diff --git a/autogpts/autogpt/autogpt/core/planning/prompt_strategies/next_ability.py b/autogpts/autogpt/autogpt/core/planning/prompt_strategies/next_ability.py index 5fd0052d..725128c5 100644 --- a/autogpts/autogpt/autogpt/core/planning/prompt_strategies/next_ability.py +++ b/autogpts/autogpt/autogpt/core/planning/prompt_strategies/next_ability.py @@ -69,7 +69,7 @@ class NextAbility(PromptStrategy): system_info=DEFAULT_SYSTEM_INFO, user_prompt_template=DEFAULT_USER_PROMPT_TEMPLATE, additional_ability_arguments={ - k: v.dump() for k, v in DEFAULT_ADDITIONAL_ABILITY_ARGUMENTS.items() + k: v.to_dict() for k, v in DEFAULT_ADDITIONAL_ABILITY_ARGUMENTS.items() }, ) diff --git a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py index 816b573e..6c0edb57 100644 --- a/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py +++ b/autogpts/autogpt/autogpt/core/resource/model_providers/schema.py @@ -106,7 +106,7 @@ class CompletionModelFunction(BaseModel): "parameters": { "type": "object", "properties": { - name: param.dump() for name, param in self.parameters.items() + name: param.to_dict() for name, param in self.parameters.items() }, "required": [ name for name, param in self.parameters.items() if param.required diff --git a/autogpts/autogpt/autogpt/core/utils/json_schema.py b/autogpts/autogpt/autogpt/core/utils/json_schema.py index 702a6d7d..312f51cd 100644 --- a/autogpts/autogpt/autogpt/core/utils/json_schema.py +++ b/autogpts/autogpt/autogpt/core/utils/json_schema.py @@ -35,13 +35,13 @@ class JSONSchema(BaseModel): } if self.type 
== "array": if self.items: - schema["items"] = self.items.dump() + schema["items"] = self.items.to_dict() schema["minItems"] = self.minItems schema["maxItems"] = self.maxItems elif self.type == "object": if self.properties: schema["properties"] = { - name: prop.dump() for name, prop in self.properties.items() + name: prop.to_dict() for name, prop in self.properties.items() } schema["required"] = [ name for name, prop in self.properties.items() if prop.required @@ -98,7 +98,7 @@ class JSONSchema(BaseModel): tuple: A tuple where the first element is a boolean indicating whether the object is valid or not, and the second element is a list of errors found in the object, or None if the object is valid. """ - validator = Draft7Validator(self.dump()) + validator = Draft7Validator(self.to_dict()) if errors := sorted(validator.iter_errors(object), key=lambda e: e.path): for error in errors: diff --git a/autogpts/autogpt/autogpt/json_utils/llm_response_format_1.json b/autogpts/autogpt/autogpt/json_utils/llm_response_format_1.json deleted file mode 100644 index 17101dda..00000000 --- a/autogpts/autogpt/autogpt/json_utils/llm_response_format_1.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "thoughts": { - "type": "object", - "properties": { - "text": { - "type": "string", - "description": "thoughts" - }, - "reasoning": { - "type": "string" - }, - "plan": { - "type": "string", - "description": "- short bulleted\n- list that conveys\n- long-term plan" - }, - "criticism": { - "type": "string", - "description": "constructive self-criticism" - }, - "speak": { - "type": "string", - "description": "thoughts summary to say to user" - } - }, - "required": ["text", "reasoning", "plan", "criticism", "speak"], - "additionalProperties": false - }, - "command": { - "type": "object", - "properties": { - "name": {"type": "string"}, - "args": { - "type": "object" - } - }, - "required": ["name", "args"], - "additionalProperties": false - } - }, - "required": ["thoughts", "command"], - "additionalProperties": false -} diff --git a/autogpts/autogpt/autogpt/json_utils/utilities.py b/autogpts/autogpt/autogpt/json_utils/utilities.py index 7bacfc2f..876106ad 100644 --- a/autogpts/autogpt/autogpt/json_utils/utilities.py +++ b/autogpts/autogpt/autogpt/json_utils/utilities.py @@ -1,18 +1,10 @@ """Utilities for the json_fixes package.""" import ast -import json import logging -import os.path -from typing import Any, Literal - -from jsonschema import Draft7Validator - -from autogpt.config import Config +from typing import Any logger = logging.getLogger(__name__) -LLM_DEFAULT_RESPONSE_FORMAT = "llm_response_format_1" - def extract_dict_from_response(response_content: str) -> dict[str, Any]: # Sometimes the response includes the JSON in a code block with ``` @@ -28,50 +20,3 @@ def extract_dict_from_response(response_content: str) -> dict[str, Any]: logger.debug(f"Invalid JSON received in response: {response_content}") # TODO: How to raise an error here without causing the program to exit? 
return {} - - -def llm_response_schema( - config: Config, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT -) -> dict[str, Any]: - filename = os.path.join(os.path.dirname(__file__), f"{schema_name}.json") - with open(filename, "r") as f: - try: - json_schema = json.load(f) - except Exception as e: - raise RuntimeError(f"Failed to load JSON schema: {e}") - if config.openai_functions: - del json_schema["properties"]["command"] - json_schema["required"].remove("command") - return json_schema - - -def validate_dict( - object: object, config: Config, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT -) -> tuple[Literal[True], None] | tuple[Literal[False], list]: - """ - :type schema_name: object - :param schema_name: str - :type json_object: object - - Returns: - bool: Whether the json_object is valid or not - list: Errors found in the json_object, or None if the object is valid - """ - schema = llm_response_schema(config, schema_name) - validator = Draft7Validator(schema) - - if errors := sorted(validator.iter_errors(object), key=lambda e: e.path): - for error in errors: - logger.debug(f"JSON Validation Error: {error}") - - if config.debug_mode: - logger.error(json.dumps(object, indent=4)) - logger.error("The following issues were found:") - - for error in errors: - logger.error(f"Error: {error.message}") - return False, errors - - logger.debug("The JSON object is valid.") - - return True, None diff --git a/autogpts/autogpt/autogpt/llm/__init__.py b/autogpts/autogpt/autogpt/llm/__init__.py deleted file mode 100644 index 976d5eff..00000000 --- a/autogpts/autogpt/autogpt/llm/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from autogpt.llm.base import ( - ChatModelInfo, - ChatModelResponse, - ChatSequence, - EmbeddingModelInfo, - EmbeddingModelResponse, - LLMResponse, - Message, - ModelInfo, -) - -__all__ = [ - "Message", - "ChatSequence", - "ModelInfo", - "ChatModelInfo", - "EmbeddingModelInfo", - "LLMResponse", - "ChatModelResponse", - "EmbeddingModelResponse", -] diff --git a/autogpts/autogpt/autogpt/llm/api_manager.py b/autogpts/autogpt/autogpt/llm/api_manager.py index 09d6a0db..14626362 100644 --- a/autogpts/autogpt/autogpt/llm/api_manager.py +++ b/autogpts/autogpt/autogpt/llm/api_manager.py @@ -6,7 +6,8 @@ from typing import List, Optional import openai from openai import Model -from autogpt.llm.base import CompletionModelInfo +from autogpt.core.resource.model_providers.openai import OPEN_AI_MODELS +from autogpt.core.resource.model_providers.schema import ChatModelInfo from autogpt.singleton import Singleton logger = logging.getLogger(__name__) @@ -37,15 +38,13 @@ class ApiManager(metaclass=Singleton): model (str): The model used for the API call. 
""" # the .model property in API responses can contain version suffixes like -v2 - from autogpt.llm.providers.openai import OPEN_AI_MODELS - model = model[:-3] if model.endswith("-v2") else model model_info = OPEN_AI_MODELS[model] self.total_prompt_tokens += prompt_tokens self.total_completion_tokens += completion_tokens self.total_cost += prompt_tokens * model_info.prompt_token_cost / 1000 - if isinstance(model_info, CompletionModelInfo): + if isinstance(model_info, ChatModelInfo): self.total_cost += ( completion_tokens * model_info.completion_token_cost / 1000 ) diff --git a/autogpts/autogpt/autogpt/llm/base.py b/autogpts/autogpt/autogpt/llm/base.py deleted file mode 100644 index 99afac8f..00000000 --- a/autogpts/autogpt/autogpt/llm/base.py +++ /dev/null @@ -1,219 +0,0 @@ -from __future__ import annotations -import json - -from math import ceil, floor -from pydantic import BaseModel, Field -from typing import Any, Literal, Optional, Type, TypedDict, TypeVar, overload - -MessageRole = Literal["system", "user", "assistant", "function"] -MessageType = Literal["ai_response", "action_result"] - -TText = list[int] -"""Token array representing tokenized text""" - - -class MessageDict(TypedDict): - role: MessageRole - content: str - - -class ResponseMessageDict(TypedDict): - role: Literal["assistant"] - content: Optional[str] - function_call: Optional[FunctionCallDict] - - -class FunctionCallDict(TypedDict): - name: str - arguments: str - - -class Message(BaseModel): - """OpenAI Message object containing a role and the message content""" - - role: MessageRole - content: str - type: Optional[MessageType] - - def __init__( - self, - role: MessageRole, - content: str, - type: Optional[MessageType] = None - ): - super().__init__( - role=role, - content=content, - type=type, - ) - - def raw(self) -> MessageDict: - return {"role": self.role, "content": self.content} - - -class ModelInfo(BaseModel): - """Struct for model information. - - Would be lovely to eventually get this directly from APIs, but needs to be scraped from - websites for now. - """ - - name: str - max_tokens: int - prompt_token_cost: float - - -class CompletionModelInfo(ModelInfo): - """Struct for generic completion model information.""" - - completion_token_cost: float - - -class ChatModelInfo(CompletionModelInfo): - """Struct for chat model information.""" - - supports_functions: bool = False - - -class TextModelInfo(CompletionModelInfo): - """Struct for text completion model information.""" - - -class EmbeddingModelInfo(ModelInfo): - """Struct for embedding model information.""" - - embedding_dimensions: int - - -# Can be replaced by Self in Python 3.11 -TChatSequence = TypeVar("TChatSequence", bound="ChatSequence") - - -class ChatSequence(BaseModel): - """Utility container for a chat sequence""" - - model: ChatModelInfo - messages: list[Message] = Field(default_factory=list[Message]) - - @overload - def __getitem__(self, key: int) -> Message: - ... - - @overload - def __getitem__(self: TChatSequence, key: slice) -> TChatSequence: - ... 
- - def __getitem__(self: TChatSequence, key: int | slice) -> Message | TChatSequence: - if isinstance(key, slice): - copy = self.copy(deep=True) - copy.messages = self.messages[key] - return copy - return self.messages[key] - - def __iter__(self): - return iter(self.messages) - - def __len__(self): - return len(self.messages) - - def add( - self, - message_role: MessageRole, - content: str, - type: MessageType | None = None, - ) -> None: - self.append(Message(message_role, content, type)) - - def append(self, message: Message): - return self.messages.append(message) - - def extend(self, messages: list[Message] | ChatSequence): - return self.messages.extend(messages) - - def insert(self, index: int, *messages: Message): - for message in reversed(messages): - self.messages.insert(index, message) - - @classmethod - def for_model( - cls: Type[TChatSequence], - model_name: str, - messages: list[Message] | ChatSequence = [], - **kwargs, - ) -> TChatSequence: - from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS - - if model_name not in OPEN_AI_CHAT_MODELS: - raise ValueError(f"Unknown chat model '{model_name}'") - - return cls( - model=OPEN_AI_CHAT_MODELS[model_name], messages=list(messages), **kwargs - ) - - @property - def token_length(self) -> int: - from autogpt.llm.utils import count_message_tokens - - return count_message_tokens(self.messages, self.model.name) - - def raw(self) -> list[MessageDict]: - return [m.raw() for m in self.messages] - - def dump(self) -> str: - SEPARATOR_LENGTH = 42 - - def separator(text: str): - half_sep_len = (SEPARATOR_LENGTH - 2 - len(text)) / 2 - return f"{floor(half_sep_len)*'-'} {text.upper()} {ceil(half_sep_len)*'-'}" - - formatted_messages = "\n".join( - [f"{separator(m.role)}\n{m.content}" for m in self.messages] - ) - return f""" -============== {__class__.__name__} ============== -Length: {self.token_length} tokens; {len(self.messages)} messages -{formatted_messages} -========================================== -""" - - -class LLMResponse(BaseModel): - """Standard response struct for a response from an LLM model.""" - - model_info: ModelInfo - - -class EmbeddingModelResponse(LLMResponse): - """Standard response struct for a response from an embedding model.""" - - embedding: list[float] = Field(default_factory=list) - - -class ChatModelResponse(LLMResponse): - """Standard response struct for a response from a chat LLM.""" - - content: Optional[str] - function_call: Optional[LLMFunctionCall] - - -class LLMFunctionCall(BaseModel): - """Represents a function call as generated by an OpenAI model - - Attributes: - name: the name of the function that the LLM wants to call - arguments: a stringified JSON object (unverified) containing `arg: value` pairs - """ - - name: str - arguments: dict[str, Any] = {} - - @staticmethod - def parse(raw: FunctionCallDict): - return LLMFunctionCall( - name=raw["name"], - arguments=json.loads(raw["arguments"]), - ) - - -# Complete model initialization; necessary because of order of definition -ChatModelResponse.update_forward_refs() diff --git a/autogpts/autogpt/autogpt/llm/providers/openai.py b/autogpts/autogpt/autogpt/llm/providers/openai.py index b018604d..1829bb8e 100644 --- a/autogpts/autogpt/autogpt/llm/providers/openai.py +++ b/autogpts/autogpt/autogpt/llm/providers/openai.py @@ -1,10 +1,10 @@ from __future__ import annotations +import enum import functools import logging import time -from dataclasses import dataclass -from typing import Callable, List, Optional, TypeVar +from typing import Callable, TypeVar 
from unittest.mock import patch import openai @@ -13,110 +13,13 @@ from colorama import Fore, Style from openai.error import APIError, RateLimitError, ServiceUnavailableError, Timeout from openai.openai_object import OpenAIObject -from autogpt.llm.base import ( - ChatModelInfo, - EmbeddingModelInfo, - MessageDict, - TextModelInfo, - TText, -) +from autogpt.core.resource.model_providers import CompletionModelFunction +from autogpt.core.utils.json_schema import JSONSchema from autogpt.logs.helpers import request_user_double_check from autogpt.models.command_registry import CommandRegistry logger = logging.getLogger(__name__) -OPEN_AI_CHAT_MODELS = { - info.name: info - for info in [ - ChatModelInfo( - name="gpt-3.5-turbo-0301", - prompt_token_cost=0.0015, - completion_token_cost=0.002, - max_tokens=4096, - ), - ChatModelInfo( - name="gpt-3.5-turbo-0613", - prompt_token_cost=0.0015, - completion_token_cost=0.002, - max_tokens=4096, - supports_functions=True, - ), - ChatModelInfo( - name="gpt-3.5-turbo-16k-0613", - prompt_token_cost=0.003, - completion_token_cost=0.004, - max_tokens=16384, - supports_functions=True, - ), - ChatModelInfo( - name="gpt-4-0314", - prompt_token_cost=0.03, - completion_token_cost=0.06, - max_tokens=8192, - ), - ChatModelInfo( - name="gpt-4-0613", - prompt_token_cost=0.03, - completion_token_cost=0.06, - max_tokens=8191, - supports_functions=True, - ), - ChatModelInfo( - name="gpt-4-32k-0314", - prompt_token_cost=0.06, - completion_token_cost=0.12, - max_tokens=32768, - ), - ChatModelInfo( - name="gpt-4-32k-0613", - prompt_token_cost=0.06, - completion_token_cost=0.12, - max_tokens=32768, - supports_functions=True, - ), - ] -} -# Set aliases for rolling model IDs -chat_model_mapping = { - "gpt-3.5-turbo": "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613", - "gpt-4": "gpt-4-0613", - "gpt-4-32k": "gpt-4-32k-0613", -} -for alias, target in chat_model_mapping.items(): - alias_info = ChatModelInfo(**OPEN_AI_CHAT_MODELS[target].__dict__) - alias_info.name = alias - OPEN_AI_CHAT_MODELS[alias] = alias_info - -OPEN_AI_TEXT_MODELS = { - info.name: info - for info in [ - TextModelInfo( - name="text-davinci-003", - prompt_token_cost=0.02, - completion_token_cost=0.02, - max_tokens=4097, - ), - ] -} - -OPEN_AI_EMBEDDING_MODELS = { - info.name: info - for info in [ - EmbeddingModelInfo( - name="text-embedding-ada-002", - prompt_token_cost=0.0001, - max_tokens=8191, - embedding_dimensions=1536, - ), - ] -} - -OPEN_AI_MODELS: dict[str, ChatModelInfo | EmbeddingModelInfo | TextModelInfo] = { - **OPEN_AI_CHAT_MODELS, - **OPEN_AI_TEXT_MODELS, - **OPEN_AI_EMBEDDING_MODELS, -} T = TypeVar("T", bound=Callable) @@ -222,155 +125,56 @@ def retry_api( return _wrapper -@meter_api -@retry_api() -def create_chat_completion( - messages: List[MessageDict], - *_, - **kwargs, -) -> OpenAIObject: - """Create a chat completion using the OpenAI API - - Args: - messages: A list of messages to feed to the chatbot. - kwargs: Other arguments to pass to the OpenAI API chat completion call. - Returns: - OpenAIObject: The ChatCompletion response from OpenAI +def format_openai_function_for_prompt(func: CompletionModelFunction) -> str: + """Returns the function formatted similarly to the way OpenAI does it internally: + https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18 + Example: + ```ts + // Get the current weather in a given location + type get_current_weather = (_: { + // The city and state, e.g. 
San Francisco, CA + location: string, + unit?: "celsius" | "fahrenheit", + }) => any; + ``` """ - completion: OpenAIObject = openai.ChatCompletion.create( - messages=messages, - **kwargs, - ) - return completion - -@meter_api -@retry_api() -def create_text_completion( - prompt: str, - *_, - **kwargs, -) -> OpenAIObject: - """Create a text completion using the OpenAI API - - Args: - prompt: A text prompt to feed to the LLM - kwargs: Other arguments to pass to the OpenAI API text completion call. - Returns: - OpenAIObject: The Completion response from OpenAI - - """ - return openai.Completion.create( - prompt=prompt, - **kwargs, - ) - - -@meter_api -@retry_api() -def create_embedding( - input: str | TText | List[str] | List[TText], - *_, - **kwargs, -) -> OpenAIObject: - """Create an embedding using the OpenAI API - - Args: - input: The text to embed. - kwargs: Other arguments to pass to the OpenAI API embedding call. - Returns: - OpenAIObject: The Embedding response from OpenAI - - """ - return openai.Embedding.create( - input=input, - **kwargs, - ) - - -@dataclass -class OpenAIFunctionSpec: - """Represents a "function" in OpenAI, which is mapped to a Command in Auto-GPT""" - - name: str - description: str - parameters: dict[str, ParameterSpec] - - @dataclass - class ParameterSpec: - name: str - type: str # TODO: add enum support - description: Optional[str] - required: bool = False - - @property - def schema(self) -> dict[str, str | dict | list]: - """Returns an OpenAI-consumable function specification""" - return { - "name": self.name, - "description": self.description, - "parameters": { - "type": "object", - "properties": { - param.name: { - "type": param.type, - "description": param.description, - } - for param in self.parameters.values() - }, - "required": [ - param.name for param in self.parameters.values() if param.required - ], - }, - } - - @property - def prompt_format(self) -> str: - """Returns the function formatted similarly to the way OpenAI does it internally: - https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18 - - Example: - ```ts - // Get the current weather in a given location - type get_current_weather = (_: { - // The city and state, e.g. San Francisco, CA - location: string, - unit?: "celsius" | "fahrenheit", - }) => any; - ``` - """ - - def param_signature(p_spec: OpenAIFunctionSpec.ParameterSpec) -> str: - # TODO: enum type support - return ( - f"// {p_spec.description}\n" if p_spec.description else "" - ) + f"{p_spec.name}{'' if p_spec.required else '?'}: {p_spec.type}," - - return "\n".join( - [ - f"// {self.description}", - f"type {self.name} = (_ :{{", - *[param_signature(p) for p in self.parameters.values()], - "}) => any;", - ] + def param_signature(name: str, spec: JSONSchema) -> str: + # TODO: enum type support + type_dec = ( + spec.type if not spec.enum else " | ".join(repr(e) for e in spec.enum) ) + return ( + f"// {spec.description}\n" if spec.description else "" + ) + f"{name}{'' if spec.required else '?'}: {type_dec}," + + return "\n".join( + [ + f"// {func.description}", + f"type {func.name} = (_ :{{", + *[param_signature(name, p) for name, p in func.parameters.items()], + "}) => any;", + ] + ) def get_openai_command_specs( command_registry: CommandRegistry, -) -> list[OpenAIFunctionSpec]: +) -> list[CompletionModelFunction]: """Get OpenAI-consumable function specs for the agent's available commands. 
see https://platform.openai.com/docs/guides/gpt/function-calling """ return [ - OpenAIFunctionSpec( + CompletionModelFunction( name=command.name, description=command.description, parameters={ - param.name: OpenAIFunctionSpec.ParameterSpec( - name=param.name, - type=param.type, + param.name: JSONSchema( + type=param.type if type(param.type) == JSONSchema.Type else None, + enum=[v.value for v in type(param.type)] + if type(param.type) == enum.Enum + else None, required=param.required, description=param.description, ) @@ -382,13 +186,15 @@ def get_openai_command_specs( def count_openai_functions_tokens( - functions: list[OpenAIFunctionSpec], for_model: str + functions: list[CompletionModelFunction], for_model: str ) -> int: """Returns the number of tokens taken up by a set of function definitions Reference: https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18 """ - from autogpt.llm.utils import count_string_tokens + from autogpt.llm.utils import ( + count_string_tokens, # FIXME: maybe move to OpenAIProvider? + ) return count_string_tokens( f"# Tools\n\n## functions\n\n{format_function_specs_as_typescript_ns(functions)}", @@ -396,7 +202,9 @@ def count_openai_functions_tokens( ) -def format_function_specs_as_typescript_ns(functions: list[OpenAIFunctionSpec]) -> str: +def format_function_specs_as_typescript_ns( + functions: list[CompletionModelFunction], +) -> str: """Returns a function signature block in the format used by OpenAI internally: https://community.openai.com/t/how-to-calculate-the-tokens-when-using-function-call/266573/18 @@ -419,6 +227,6 @@ def format_function_specs_as_typescript_ns(functions: list[OpenAIFunctionSpec]) return ( "namespace functions {\n\n" - + "\n\n".join(f.prompt_format for f in functions) + + "\n\n".join(format_openai_function_for_prompt(f) for f in functions) + "\n\n} // namespace functions" ) diff --git a/autogpts/autogpt/autogpt/llm/utils/__init__.py b/autogpts/autogpt/autogpt/llm/utils/__init__.py deleted file mode 100644 index f51666c2..00000000 --- a/autogpts/autogpt/autogpt/llm/utils/__init__.py +++ /dev/null @@ -1,189 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Optional - -from colorama import Fore - -from autogpt.config import Config - -from ..base import ( - ChatModelResponse, - ChatSequence, - FunctionCallDict, - LLMFunctionCall, - Message, - ResponseMessageDict, -) -from ..providers import openai as iopenai -from ..providers.openai import ( - OPEN_AI_CHAT_MODELS, - OpenAIFunctionSpec, - count_openai_functions_tokens, -) - -from .token_counter import count_message_tokens, count_string_tokens - -logger = logging.getLogger(__name__) - - -def call_ai_function( - function: str, - args: list, - description: str, - config: Config, - model: Optional[str] = None, -) -> str: - """Call an AI function - - This is a magic function that can do anything with no-code. See - https://github.com/Torantulino/AI-Functions for more info. - - Args: - function (str): The function to call - args (list): The arguments to pass to the function - description (str): The description of the function - model (str, optional): The model to use. Defaults to None. 
- - Returns: - str: The response from the function - """ - if model is None: - model = config.smart_llm - # For each arg, if any are None, convert to "None": - args = [str(arg) if arg is not None else "None" for arg in args] - # parse args to comma separated string - arg_str: str = ", ".join(args) - - prompt = ChatSequence.for_model( - model, - [ - Message( - "system", - f"You are now the following python function: ```# {description}" - f"\n{function}```\n\nOnly respond with your `return` value.", - ), - Message("user", arg_str), - ], - ) - return create_chat_completion(prompt=prompt, temperature=0, config=config).content - - -def create_text_completion( - prompt: str, - config: Config, - model: Optional[str], - temperature: Optional[float], - max_output_tokens: Optional[int], -) -> str: - if model is None: - model = config.fast_llm - if temperature is None: - temperature = config.temperature - - kwargs = {"model": model} - kwargs.update(config.get_openai_credentials(model)) - - response = iopenai.create_text_completion( - prompt=prompt, - **kwargs, - temperature=temperature, - max_tokens=max_output_tokens, - ) - logger.debug(f"Response: {response}") - - return response.choices[0].text - - -# Overly simple abstraction until we create something better -def create_chat_completion( - prompt: ChatSequence, - config: Config, - functions: Optional[list[OpenAIFunctionSpec]] = None, - model: Optional[str] = None, - temperature: Optional[float] = None, - max_tokens: Optional[int] = None, -) -> ChatModelResponse: - """Create a chat completion using the OpenAI API - - Args: - messages (List[Message]): The messages to send to the chat completion - model (str, optional): The model to use. Defaults to None. - temperature (float, optional): The temperature to use. Defaults to 0.9. - max_tokens (int, optional): The max tokens to use. Defaults to None. - - Returns: - str: The response from the chat completion - """ - - if model is None: - model = prompt.model.name - if temperature is None: - temperature = config.temperature - if max_tokens is None: - prompt_tlength = prompt.token_length - max_tokens = ( - OPEN_AI_CHAT_MODELS[model].max_tokens - prompt_tlength - 1 - ) # the -1 is just here because we have a bug and we don't know how to fix it. When using gpt-4-0314 we get a token error. 
- logger.debug(f"Prompt length: {prompt_tlength} tokens") - if functions: - functions_tlength = count_openai_functions_tokens(functions, model) - max_tokens -= functions_tlength - logger.debug(f"Functions take up {functions_tlength} tokens in API call") - - logger.debug( - f"{Fore.GREEN}Creating chat completion with model {model}, temperature {temperature}, max_tokens {max_tokens}{Fore.RESET}" - ) - chat_completion_kwargs = { - "model": model, - "temperature": temperature, - "max_tokens": max_tokens, - } - - for plugin in config.plugins: - if plugin.can_handle_chat_completion( - messages=prompt.raw(), - **chat_completion_kwargs, - ): - message = plugin.handle_chat_completion( - messages=prompt.raw(), - **chat_completion_kwargs, - ) - if message is not None: - return message - - chat_completion_kwargs.update(config.get_openai_credentials(model)) - - if functions: - chat_completion_kwargs["functions"] = [ - function.schema for function in functions - ] - - # Print full prompt to debug log - logger.debug(prompt.dump()) - - response = iopenai.create_chat_completion( - messages=prompt.raw(), - **chat_completion_kwargs, - ) - logger.debug(f"Response: {response}") - - if hasattr(response, "error"): - logger.error(response.error) - raise RuntimeError(response.error) - - first_message: ResponseMessageDict = response.choices[0].message - content: str | None = first_message.get("content") - function_call: FunctionCallDict | None = first_message.get("function_call") - - for plugin in config.plugins: - if not plugin.can_handle_on_response(): - continue - # TODO: function call support in plugin.on_response() - content = plugin.on_response(content) - - return ChatModelResponse( - model_info=OPEN_AI_CHAT_MODELS[model], - content=content, - function_call=LLMFunctionCall.parse(function_call) if function_call else None, - ) diff --git a/autogpts/autogpt/autogpt/llm/utils/token_counter.py b/autogpts/autogpt/autogpt/llm/utils/token_counter.py deleted file mode 100644 index 8fe10083..00000000 --- a/autogpts/autogpt/autogpt/llm/utils/token_counter.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Functions for counting the number of tokens in a message or string.""" -from __future__ import annotations - -import logging -from typing import List, overload - -import tiktoken - -from autogpt.llm.base import Message - -logger = logging.getLogger(__name__) - - -@overload -def count_message_tokens(messages: Message, model: str = "gpt-3.5-turbo") -> int: - ... - - -@overload -def count_message_tokens(messages: List[Message], model: str = "gpt-3.5-turbo") -> int: - ... - - -def count_message_tokens( - messages: Message | List[Message], model: str = "gpt-3.5-turbo" -) -> int: - """ - Returns the number of tokens used by a list of messages. - - Args: - messages (list): A list of messages, each of which is a dictionary - containing the role and content of the message. - model (str): The name of the model to use for tokenization. - Defaults to "gpt-3.5-turbo-0301". - - Returns: - int: The number of tokens used by the list of messages. 
- """ - if isinstance(messages, Message): - messages = [messages] - - if model.startswith("gpt-3.5-turbo"): - tokens_per_message = ( - 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n - ) - tokens_per_name = -1 # if there's a name, the role is omitted - encoding_model = "gpt-3.5-turbo" - elif model.startswith("gpt-4"): - tokens_per_message = 3 - tokens_per_name = 1 - encoding_model = "gpt-4" - else: - raise NotImplementedError( - f"count_message_tokens() is not implemented for model {model}.\n" - " See https://github.com/openai/openai-python/blob/main/chatml.md for" - " information on how messages are converted to tokens." - ) - try: - encoding = tiktoken.encoding_for_model(encoding_model) - except KeyError: - logger.warn("Warning: model not found. Using cl100k_base encoding.") - encoding = tiktoken.get_encoding("cl100k_base") - - num_tokens = 0 - for message in messages: - num_tokens += tokens_per_message - for key, value in message.raw().items(): - num_tokens += len(encoding.encode(value)) - if key == "name": - num_tokens += tokens_per_name - num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> - return num_tokens - - -def count_string_tokens(string: str, model_name: str) -> int: - """ - Returns the number of tokens in a text string. - - Args: - string (str): The text string. - model_name (str): The name of the encoding to use. (e.g., "gpt-3.5-turbo") - - Returns: - int: The number of tokens in the text string. - """ - encoding = tiktoken.encoding_for_model(model_name) - return len(encoding.encode(string)) diff --git a/autogpts/autogpt/autogpt/logs/__init__.py b/autogpts/autogpt/autogpt/logs/__init__.py index 9c46d1e5..abcdac71 100644 --- a/autogpts/autogpt/autogpt/logs/__init__.py +++ b/autogpts/autogpt/autogpt/logs/__init__.py @@ -1,7 +1,6 @@ from .helpers import user_friendly_output from .log_cycle import ( CURRENT_CONTEXT_FILE_NAME, - FULL_MESSAGE_HISTORY_FILE_NAME, NEXT_ACTION_FILE_NAME, PROMPT_SUMMARY_FILE_NAME, PROMPT_SUPERVISOR_FEEDBACK_FILE_NAME, diff --git a/autogpts/autogpt/autogpt/logs/formatters.py b/autogpts/autogpt/autogpt/logs/formatters.py index 2ea94dac..6166b571 100644 --- a/autogpts/autogpt/autogpt/logs/formatters.py +++ b/autogpts/autogpt/autogpt/logs/formatters.py @@ -21,9 +21,8 @@ class AutoGptFormatter(FancyConsoleFormatter): # Determine color for title title = getattr(record, "title", "") - title_color = ( - getattr(record, "title_color", "") - or self.LEVEL_COLOR_MAP.get(record.levelno, "") + title_color = getattr(record, "title_color", "") or self.LEVEL_COLOR_MAP.get( + record.levelno, "" ) if title and title_color: title = f"{title_color + Style.BRIGHT}{title}{Style.RESET_ALL}" diff --git a/autogpts/autogpt/autogpt/memory/message_history.py b/autogpts/autogpt/autogpt/memory/message_history.py deleted file mode 100644 index 43395fb2..00000000 --- a/autogpts/autogpt/autogpt/memory/message_history.py +++ /dev/null @@ -1,250 +0,0 @@ -from __future__ import annotations - -import copy -import json -import logging -from typing import TYPE_CHECKING, Iterator, Optional - -if TYPE_CHECKING: - from autogpt.agents import Agent, BaseAgent - from autogpt.config import Config - -from autogpt.json_utils.utilities import extract_dict_from_response -from autogpt.llm.base import ChatSequence, Message -from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS -from autogpt.llm.utils import ( - count_message_tokens, - count_string_tokens, - create_chat_completion, -) -from autogpt.logs import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME, 
LogCycleHandler - -logger = logging.getLogger(__name__) - - -class MessageHistory(ChatSequence): - max_summary_tlength: int = 500 - agent: Optional[BaseAgent | Agent] = None - summary: str = "I was created" - last_trimmed_index: int = 0 - - SUMMARIZATION_PROMPT = '''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember. - -You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. - -Summary So Far: -""" -{summary} -""" - -Latest Development: -""" -{new_events} -""" -''' - - def trim_messages( - self, current_message_chain: list[Message], config: Config - ) -> tuple[Message, list[Message]]: - """ - Returns a list of trimmed messages: messages which are in the message history - but not in current_message_chain. - - Args: - current_message_chain (list[Message]): The messages currently in the context. - config (Config): The config to use. - - Returns: - Message: A message with the new running summary after adding the trimmed messages. - list[Message]: A list of messages that are in full_message_history with an index higher than last_trimmed_index and absent from current_message_chain. - """ - # Select messages in full_message_history with an index higher than last_trimmed_index - new_messages = [ - msg for i, msg in enumerate(self) if i > self.last_trimmed_index - ] - - # Remove messages that are already present in current_message_chain - new_messages_not_in_chain = [ - msg for msg in new_messages if msg not in current_message_chain - ] - - if not new_messages_not_in_chain: - return self.summary_message(), [] - - new_summary_message = self.update_running_summary( - new_events=new_messages_not_in_chain, config=config - ) - - # Find the index of the last message processed - last_message = new_messages_not_in_chain[-1] - self.last_trimmed_index = self.messages.index(last_message) - - return new_summary_message, new_messages_not_in_chain - - def per_cycle( - self, messages: Optional[list[Message]] = None - ) -> Iterator[tuple[Message | None, Message, Message]]: - """ - Yields: - Message: a message containing user input - Message: a message from the AI containing a proposed action - Message: the message containing the result of the AI's proposed action - """ - messages = messages or self.messages - for i in range(0, len(messages) - 1): - ai_message = messages[i] - if ai_message.type != "ai_response": - continue - user_message = ( - messages[i - 1] if i > 0 and messages[i - 1].role == "user" else None - ) - result_message = messages[i + 1] - try: - assert ( - extract_dict_from_response(ai_message.content) != {} - ), "AI response is not a valid JSON object" - assert result_message.type == "action_result" - - yield user_message, ai_message, result_message - except AssertionError as err: - logger.debug( - f"Invalid item in message history: {err}; Messages: {messages[i-1:i+2]}" - ) - - def summary_message(self) -> Message: - return Message( - "system", - f"This reminds you of these events from your past: \n{self.summary}", - ) - - def update_running_summary( - self, - new_events: list[Message], - config: Config, - max_summary_length: Optional[int] = None, - ) -> Message: - """ - This function takes a list of Message objects and updates the running summary - to include the events they describe. 
The updated summary is returned - in a Message formatted in the 1st person past tense. - - Args: - new_events: A list of Messages containing the latest events to be added to the summary. - - Returns: - Message: a Message containing the updated running summary. - - Example: - ```py - new_events = [{"event": "entered the kitchen."}, {"event": "found a scrawled note with the number 7"}] - update_running_summary(new_events) - # Returns: "This reminds you of these events from your past: \nI entered the kitchen and found a scrawled note saying 7." - ``` - """ - if not new_events: - return self.summary_message() - if not max_summary_length: - max_summary_length = self.max_summary_tlength - - # Create a copy of the new_events list to prevent modifying the original list - new_events = copy.deepcopy(new_events) - - # Replace "assistant" with "you". This produces much better first person past tense results. - for event in new_events: - if event.role.lower() == "assistant": - event.role = "you" - - # Remove "thoughts" dictionary from "content" - try: - content_dict = extract_dict_from_response(event.content) - if "thoughts" in content_dict: - del content_dict["thoughts"] - event.content = json.dumps(content_dict) - except json.JSONDecodeError as e: - logger.error(f"Error: Invalid JSON: {e}") - if config.debug_mode: - logger.error(f"{event.content}") - - elif event.role.lower() == "system": - event.role = "your computer" - - # Delete all user messages - elif event.role == "user": - new_events.remove(event) - - summ_model = OPEN_AI_CHAT_MODELS[config.fast_llm] - - # Determine token lengths for use in batching - prompt_template_length = len( - MessageHistory.SUMMARIZATION_PROMPT.format(summary="", new_events="") - ) - max_input_tokens = summ_model.max_tokens - max_summary_length - summary_tlength = count_string_tokens(self.summary, summ_model.name) - batch: list[Message] = [] - batch_tlength = 0 - - # TODO: Put a cap on length of total new events and drop some previous events to - # save API cost. Need to think thru more how to do it without losing the context. - for event in new_events: - event_tlength = count_message_tokens(event, summ_model.name) - - if ( - batch_tlength + event_tlength - > max_input_tokens - prompt_template_length - summary_tlength - ): - # The batch is full. Summarize it and start a new one. - self._update_summary_with_batch(batch, config, max_summary_length) - summary_tlength = count_string_tokens(self.summary, summ_model.name) - batch = [event] - batch_tlength = event_tlength - else: - batch.append(event) - batch_tlength += event_tlength - - if batch: - # There's an unprocessed batch. Summarize it. 
- self._update_summary_with_batch(batch, config, max_summary_length) - - return self.summary_message() - - def _update_summary_with_batch( - self, new_events_batch: list[Message], config: Config, max_output_length: int - ) -> None: - prompt = MessageHistory.SUMMARIZATION_PROMPT.format( - summary=self.summary, new_events=new_events_batch - ) - - prompt = ChatSequence.for_model(config.fast_llm, [Message("user", prompt)]) - if ( - self.agent is not None - and hasattr(self.agent, "created_at") - and isinstance( - getattr(self.agent, "log_cycle_handler", None), LogCycleHandler - ) - ): - self.agent.log_cycle_handler.log_cycle( - self.agent.ai_config.ai_name, - self.agent.created_at, - self.agent.cycle_count, - prompt.raw(), - PROMPT_SUMMARY_FILE_NAME, - ) - - self.summary = create_chat_completion( - prompt, config, max_tokens=max_output_length - ).content - - if ( - self.agent is not None - and hasattr(self.agent, "created_at") - and isinstance( - getattr(self.agent, "log_cycle_handler", None), LogCycleHandler - ) - ): - self.agent.log_cycle_handler.log_cycle( - self.agent.ai_config.ai_name, - self.agent.created_at, - self.agent.cycle_count, - self.summary, - SUMMARY_FILE_NAME, - ) diff --git a/autogpts/autogpt/autogpt/memory/vector/memory_item.py b/autogpts/autogpt/autogpt/memory/vector/memory_item.py index df5b6aef..7ca8c788 100644 --- a/autogpts/autogpt/autogpt/memory/vector/memory_item.py +++ b/autogpts/autogpt/autogpt/memory/vector/memory_item.py @@ -9,8 +9,7 @@ import numpy as np from pydantic import BaseModel from autogpt.config import Config -from autogpt.llm import Message -from autogpt.llm.utils import count_string_tokens +from autogpt.core.resource.model_providers import ChatMessage from autogpt.processing.text import chunk_content, split_text, summarize_text from .utils import Embedding, get_embedding @@ -49,6 +48,7 @@ class MemoryItem(BaseModel, arbitrary_types_allowed=True): # Fix encoding, e.g. 
removing unicode surrogates (see issue #778) text = ftfy.fix_text(text) + # FIXME: needs ModelProvider chunks = [ chunk for chunk, _ in ( @@ -113,7 +113,7 @@ class MemoryItem(BaseModel, arbitrary_types_allowed=True): return MemoryItem.from_text(content, "code_file", {"location": path}) @staticmethod - def from_ai_action(ai_message: Message, result_message: Message): + def from_ai_action(ai_message: ChatMessage, result_message: ChatMessage): # The result_message contains either user feedback # or the result of the command specified in ai_message @@ -158,7 +158,7 @@ class MemoryItem(BaseModel, arbitrary_types_allowed=True): def dump(self, calculate_length=False) -> str: if calculate_length: - token_length = count_string_tokens( + token_length = self.llm_provider.count_tokens( self.raw_content, Config().embedding_model ) return f""" @@ -213,7 +213,7 @@ class MemoryItemRelevance(BaseModel): def of( memory_item: MemoryItem, for_query: str, e_query: Embedding | None = None ) -> MemoryItemRelevance: - e_query = e_query or get_embedding(for_query) + e_query = e_query if e_query is not None else get_embedding(for_query) _, srs, crs = MemoryItemRelevance.calculate_scores(memory_item, e_query) return MemoryItemRelevance( for_query=for_query, @@ -235,7 +235,7 @@ class MemoryItemRelevance(BaseModel): list: the relevance scores of the memory chunks """ summary_relevance_score = np.dot(memory.e_summary, compare_to) - chunk_relevance_scores = np.dot(memory.e_chunks, compare_to) + chunk_relevance_scores = np.dot(memory.e_chunks, compare_to).tolist() logger.debug(f"Relevance of summary: {summary_relevance_score}") logger.debug(f"Relevance of chunks: {chunk_relevance_scores}") diff --git a/autogpts/autogpt/autogpt/memory/vector/providers/json_file.py b/autogpts/autogpt/autogpt/memory/vector/providers/json_file.py index 5c9a7993..2d673318 100644 --- a/autogpts/autogpt/autogpt/memory/vector/providers/json_file.py +++ b/autogpts/autogpt/autogpt/memory/vector/providers/json_file.py @@ -80,9 +80,13 @@ class JSONFileMemory(VectorMemoryProvider): logger.debug(f"Loading memories from index file '{self.file_path}'") json_index = orjson.loads(f.read()) for memory_item_dict in json_index: - self.memories.append(MemoryItem(**memory_item_dict)) + self.memories.append(MemoryItem.parse_obj(memory_item_dict)) def save_index(self): logger.debug(f"Saving memory index to file {self.file_path}") with self.file_path.open("wb") as f: - return f.write(orjson.dumps(self.memories, option=self.SAVE_OPTIONS)) + return f.write( + orjson.dumps( + [m.dict() for m in self.memories], option=self.SAVE_OPTIONS + ) + ) diff --git a/autogpts/autogpt/autogpt/memory/vector/utils.py b/autogpts/autogpt/autogpt/memory/vector/utils.py index 12193db7..1023230c 100644 --- a/autogpts/autogpt/autogpt/memory/vector/utils.py +++ b/autogpts/autogpt/autogpt/memory/vector/utils.py @@ -1,18 +1,19 @@ import logging from contextlib import suppress -from typing import Any, overload +from typing import Any, Sequence, overload import numpy as np from autogpt.config import Config -from autogpt.llm.base import TText -from autogpt.llm.providers import openai as iopenai logger = logging.getLogger(__name__) -Embedding = list[np.float32] | np.ndarray[Any, np.dtype[np.float32]] +Embedding = list[float] | list[np.float32] | np.ndarray[Any, np.dtype[np.float32]] """Embedding vector""" +TText = Sequence[int] +"""Tokenized text""" + @overload def get_embedding(input: str | TText, config: Config) -> Embedding: @@ -60,7 +61,7 @@ def get_embedding( + (f" via Azure deployment 
'{kwargs['engine']}'" if config.use_azure else "") ) - embeddings = iopenai.create_embedding( + embeddings = embedding_provider.create_embedding( input, **kwargs, ).data diff --git a/autogpts/autogpt/autogpt/models/command_parameter.py b/autogpts/autogpt/autogpt/models/command_parameter.py index ec130c87..9264bf13 100644 --- a/autogpts/autogpt/autogpt/models/command_parameter.py +++ b/autogpts/autogpt/autogpt/models/command_parameter.py @@ -1,10 +1,13 @@ import dataclasses +import enum + +from autogpt.core.utils.json_schema import JSONSchema @dataclasses.dataclass class CommandParameter: name: str - type: str + type: JSONSchema.Type | enum.Enum description: str required: bool diff --git a/autogpts/autogpt/autogpt/models/context_item.py b/autogpts/autogpt/autogpt/models/context_item.py index 7c8e306a..17a2a0b8 100644 --- a/autogpts/autogpt/autogpt/models/context_item.py +++ b/autogpts/autogpt/autogpt/models/context_item.py @@ -29,7 +29,7 @@ class ContextItem(ABC): """The content represented by the context item""" ... - def __str__(self) -> str: + def fmt(self) -> str: return ( f"{self.description} (source: {self.source})\n" "```\n" diff --git a/autogpts/autogpt/autogpt/processing/text.py b/autogpts/autogpt/autogpt/processing/text.py index 7f90c2c8..00351c1f 100644 --- a/autogpts/autogpt/autogpt/processing/text.py +++ b/autogpts/autogpt/autogpt/processing/text.py @@ -1,15 +1,17 @@ """Text processing functions""" import logging -from math import ceil -from typing import Iterator, Optional, Sequence, TypeVar +import math +from typing import Iterator, Optional, TypeVar import spacy -import tiktoken from autogpt.config import Config -from autogpt.llm.base import ChatSequence -from autogpt.llm.providers.openai import OPEN_AI_MODELS -from autogpt.llm.utils import count_string_tokens, create_chat_completion +from autogpt.core.prompting import ChatPrompt +from autogpt.core.resource.model_providers import ( + ChatMessage, + ChatModelProvider, + ModelTokenizer, +) logger = logging.getLogger(__name__) @@ -17,8 +19,8 @@ T = TypeVar("T") def batch( - sequence: Sequence[T], max_batch_length: int, overlap: int = 0 -) -> Iterator[Sequence[T]]: + sequence: list[T], max_batch_length: int, overlap: int = 0 +) -> Iterator[list[T]]: """Batch data from iterable into slices of length N. 
The last batch may be shorter.""" # batched('ABCDEFG', 3) --> ABC DEF G if max_batch_length < 1: @@ -27,52 +29,30 @@ def batch( yield sequence[i : i + max_batch_length] -def _max_chunk_length(model: str, max: Optional[int] = None) -> int: - model_max_input_tokens = OPEN_AI_MODELS[model].max_tokens - 1 - if max is not None and max > 0: - return min(max, model_max_input_tokens) - return model_max_input_tokens - - -def must_chunk_content( - text: str, for_model: str, max_chunk_length: Optional[int] = None -) -> bool: - return count_string_tokens(text, for_model) > _max_chunk_length( - for_model, max_chunk_length - ) - - def chunk_content( content: str, - for_model: str, - max_chunk_length: Optional[int] = None, + max_chunk_length: int, + tokenizer: ModelTokenizer, with_overlap: bool = True, ) -> Iterator[tuple[str, int]]: """Split content into chunks of approximately equal token length.""" MAX_OVERLAP = 200 # limit overlap to save tokens - if not must_chunk_content(content, for_model, max_chunk_length): - yield content, count_string_tokens(content, for_model) - return - - max_chunk_length = max_chunk_length or _max_chunk_length(for_model) - - tokenizer = tiktoken.encoding_for_model(for_model) - tokenized_text = tokenizer.encode(content) total_length = len(tokenized_text) - n_chunks = ceil(total_length / max_chunk_length) + n_chunks = math.ceil(total_length / max_chunk_length) - chunk_length = ceil(total_length / n_chunks) + chunk_length = math.ceil(total_length / n_chunks) overlap = min(max_chunk_length - chunk_length, MAX_OVERLAP) if with_overlap else 0 for token_batch in batch(tokenized_text, chunk_length + overlap, overlap): yield tokenizer.decode(token_batch), len(token_batch) -def summarize_text( +async def summarize_text( text: str, + llm_provider: ChatModelProvider, config: Config, instruction: Optional[str] = None, question: Optional[str] = None, @@ -104,31 +84,37 @@ def summarize_text( "Do not directly answer the question itself" ) - summarization_prompt = ChatSequence.for_model(model) + summarization_prompt = ChatPrompt(messages=[]) - token_length = count_string_tokens(text, model) - logger.info(f"Text length: {token_length} tokens") + text_tlength = llm_provider.count_tokens(text, model) + logger.info(f"Text length: {text_tlength} tokens") # reserve 50 tokens for summary prompt, 500 for the response - max_chunk_length = _max_chunk_length(model) - 550 + max_chunk_length = llm_provider.get_token_limit(model) - 550 logger.info(f"Max chunk length: {max_chunk_length} tokens") - if not must_chunk_content(text, model, max_chunk_length): + if text_tlength < max_chunk_length: # summarization_prompt.add("user", text) - summarization_prompt.add( - "user", - "Write a concise summary of the following text" - f"{f'; {instruction}' if instruction is not None else ''}:" - "\n\n\n" - f'LITERAL TEXT: """{text}"""' - "\n\n\n" - "CONCISE SUMMARY: The text is best summarized as" - # "Only respond with a concise summary or description of the user message." + summarization_prompt.messages.append( + ChatMessage.user( + "Write a concise summary of the following text" + f"{f'; {instruction}' if instruction is not None else ''}:" + "\n\n\n" + f'LITERAL TEXT: """{text}"""' + "\n\n\n" + "CONCISE SUMMARY: The text is best summarized as" + # "Only respond with a concise summary or description of the user message." 
+ ) ) - summary = create_chat_completion( - prompt=summarization_prompt, config=config, temperature=0, max_tokens=500 - ).content + summary = ( + await llm_provider.create_chat_completion( + model_prompt=summarization_prompt.messages, + model_name=model, + temperature=0, + max_tokens=500, + ) + ).response["content"] logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n") return summary.strip(), None @@ -136,7 +122,10 @@ def summarize_text( summaries: list[str] = [] chunks = list( split_text( - text, for_model=model, config=config, max_chunk_length=max_chunk_length + text, + config=config, + max_chunk_length=max_chunk_length, + tokenizer=llm_provider.get_tokenizer(model), ) ) @@ -144,12 +133,21 @@ def summarize_text( logger.info( f"Summarizing chunk {i + 1} / {len(chunks)} of length {chunk_length} tokens" ) - summary, _ = summarize_text(chunk, config, instruction) + summary, _ = await summarize_text( + text=chunk, + instruction=instruction, + llm_provider=llm_provider, + config=config, + ) summaries.append(summary) logger.info(f"Summarized {len(chunks)} chunks") - summary, _ = summarize_text("\n\n".join(summaries), config) + summary, _ = await summarize_text( + "\n\n".join(summaries), + llm_provider=llm_provider, + config=config, + ) return summary.strip(), [ (summaries[i], chunks[i][0]) for i in range(0, len(chunks)) ] @@ -157,10 +155,10 @@ def summarize_text( def split_text( text: str, - for_model: str, config: Config, + max_chunk_length: int, + tokenizer: ModelTokenizer, with_overlap: bool = True, - max_chunk_length: Optional[int] = None, ) -> Iterator[tuple[str, int]]: """Split text into chunks of sentences, with each chunk not exceeding the maximum length @@ -177,17 +175,14 @@ def split_text( Raises: ValueError: when a sentence is longer than the maximum length """ + text_length = len(tokenizer.encode(text)) - max_length = _max_chunk_length(for_model, max_chunk_length) - - text_length = count_string_tokens(text, for_model) - - if text_length < max_length: + if text_length < max_chunk_length: yield text, text_length return - n_chunks = ceil(text_length / max_length) - target_chunk_length = ceil(text_length / n_chunks) + n_chunks = math.ceil(text_length / max_chunk_length) + target_chunk_length = math.ceil(text_length / n_chunks) nlp: spacy.language.Language = spacy.load(config.browse_spacy_language_model) nlp.add_pipe("sentencizer") @@ -202,25 +197,25 @@ def split_text( i = 0 while i < len(sentences): sentence = sentences[i] - sentence_length = count_string_tokens(sentence, for_model) + sentence_length = len(tokenizer.encode(sentence)) expected_chunk_length = current_chunk_length + 1 + sentence_length if ( - expected_chunk_length < max_length + expected_chunk_length < max_chunk_length # try to create chunks of approximately equal size and expected_chunk_length - (sentence_length / 2) < target_chunk_length ): current_chunk.append(sentence) current_chunk_length = expected_chunk_length - elif sentence_length < max_length: + elif sentence_length < max_chunk_length: if last_sentence: yield " ".join(current_chunk), current_chunk_length current_chunk = [] current_chunk_length = 0 if with_overlap: - overlap_max_length = max_length - sentence_length - 1 + overlap_max_length = max_chunk_length - sentence_length - 1 if last_sentence_length < overlap_max_length: current_chunk += [last_sentence] current_chunk_length += last_sentence_length + 1 @@ -229,9 +224,9 @@ def split_text( current_chunk += [ list( chunk_content( - last_sentence, - for_model, - overlap_max_length, + 
content=last_sentence, + max_chunk_length=overlap_max_length, + tokenizer=tokenizer, ) ).pop()[0], ] @@ -243,7 +238,7 @@ def split_text( else: # sentence longer than maximum length -> chop up and try again sentences[i : i + 1] = [ chunk - for chunk, _ in chunk_content(sentence, for_model, target_chunk_length) + for chunk, _ in chunk_content(sentence, target_chunk_length, tokenizer) ] continue diff --git a/autogpts/autogpt/scripts/check_requirements.py b/autogpts/autogpt/scripts/check_requirements.py index 1e0a8c1f..1ea8f444 100644 --- a/autogpts/autogpt/scripts/check_requirements.py +++ b/autogpts/autogpt/scripts/check_requirements.py @@ -8,8 +8,8 @@ try: except ModuleNotFoundError: os.system(f"{sys.executable} -m pip install 'poetry>=1.6.1,<2.0.0'") -from poetry.factory import Factory from poetry.core.constraints.version.version import Version +from poetry.factory import Factory def main(): @@ -22,7 +22,7 @@ def main(): for dep in dependency_group.dependencies: # Try to verify that the installed version is suitable with contextlib.suppress(ModuleNotFoundError): - installed_version = version(dep.name) # if this fails -> not installed + installed_version = version(dep.name) # if this fails -> not installed if dep.constraint.allows(Version.parse(installed_version)): continue # If the above verification fails, mark the package as missing diff --git a/autogpts/autogpt/tests/conftest.py b/autogpts/autogpt/tests/conftest.py index db35120e..a240e640 100644 --- a/autogpts/autogpt/tests/conftest.py +++ b/autogpts/autogpt/tests/conftest.py @@ -7,7 +7,9 @@ import yaml from pytest_mock import MockerFixture from autogpt.agents import Agent +from autogpt.app.main import _configure_openai_provider from autogpt.config import AIConfig, Config, ConfigBuilder +from autogpt.core.resource.model_providers import ChatModelProvider, OpenAIProvider from autogpt.llm.api_manager import ApiManager from autogpt.logs.config import configure_logging from autogpt.memory.vector import get_memory @@ -89,7 +91,12 @@ def api_manager() -> ApiManager: @pytest.fixture -def agent(config: Config) -> Agent: +def llm_provider(config: Config) -> OpenAIProvider: + return _configure_openai_provider(config) + + +@pytest.fixture +def agent(config: Config, llm_provider: ChatModelProvider) -> Agent: ai_config = AIConfig( ai_name="Base", ai_role="A base AI", @@ -103,6 +110,7 @@ def agent(config: Config) -> Agent: return Agent( memory=memory_json_file, + llm_provider=llm_provider, command_registry=command_registry, ai_config=ai_config, config=config, diff --git a/autogpts/autogpt/tests/integration/agent_factory.py b/autogpts/autogpt/tests/integration/agent_factory.py index c63eaf80..742cd79f 100644 --- a/autogpts/autogpt/tests/integration/agent_factory.py +++ b/autogpts/autogpt/tests/integration/agent_factory.py @@ -19,7 +19,7 @@ def memory_json_file(config: Config): @pytest.fixture -def dummy_agent(config: Config, memory_json_file): +def dummy_agent(config: Config, llm_provider, memory_json_file): command_registry = CommandRegistry() ai_config = AIConfig( @@ -32,6 +32,7 @@ def dummy_agent(config: Config, memory_json_file): agent = Agent( memory=memory_json_file, + llm_provider=llm_provider, command_registry=command_registry, ai_config=ai_config, config=config, diff --git a/autogpts/autogpt/tests/integration/memory/test_json_file_memory.py b/autogpts/autogpt/tests/integration/memory/test_json_file_memory.py index 38cd26c8..76f867e2 100644 --- a/autogpts/autogpt/tests/integration/memory/test_json_file_memory.py +++ 
b/autogpts/autogpt/tests/integration/memory/test_json_file_memory.py @@ -79,7 +79,7 @@ def test_json_memory_load_index(config: Config, memory_item: MemoryItem): try: assert index.file_path.exists(), "index was not saved to file" - assert len(index) == 1, f"index constains {len(index)} items instead of 1" + assert len(index) == 1, f"index contains {len(index)} items instead of 1" assert index.memories[0] == memory_item, "item in index != added mock item" except AssertionError as e: raise ValueError(f"Setting up for load_index test failed: {e}") diff --git a/autogpts/autogpt/tests/integration/memory/utils.py b/autogpts/autogpt/tests/integration/memory/utils.py index 1f791160..aea12832 100644 --- a/autogpts/autogpt/tests/integration/memory/utils.py +++ b/autogpts/autogpt/tests/integration/memory/utils.py @@ -5,7 +5,7 @@ from pytest_mock import MockerFixture import autogpt.memory.vector.memory_item as vector_memory_item import autogpt.memory.vector.providers.base as memory_provider_base from autogpt.config.config import Config -from autogpt.llm.providers.openai import OPEN_AI_EMBEDDING_MODELS +from autogpt.core.resource.model_providers import OPEN_AI_EMBEDDING_MODELS from autogpt.memory.vector import get_memory from autogpt.memory.vector.utils import Embedding @@ -21,16 +21,16 @@ def mock_embedding(embedding_dimension: int) -> Embedding: @pytest.fixture -def mock_get_embedding(mocker: MockerFixture, embedding_dimension: int): +def mock_get_embedding(mocker: MockerFixture, mock_embedding: Embedding): mocker.patch.object( vector_memory_item, "get_embedding", - return_value=[0.0255] * embedding_dimension, + return_value=mock_embedding, ) mocker.patch.object( memory_provider_base, "get_embedding", - return_value=[0.0255] * embedding_dimension, + return_value=mock_embedding, ) diff --git a/autogpts/autogpt/tests/integration/test_execute_code.py b/autogpts/autogpt/tests/integration/test_execute_code.py index 5ac1cc6b..e9f0615a 100644 --- a/autogpts/autogpt/tests/integration/test_execute_code.py +++ b/autogpts/autogpt/tests/integration/test_execute_code.py @@ -1,4 +1,3 @@ -import os import random import string import tempfile @@ -9,7 +8,6 @@ import pytest import autogpt.commands.execute_code as sut # system under testing from autogpt.agents.agent import Agent from autogpt.agents.utils.exceptions import ( - AccessDeniedError, InvalidArgumentError, OperationNotAllowedError, ) @@ -56,7 +54,9 @@ def test_execute_python_file_args( ): random_args = [random_string] * 2 random_args_string = " ".join(random_args) - result = sut.execute_python_file(python_test_args_file, agent=agent, random_args) + result = sut.execute_python_file( + python_test_args_file, args=random_args, agent=agent + ) assert result == f"{random_args_string}\n" @@ -65,23 +65,6 @@ def test_execute_python_code(random_code: str, random_string: str, agent: Agent) assert result.replace("\r", "") == f"Hello {random_string}!\n" -def test_execute_python_code_overwrites_file(random_code: str, agent: Agent): - ai_name = agent.ai_config.ai_name - destination = os.path.join( - agent.config.workspace_path, ai_name, "executed_code", "test_code.py" - ) - os.makedirs(os.path.dirname(destination), exist_ok=True) - - with open(destination, "w+") as f: - f.write("This will be overwritten") - - sut.execute_python_code(random_code, agent=agent) - - # Check that the file is updated with the new code - with open(destination) as f: - assert f.read() == random_code - - def test_execute_python_file_invalid(agent: Agent): with pytest.raises(InvalidArgumentError): 
sut.execute_python_file("not_python", agent) diff --git a/autogpts/autogpt/tests/integration/test_provider_openai.py b/autogpts/autogpt/tests/integration/test_provider_openai.py deleted file mode 100644 index 7fa57ca9..00000000 --- a/autogpts/autogpt/tests/integration/test_provider_openai.py +++ /dev/null @@ -1,33 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from autogpt.llm.api_manager import ApiManager -from autogpt.llm.providers import openai - -api_manager = ApiManager() - - -@pytest.fixture(autouse=True) -def reset_api_manager(): - api_manager.reset() - yield - - -def test_create_chat_completion_empty_messages(): - """Test if empty messages result in zero tokens and cost.""" - messages = [] - model = "gpt-3.5-turbo" - - with patch("openai.ChatCompletion.create") as mock_create: - mock_response = MagicMock() - del mock_response.error - mock_response.usage.prompt_tokens = 0 - mock_response.usage.completion_tokens = 0 - mock_create.return_value = mock_response - - openai.create_chat_completion(messages, model=model) - - assert api_manager.get_total_prompt_tokens() == 0 - assert api_manager.get_total_completion_tokens() == 0 - assert api_manager.get_total_cost() == 0 diff --git a/autogpts/autogpt/tests/integration/test_setup.py b/autogpts/autogpt/tests/integration/test_setup.py index e40cdf1d..91a39dc1 100644 --- a/autogpts/autogpt/tests/integration/test_setup.py +++ b/autogpts/autogpt/tests/integration/test_setup.py @@ -8,10 +8,12 @@ from autogpt.config.ai_config import AIConfig @pytest.mark.vcr @pytest.mark.requires_openai_api_key -def test_generate_aiconfig_automatic_default(patched_api_requestor, config): +async def test_generate_aiconfig_automatic_default( + patched_api_requestor, config, llm_provider +): user_inputs = [""] with patch("autogpt.app.utils.session.prompt", side_effect=user_inputs): - ai_config = interactive_ai_config_setup(config) + ai_config = await interactive_ai_config_setup(config, llm_provider) assert isinstance(ai_config, AIConfig) assert ai_config.ai_name is not None @@ -21,9 +23,11 @@ def test_generate_aiconfig_automatic_default(patched_api_requestor, config): @pytest.mark.vcr @pytest.mark.requires_openai_api_key -def test_generate_aiconfig_automatic_typical(patched_api_requestor, config): +async def test_generate_aiconfig_automatic_typical( + patched_api_requestor, config, llm_provider +): user_prompt = "Help me create a rock opera about cybernetic giraffes" - ai_config = generate_aiconfig_automatic(user_prompt, config) + ai_config = await generate_aiconfig_automatic(user_prompt, config, llm_provider) assert isinstance(ai_config, AIConfig) assert ai_config.ai_name is not None @@ -33,7 +37,9 @@ def test_generate_aiconfig_automatic_typical(patched_api_requestor, config): @pytest.mark.vcr @pytest.mark.requires_openai_api_key -def test_generate_aiconfig_automatic_fallback(patched_api_requestor, config): +async def test_generate_aiconfig_automatic_fallback( + patched_api_requestor, config, llm_provider +): user_inputs = [ "T&GF£OIBECC()!*", "Chef-GPT", @@ -44,7 +50,7 @@ def test_generate_aiconfig_automatic_fallback(patched_api_requestor, config): "", ] with patch("autogpt.app.utils.session.prompt", side_effect=user_inputs): - ai_config = interactive_ai_config_setup(config) + ai_config = await interactive_ai_config_setup(config, llm_provider) assert isinstance(ai_config, AIConfig) assert ai_config.ai_name == "Chef-GPT" @@ -54,7 +60,7 @@ def test_generate_aiconfig_automatic_fallback(patched_api_requestor, config): @pytest.mark.vcr 
@pytest.mark.requires_openai_api_key -def test_prompt_user_manual_mode(patched_api_requestor, config): +async def test_prompt_user_manual_mode(patched_api_requestor, config, llm_provider): user_inputs = [ "--manual", "Chef-GPT", @@ -65,7 +71,7 @@ def test_prompt_user_manual_mode(patched_api_requestor, config): "", ] with patch("autogpt.app.utils.session.prompt", side_effect=user_inputs): - ai_config = interactive_ai_config_setup(config) + ai_config = await interactive_ai_config_setup(config, llm_provider) assert isinstance(ai_config, AIConfig) assert ai_config.ai_name == "Chef-GPT" diff --git a/autogpts/autogpt/tests/integration/test_web_selenium.py b/autogpts/autogpt/tests/integration/test_web_selenium.py index e935bb00..dfdbb9e0 100644 --- a/autogpts/autogpt/tests/integration/test_web_selenium.py +++ b/autogpts/autogpt/tests/integration/test_web_selenium.py @@ -6,12 +6,14 @@ from autogpt.commands.web_selenium import BrowsingError, read_webpage @pytest.mark.vcr @pytest.mark.requires_openai_api_key -def test_browse_website_nonexistent_url(agent: Agent, patched_api_requestor: None): +async def test_browse_website_nonexistent_url( + agent: Agent, patched_api_requestor: None +): url = "https://auto-gpt-thinks-this-website-does-not-exist.com" question = "How to execute a barrel roll" with pytest.raises(BrowsingError, match=r"NAME_NOT_RESOLVED") as raised: - read_webpage(url=url, question=question, agent=agent) + await read_webpage(url=url, question=question, agent=agent) # Sanity check that the response is not too long assert len(raised.exconly()) < 200 diff --git a/autogpts/autogpt/tests/unit/test_api_manager.py b/autogpts/autogpt/tests/unit/test_api_manager.py index 615204d1..da9b0c3d 100644 --- a/autogpts/autogpt/tests/unit/test_api_manager.py +++ b/autogpts/autogpt/tests/unit/test_api_manager.py @@ -3,8 +3,11 @@ from unittest.mock import patch import pytest from pytest_mock import MockerFixture +from autogpt.core.resource.model_providers import ( + OPEN_AI_CHAT_MODELS, + OPEN_AI_EMBEDDING_MODELS, +) from autogpt.llm.api_manager import ApiManager -from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS, OPEN_AI_EMBEDDING_MODELS api_manager = ApiManager() diff --git a/autogpts/autogpt/tests/unit/test_file_operations.py b/autogpts/autogpt/tests/unit/test_file_operations.py index 36e01e04..5d21383b 100644 --- a/autogpts/autogpt/tests/unit/test_file_operations.py +++ b/autogpts/autogpt/tests/unit/test_file_operations.py @@ -245,9 +245,7 @@ def test_write_file_succeeds_if_content_different( test_file_with_content_path: Path, agent: Agent ): new_content = "This is different content.\n" - file_ops.write_to_file( - str(test_file_with_content_path), new_content, agent=agent - ) + file_ops.write_to_file(str(test_file_with_content_path), new_content, agent=agent) def test_append_to_file(test_nested_file: Path, agent: Agent): diff --git a/autogpts/autogpt/tests/unit/test_message_history.py b/autogpts/autogpt/tests/unit/test_message_history.py deleted file mode 100644 index a41d1e3a..00000000 --- a/autogpts/autogpt/tests/unit/test_message_history.py +++ /dev/null @@ -1,140 +0,0 @@ -import math -import time -from unittest.mock import MagicMock - -import pytest - -from autogpt.agents import Agent -from autogpt.config import AIConfig -from autogpt.config.config import Config -from autogpt.llm.base import ChatModelResponse, ChatSequence, Message -from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS -from autogpt.llm.utils import count_string_tokens -from autogpt.memory.message_history import 
MessageHistory - - -@pytest.fixture -def agent(config: Config): - memory = MagicMock() - command_registry = MagicMock() - ai_config = AIConfig(ai_name="Test AI") - triggering_prompt = "Triggering prompt" - - agent = Agent( - memory=memory, - command_registry=command_registry, - ai_config=ai_config, - config=config, - triggering_prompt=triggering_prompt, - ) - return agent - - -def test_message_history_batch_summary(mocker, agent: Agent, config: Config): - history = MessageHistory(model=agent.llm, agent=agent) - model = config.fast_llm - message_tlength = 0 - message_count = 0 - - # Setting the mock output and inputs - mock_summary_response = ChatModelResponse( - model_info=OPEN_AI_CHAT_MODELS[model], - content="I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings.", - function_call=None, - ) - mock_summary = mocker.patch( - "autogpt.memory.message_history.create_chat_completion", - return_value=mock_summary_response, - ) - - system_prompt = 'You are AIJobSearcher, an AI designed to search for job openings for software engineer role\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\n\nGOALS:\n\n1. Find any job openings for software engineers online\n2. Go through each of the websites and job openings to summarize their requirements and URL, and skip that if you already visit the website\n\nIt takes money to let you run. Your API budget is $5.000\n\nConstraints:\n1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\n2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\n3. No user assistance\n4. Exclusively use the commands listed in double quotes e.g. "command name"\n\nCommands:\n1. google_search: Google Search, args: "query": ""\n2. browse_website: Browse Website, args: "url": "", "question": ""\n3. task_complete: Task Complete (Shutdown), args: "reason": ""\n\nResources:\n1. Internet access for searches and information gathering.\n2. Long Term memory management.\n3. GPT-3.5 powered Agents for delegation of simple tasks.\n4. File output.\n\nPerformance Evaluation:\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n2. Constructively self-criticize your big-picture behavior constantly.\n3. Reflect on past decisions and strategies to refine your approach.\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n5. 
Write all code to a file.\n\nYou should only respond in JSON format as described below \nResponse Format: \n{\n "thoughts": {\n "text": "thought",\n "reasoning": "reasoning",\n "plan": "- short bulleted\\n- list that conveys\\n- long-term plan",\n "criticism": "constructive self-criticism",\n "speak": "thoughts summary to say to user"\n },\n "command": {\n "name": "command name",\n "args": {\n "arg name": "value"\n }\n }\n} \nEnsure the response can be parsed by Python json.loads' - message_sequence = ChatSequence.for_model( - model, - [ - Message("system", system_prompt), - Message("system", f"The current time and date is {time.strftime('%c')}"), - ], - ) - insertion_index = len(message_sequence) - - user_input = "Determine which next command to use, and respond using the format specified above:'" - user_input_msg = Message("user", user_input) - history.append(user_input_msg) - - # mock a reponse from AI - assistant_reply = '{\n "thoughts": {\n "text": "I will use the \'google_search\' command to find more websites with job openings for software engineering manager role.",\n "reasoning": "Since the previous website did not provide any relevant information, I will use the \'google_search\' command to find more websites with job openings for software engineer role.",\n "plan": "- Use \'google_search\' command to find more websites with job openings for software engineer role",\n "criticism": "I need to ensure that I am able to extract the relevant information from each website and job opening.",\n "speak": "I will now use the \'google_search\' command to find more websites with job openings for software engineer role."\n },\n "command": {\n "name": "google_search",\n "args": {\n "query": "software engineer job openings"\n }\n }\n}' - msg = Message("assistant", assistant_reply, "ai_response") - history.append(msg) - message_tlength += count_string_tokens(str(msg), config.fast_llm) - message_count += 1 - - # mock some websites returned from google search command in the past - result = "Command google_search returned: [" - for i in range(50): - result += "http://www.job" + str(i) + ".com," - result += "]" - msg = Message("system", result, "action_result") - history.append(msg) - message_tlength += count_string_tokens(str(msg), config.fast_llm) - message_count += 1 - - user_input = "Determine which next command to use, and respond using the format specified above:'" - user_input_msg = Message("user", user_input) - history.append(user_input_msg) - - # mock numbers of AI response and action results from browse_website commands in the past, doesn't need the thoughts part, as the summarization code discard them anyway - for i in range(50): - assistant_reply = ( - '{\n "command": {\n "name": "browse_website",\n "args": {\n "url": "https://www.job' - + str(i) - + '.com",\n "question": "software engineer"\n }\n }\n}' - ) - msg = Message("assistant", assistant_reply, "ai_response") - history.append(msg) - message_tlength += count_string_tokens(str(msg), config.fast_llm) - message_count += 1 - - result = ( - "Command browse_website returned: Answer gathered from website: The text in job" - + str(i) - + " does not provide information on specific job requirements or a job URL.]" - ) - msg = Message("system", result, "action_result") - history.append(msg) - message_tlength += count_string_tokens(str(msg), config.fast_llm) - message_count += 1 - - user_input = "Determine which next command to use, and respond using the format specified above:'" - user_input_msg = Message("user", user_input) - 
history.append(user_input_msg) - - # only take the last cycle of the message history, trim the rest of previous messages, and generate a summary for them - for cycle in reversed(list(history.per_cycle())): - messages_to_add = [msg for msg in cycle if msg is not None] - message_sequence.insert(insertion_index, *messages_to_add) - break - - # count the expected token length of the trimmed message by reducing the token length of messages in the last cycle - for message in messages_to_add: - if message.role != "user": - message_tlength -= count_string_tokens(str(message), config.fast_llm) - message_count -= 1 - - # test the main trim_message function - new_summary_message, trimmed_messages = history.trim_messages( - current_message_chain=list(message_sequence), config=config - ) - - expected_call_count = math.ceil( - message_tlength / (OPEN_AI_CHAT_MODELS[config.fast_llm].max_tokens) - ) - # Expecting 2 batches because of over max token - assert mock_summary.call_count == expected_call_count # 2 at the time of writing - # Expecting 100 messages because 50 pairs of ai_response and action_result, based on the range set above - assert len(trimmed_messages) == message_count # 100 at the time of writing - assert new_summary_message == Message( - role="system", - content="This reminds you of these events from your past: \n" - + mock_summary_response.content, - type=None, - ) diff --git a/autogpts/autogpt/tests/unit/test_retry_provider_openai.py b/autogpts/autogpt/tests/unit/test_retry_provider_openai.py index f626807c..e076a397 100644 --- a/autogpts/autogpt/tests/unit/test_retry_provider_openai.py +++ b/autogpts/autogpt/tests/unit/test_retry_provider_openai.py @@ -1,7 +1,10 @@ +import logging + import pytest from openai.error import APIError, RateLimitError, ServiceUnavailableError from autogpt.llm.providers import openai +from autogpt.logs.config import USER_FRIENDLY_OUTPUT_LOGGER @pytest.fixture(params=[RateLimitError, ServiceUnavailableError, APIError]) @@ -52,9 +55,17 @@ def test_retry_open_api_no_error(caplog: pytest.LogCaptureFixture): ids=["passing", "passing_edge", "failing", "failing_edge", "failing_no_retries"], ) def test_retry_open_api_passing( - caplog: pytest.LogCaptureFixture, error, error_count, retry_count, failure + caplog: pytest.LogCaptureFixture, + error: Exception, + error_count: int, + retry_count: int, + failure: bool, ): """Tests the retry with simulated errors [RateLimitError, ServiceUnavailableError, APIError], but should ulimately pass""" + + # Add capture handler to non-propagating logger + logging.getLogger(USER_FRIENDLY_OUTPUT_LOGGER).addHandler(caplog.handler) + call_count = min(error_count, retry_count) + 1 raises = error_factory(error, error_count, retry_count) diff --git a/autogpts/autogpt/tests/unit/test_token_counter.py b/autogpts/autogpt/tests/unit/test_token_counter.py deleted file mode 100644 index feed18b1..00000000 --- a/autogpts/autogpt/tests/unit/test_token_counter.py +++ /dev/null @@ -1,55 +0,0 @@ -import pytest - -from autogpt.llm.base import Message -from autogpt.llm.utils import count_message_tokens, count_string_tokens - - -def test_count_message_tokens(): - messages = [ - Message("user", "Hello"), - Message("assistant", "Hi there!"), - ] - assert count_message_tokens(messages) == 17 - - -def test_count_message_tokens_empty_input(): - """Empty input should return 3 tokens""" - assert count_message_tokens([]) == 3 - - -def test_count_message_tokens_invalid_model(): - """Invalid model should raise a NotImplementedError""" - messages = [ - 
Message("user", "Hello"), - Message("assistant", "Hi there!"), - ] - with pytest.raises(NotImplementedError): - count_message_tokens(messages, model="invalid_model") - - -def test_count_message_tokens_gpt_4(): - messages = [ - Message("user", "Hello"), - Message("assistant", "Hi there!"), - ] - assert count_message_tokens(messages, model="gpt-4-0314") == 15 - - -def test_count_string_tokens(): - """Test that the string tokens are counted correctly.""" - - string = "Hello, world!" - assert count_string_tokens(string, model_name="gpt-3.5-turbo-0301") == 4 - - -def test_count_string_tokens_empty_input(): - """Test that the string tokens are counted correctly.""" - - assert count_string_tokens("", model_name="gpt-3.5-turbo-0301") == 0 - - -def test_count_string_tokens_gpt_4(): - """Test that the string tokens are counted correctly.""" - - string = "Hello, world!" - assert count_string_tokens(string, model_name="gpt-4-0314") == 4 diff --git a/autogpts/autogpt/tests/unit/test_utils.py b/autogpts/autogpt/tests/unit/test_utils.py index 298deae6..b9ce8fb5 100644 --- a/autogpts/autogpt/tests/unit/test_utils.py +++ b/autogpts/autogpt/tests/unit/test_utils.py @@ -9,8 +9,7 @@ from autogpt.app.utils import ( get_current_git_branch, get_latest_bulletin, ) -from autogpt.config import Config -from autogpt.json_utils.utilities import extract_dict_from_response, validate_dict +from autogpt.json_utils.utilities import extract_dict_from_response from autogpt.utils import validate_yaml_file from tests.utils import skip_in_ci @@ -182,18 +181,6 @@ def test_get_current_git_branch_failure(mock_repo): assert branch_name == "" -def test_validate_json_valid(valid_json_response, config: Config): - valid, errors = validate_dict(valid_json_response, config) - assert valid - assert errors is None - - -def test_validate_json_invalid(invalid_json_response, config: Config): - valid, errors = validate_dict(valid_json_response, config) - assert not valid - assert errors is not None - - def test_extract_json_from_response(valid_json_response: dict): emulated_response_from_openai = str(valid_json_response) assert ( diff --git a/autogpts/autogpt/tests/vcr/__init__.py b/autogpts/autogpt/tests/vcr/__init__.py index 8272001c..4d45aafc 100644 --- a/autogpts/autogpt/tests/vcr/__init__.py +++ b/autogpts/autogpt/tests/vcr/__init__.py @@ -1,3 +1,4 @@ +import logging import os from hashlib import sha256 @@ -72,8 +73,8 @@ def patched_api_requestor(mocker: MockerFixture): headers["AGENT-MODE"] = os.environ.get("AGENT_MODE") headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE") - print( - f"[DEBUG] Outgoing API request: {headers}\n{data.decode() if data else None}" + logging.getLogger("patched_api_requestor").debug( + f"Outgoing API request: {headers}\n{data.decode() if data else None}" ) # Add hash header for cheap & fast matching on cassette playback