From 857d26d101107828358103ef7ff20438415b368c Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Wed, 21 Jun 2023 19:52:44 -0700 Subject: [PATCH] Add OpenAI function call support (#4683) Co-authored-by: merwanehamadi Co-authored-by: Reinier van der Leer --- .env.template | 6 +- autogpt/agent/agent.py | 8 +- autogpt/agent/agent_manager.py | 8 +- autogpt/app.py | 28 +++- autogpt/command_decorator.py | 23 ++- autogpt/config/ai_config.py | 2 +- autogpt/config/config.py | 2 + autogpt/json_utils/utilities.py | 10 +- autogpt/llm/base.py | 8 +- autogpt/llm/chat.py | 6 +- autogpt/llm/providers/openai.py | 83 +++++++++- autogpt/llm/utils/__init__.py | 36 +++-- autogpt/memory/message_history.py | 2 +- autogpt/models/command.py | 16 +- autogpt/models/command_parameter.py | 12 ++ autogpt/models/command_registry.py | 2 + autogpt/processing/text.py | 4 +- autogpt/prompts/generator.py | 25 ++- autogpt/setup.py | 2 +- tests/unit/test_agent_manager.py | 10 +- tests/unit/test_commands.py | 54 +++---- tests/unit/test_message_history.py | 12 +- tests/unit/test_prompt_generator.py | 237 ++++++++++++++++------------ 23 files changed, 416 insertions(+), 180 deletions(-) create mode 100644 autogpt/models/command_parameter.py diff --git a/.env.template b/.env.template index 06745245..c3fcb761 100644 --- a/.env.template +++ b/.env.template @@ -25,10 +25,14 @@ OPENAI_API_KEY=your-openai-api-key ## PROMPT_SETTINGS_FILE - Specifies which Prompt Settings file to use (defaults to prompt_settings.yaml) # PROMPT_SETTINGS_FILE=prompt_settings.yaml -## OPENAI_API_BASE_URL - Custom url for the OpenAI API, useful for connecting to custom backends. No effect if USE_AZURE is true, leave blank to keep the default url +## OPENAI_API_BASE_URL - Custom url for the OpenAI API, useful for connecting to custom backends. No effect if USE_AZURE is true, leave blank to keep the default url # the following is an example: # OPENAI_API_BASE_URL=http://localhost:443/v1 +## OPENAI_FUNCTIONS - Enables OpenAI functions: https://platform.openai.com/docs/guides/gpt/function-calling +## WARNING: this feature is only supported by OpenAI's newest models. Until these models become the default on 27 June, add a '-0613' suffix to the model of your choosing. 
+# OPENAI_FUNCTIONS=False
+
 ## AUTHORISE COMMAND KEY - Key to authorise commands
 # AUTHORISE_COMMAND_KEY=y
 
diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py
index 7537233e..fca03a5f 100644
--- a/autogpt/agent/agent.py
+++ b/autogpt/agent/agent.py
@@ -142,7 +142,9 @@ class Agent:
             )
 
             try:
-                assistant_reply_json = extract_json_from_response(assistant_reply)
+                assistant_reply_json = extract_json_from_response(
+                    assistant_reply.content
+                )
                 validate_json(assistant_reply_json, self.config)
             except json.JSONDecodeError as e:
                 logger.error(f"Exception while validating assistant reply JSON: {e}")
@@ -160,7 +162,9 @@ class Agent:
                     print_assistant_thoughts(
                         self.ai_name, assistant_reply_json, self.config
                     )
-                    command_name, arguments = get_command(assistant_reply_json)
+                    command_name, arguments = get_command(
+                        assistant_reply_json, assistant_reply, self.config
+                    )
                     if self.config.speak_mode:
                         say_text(f"I want to execute {command_name}")
diff --git a/autogpt/agent/agent_manager.py b/autogpt/agent/agent_manager.py
index 1f1c8a1d..eaecbf3b 100644
--- a/autogpt/agent/agent_manager.py
+++ b/autogpt/agent/agent_manager.py
@@ -41,7 +41,9 @@ class AgentManager(metaclass=Singleton):
             if plugin_messages := plugin.pre_instruction(messages.raw()):
                 messages.extend([Message(**raw_msg) for raw_msg in plugin_messages])
         # Start GPT instance
-        agent_reply = create_chat_completion(prompt=messages, config=self.config)
+        agent_reply = create_chat_completion(
+            prompt=messages, config=self.config
+        ).content
 
         messages.add("assistant", agent_reply)
 
@@ -92,7 +94,9 @@ class AgentManager(metaclass=Singleton):
             messages.extend([Message(**raw_msg) for raw_msg in plugin_messages])
 
         # Start GPT instance
-        agent_reply = create_chat_completion(prompt=messages, config=self.config)
+        agent_reply = create_chat_completion(
+            prompt=messages, config=self.config
+        ).content
 
         messages.add("assistant", agent_reply)
 
diff --git a/autogpt/app.py b/autogpt/app.py
index 78e3a4dd..06db7938 100644
--- a/autogpt/app.py
+++ b/autogpt/app.py
@@ -3,6 +3,8 @@ import json
 from typing import Dict
 
 from autogpt.agent.agent import Agent
+from autogpt.config import Config
+from autogpt.llm import ChatModelResponse
 
 
 def is_valid_int(value: str) -> bool:
@@ -21,11 +23,15 @@ def is_valid_int(value: str) -> bool:
         return False
 
 
-def get_command(response_json: Dict):
+def get_command(
+    assistant_reply_json: Dict, assistant_reply: ChatModelResponse, config: Config
+):
     """Parse the response and return the command name and arguments
 
     Args:
-        response_json (json): The response from the AI
+        assistant_reply_json (dict): The parsed JSON response from the AI
+        assistant_reply (ChatModelResponse): The raw model response from the AI
+        config (Config): The config object
 
     Returns:
         tuple: The command name and arguments
 
@@ -35,14 +41,24 @@
         Exception: If any other error occurs
     """
+    if config.openai_functions:
+        if assistant_reply.function_call is None:
+            return "Error:", "No 'function_call' in assistant reply"
+        assistant_reply_json["command"] = {
+            "name": assistant_reply.function_call.name,
+            "args": json.loads(assistant_reply.function_call.arguments),
+        }
     try:
-        if "command" not in response_json:
+        if "command" not in assistant_reply_json:
             return "Error:", "Missing 'command' object in JSON"
 
-        if not isinstance(response_json, dict):
-            return "Error:", f"'response_json' object is not dictionary {response_json}"
+        if not isinstance(assistant_reply_json, dict):
+            return (
+                "Error:",
+                f"The previous message sent was not a dictionary: {assistant_reply_json}",
+            )
 
-        command = response_json["command"]
+        command = assistant_reply_json["command"]
         if not isinstance(command, dict):
             return "Error:", "'command' object is not a dictionary"
 
diff --git a/autogpt/command_decorator.py b/autogpt/command_decorator.py
index 1edd766e..f179f978 100644
--- a/autogpt/command_decorator.py
+++ b/autogpt/command_decorator.py
@@ -1,28 +1,43 @@
 import functools
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Optional, TypedDict
 
 from autogpt.config import Config
-from autogpt.models.command import Command
+from autogpt.models.command import Command, CommandParameter
 
 # Unique identifier for auto-gpt commands
 AUTO_GPT_COMMAND_IDENTIFIER = "auto_gpt_command"
 
 
+class CommandParameterSpec(TypedDict):
+    type: str
+    description: str
+    required: bool
+
+
 def command(
     name: str,
     description: str,
-    arguments: Dict[str, Dict[str, Any]],
+    parameters: dict[str, CommandParameterSpec],
     enabled: bool | Callable[[Config], bool] = True,
     disabled_reason: Optional[str] = None,
 ) -> Callable[..., Any]:
     """The command decorator is used to create Command objects from ordinary functions."""
 
     def decorator(func: Callable[..., Any]) -> Command:
+        typed_parameters = [
+            CommandParameter(
+                name=param_name,
+                description=parameter.get("description"),
+                type=parameter.get("type", "string"),
+                required=parameter.get("required", False),
+            )
+            for param_name, parameter in parameters.items()
+        ]
         cmd = Command(
             name=name,
             description=description,
             method=func,
-            signature=arguments,
+            parameters=typed_parameters,
             enabled=enabled,
             disabled_reason=disabled_reason,
         )
diff --git a/autogpt/config/ai_config.py b/autogpt/config/ai_config.py
index 6b9e15f1..3c645abe 100644
--- a/autogpt/config/ai_config.py
+++ b/autogpt/config/ai_config.py
@@ -164,5 +164,5 @@ class AIConfig:
         if self.api_budget > 0.0:
             full_prompt += f"\nIt takes money to let you run. Your API budget is ${self.api_budget:.3f}"
         self.prompt_generator = prompt_generator
-        full_prompt += f"\n\n{prompt_generator.generate_prompt_string()}"
+        full_prompt += f"\n\n{prompt_generator.generate_prompt_string(config)}"
         return full_prompt
diff --git a/autogpt/config/config.py b/autogpt/config/config.py
index 5e0999b1..d032f822 100644
--- a/autogpt/config/config.py
+++ b/autogpt/config/config.py
@@ -88,6 +88,8 @@ class Config:
         if self.openai_organization is not None:
             openai.organization = self.openai_organization
 
+        self.openai_functions = os.getenv("OPENAI_FUNCTIONS", "False") == "True"
+
         self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY")
         # ELEVENLABS_VOICE_1_ID is deprecated and included for backwards-compatibility
         self.elevenlabs_voice_id = os.getenv(
diff --git a/autogpt/json_utils/utilities.py b/autogpt/json_utils/utilities.py
index 4fbf0c05..7162abc5 100644
--- a/autogpt/json_utils/utilities.py
+++ b/autogpt/json_utils/utilities.py
@@ -29,11 +29,15 @@ def extract_json_from_response(response_content: str) -> dict:
 
 
 def llm_response_schema(
-    schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT,
+    config: Config, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT
 ) -> dict[str, Any]:
     filename = os.path.join(os.path.dirname(__file__), f"{schema_name}.json")
     with open(filename, "r") as f:
-        return json.load(f)
+        json_schema = json.load(f)
+    if config.openai_functions:
+        del json_schema["properties"]["command"]
+        json_schema["required"].remove("command")
+    return json_schema
 
 
 def validate_json(
@@ -47,7 +51,7 @@
     Returns:
         bool: Whether the json_object is valid or not
     """
-    schema = llm_response_schema(schema_name)
+    schema = llm_response_schema(config, schema_name)
     validator = Draft7Validator(schema)
 
     if errors := sorted(validator.iter_errors(json_object), key=lambda e: e.path):
diff --git a/autogpt/llm/base.py b/autogpt/llm/base.py
index d372ad25..4ff80dc7 100644
--- a/autogpt/llm/base.py
+++ b/autogpt/llm/base.py
@@ -2,7 +2,10 @@ from __future__ import annotations
 
 from dataclasses import dataclass, field
 from math import ceil, floor
-from typing import List, Literal, TypedDict
+from typing import TYPE_CHECKING, List, Literal, Optional, TypedDict
+
+if TYPE_CHECKING:
+    from autogpt.llm.providers.openai import OpenAIFunctionCall
 
 MessageRole = Literal["system", "user", "assistant"]
 MessageType = Literal["ai_response", "action_result"]
@@ -156,4 +159,5 @@ class EmbeddingModelResponse(LLMResponse):
 class ChatModelResponse(LLMResponse):
     """Standard response struct for a response from an LLM model."""
 
-    content: str = None
+    content: Optional[str] = None
+    function_call: Optional[OpenAIFunctionCall] = None
diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py
index 0a088d06..c5d5a945 100644
--- a/autogpt/llm/chat.py
+++ b/autogpt/llm/chat.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 import time
 from typing import TYPE_CHECKING
 
+from autogpt.llm.providers.openai import get_openai_command_specs
+
 if TYPE_CHECKING:
     from autogpt.agent.agent import Agent
 
@@ -94,6 +96,7 @@ def chat_with_ai(
 
         current_tokens_used += count_message_tokens([user_input_msg], model)
         current_tokens_used += 500  # Reserve space for new_summary_message
+        current_tokens_used += 500  # Reserve space for the OpenAI functions (TODO: improve this estimate)
 
         # Add Messages until the token limit is reached or there are no more messages to add.
for cycle in reversed(list(agent.history.per_cycle(agent.config))): @@ -193,11 +196,12 @@ def chat_with_ai( assistant_reply = create_chat_completion( prompt=message_sequence, config=agent.config, + functions=get_openai_command_specs(agent), max_tokens=tokens_remaining, ) # Update full message history agent.history.append(user_input_msg) - agent.history.add("assistant", assistant_reply, "ai_response") + agent.history.add("assistant", assistant_reply.content, "ai_response") return assistant_reply diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index ef384667..3c16f5cf 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -1,6 +1,9 @@ +from __future__ import annotations + import functools import time -from typing import List +from dataclasses import dataclass +from typing import TYPE_CHECKING, List, Optional from unittest.mock import patch import openai @@ -9,6 +12,9 @@ from colorama import Fore, Style from openai.error import APIError, RateLimitError, Timeout from openai.openai_object import OpenAIObject +if TYPE_CHECKING: + from autogpt.agent.agent import Agent + from autogpt.llm.base import ( ChatModelInfo, EmbeddingModelInfo, @@ -267,3 +273,78 @@ def create_embedding( input=input, **kwargs, ) + + +@dataclass +class OpenAIFunctionCall: + """Represents a function call as generated by an OpenAI model + + Attributes: + name: the name of the function that the LLM wants to call + arguments: a stringified JSON object (unverified) containing `arg: value` pairs + """ + + name: str + arguments: str + + +@dataclass +class OpenAIFunctionSpec: + """Represents a "function" in OpenAI, which is mapped to a Command in Auto-GPT""" + + name: str + description: str + parameters: dict[str, ParameterSpec] + + @dataclass + class ParameterSpec: + name: str + type: str + description: Optional[str] + required: bool = False + + @property + def __dict__(self): + """Output an OpenAI-consumable function specification""" + return { + "name": self.name, + "description": self.description, + "parameters": { + "type": "object", + "properties": { + param.name: { + "type": param.type, + "description": param.description, + } + for param in self.parameters.values() + }, + "required": [ + param.name for param in self.parameters.values() if param.required + ], + }, + } + + +def get_openai_command_specs(agent: Agent) -> list[OpenAIFunctionSpec]: + """Get OpenAI-consumable function specs for the agent's available commands. 
+ see https://platform.openai.com/docs/guides/gpt/function-calling + """ + if not agent.config.openai_functions: + return [] + + return [ + OpenAIFunctionSpec( + name=command.name, + description=command.description, + parameters={ + param.name: OpenAIFunctionSpec.ParameterSpec( + name=param.name, + type=param.type, + required=param.required, + description=param.description, + ) + for param in command.parameters + }, + ) + for command in agent.command_registry.commands.values() + ] diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py index 3b0d3e17..41765314 100644 --- a/autogpt/llm/utils/__init__.py +++ b/autogpt/llm/utils/__init__.py @@ -1,5 +1,6 @@ from __future__ import annotations +from dataclasses import asdict from typing import List, Literal, Optional from colorama import Fore @@ -8,8 +9,13 @@ from autogpt.config import Config from autogpt.logs import logger from ..api_manager import ApiManager -from ..base import ChatSequence, Message +from ..base import ChatModelResponse, ChatSequence, Message from ..providers import openai as iopenai +from ..providers.openai import ( + OPEN_AI_CHAT_MODELS, + OpenAIFunctionCall, + OpenAIFunctionSpec, +) from .token_counter import * @@ -52,7 +58,7 @@ def call_ai_function( Message("user", arg_str), ], ) - return create_chat_completion(prompt=prompt, temperature=0) + return create_chat_completion(prompt=prompt, temperature=0, config=config).content def create_text_completion( @@ -88,10 +94,11 @@ def create_text_completion( def create_chat_completion( prompt: ChatSequence, config: Config, + functions: Optional[List[OpenAIFunctionSpec]] = None, model: Optional[str] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, -) -> str: +) -> ChatModelResponse: """Create a chat completion using the OpenAI API Args: @@ -103,6 +110,7 @@ def create_chat_completion( Returns: str: The response from the chat completion """ + if model is None: model = prompt.model.name if temperature is None: @@ -134,6 +142,11 @@ def create_chat_completion( chat_completion_kwargs[ "deployment_id" ] = config.get_azure_deployment_id_for_model(model) + if functions: + chat_completion_kwargs["functions"] = [ + function.__dict__ for function in functions + ] + logger.debug(f"Function dicts: {chat_completion_kwargs['functions']}") response = iopenai.create_chat_completion( messages=prompt.raw(), @@ -141,19 +154,24 @@ def create_chat_completion( ) logger.debug(f"Response: {response}") - resp = "" - if not hasattr(response, "error"): - resp = response.choices[0].message["content"] - else: + if hasattr(response, "error"): logger.error(response.error) raise RuntimeError(response.error) + first_message = response.choices[0].message + content: str | None = first_message.get("content") + function_call: OpenAIFunctionCall | None = first_message.get("function_call") + for plugin in config.plugins: if not plugin.can_handle_on_response(): continue - resp = plugin.on_response(resp) + content = plugin.on_response(content) - return resp + return ChatModelResponse( + model_info=OPEN_AI_CHAT_MODELS[model], + content=content, + function_call=function_call, + ) def check_model( diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index 4dba13dd..f3e1dc30 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -228,7 +228,7 @@ Latest Development: PROMPT_SUMMARY_FILE_NAME, ) - self.summary = create_chat_completion(prompt, config) + self.summary = create_chat_completion(prompt, config).content 
self.agent.log_cycle_handler.log_cycle( self.agent.ai_name, diff --git a/autogpt/models/command.py b/autogpt/models/command.py index f88bbcae..92cf414a 100644 --- a/autogpt/models/command.py +++ b/autogpt/models/command.py @@ -1,7 +1,9 @@ -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Optional from autogpt.config import Config +from .command_parameter import CommandParameter + class Command: """A class representing a command. @@ -9,7 +11,7 @@ class Command: Attributes: name (str): The name of the command. description (str): A brief description of what the command does. - signature (str): The signature of the function that the command executes. Defaults to None. + parameters (list): The parameters of the function that the command executes. """ def __init__( @@ -17,14 +19,14 @@ class Command: name: str, description: str, method: Callable[..., Any], - signature: Dict[str, Dict[str, Any]], + parameters: list[CommandParameter], enabled: bool | Callable[[Config], bool] = True, disabled_reason: Optional[str] = None, ): self.name = name self.description = description self.method = method - self.signature = signature + self.parameters = parameters self.enabled = enabled self.disabled_reason = disabled_reason @@ -38,4 +40,8 @@ class Command: return self.method(*args, **kwargs) def __str__(self) -> str: - return f"{self.name}: {self.description}, args: {self.signature}" + params = [ + f"{param.name}: {param.type if param.required else f'Optional[{param.type}]'}" + for param in self.parameters + ] + return f"{self.name}: {self.description}, params: ({', '.join(params)})" diff --git a/autogpt/models/command_parameter.py b/autogpt/models/command_parameter.py new file mode 100644 index 00000000..ec130c87 --- /dev/null +++ b/autogpt/models/command_parameter.py @@ -0,0 +1,12 @@ +import dataclasses + + +@dataclasses.dataclass +class CommandParameter: + name: str + type: str + description: str + required: bool + + def __repr__(self): + return f"CommandParameter('{self.name}', '{self.type}', '{self.description}', {self.required})" diff --git a/autogpt/models/command_registry.py b/autogpt/models/command_registry.py index 29d0143d..96418d26 100644 --- a/autogpt/models/command_registry.py +++ b/autogpt/models/command_registry.py @@ -15,6 +15,8 @@ class CommandRegistry: directory. """ + commands: dict[str, Command] + def __init__(self): self.commands = {} diff --git a/autogpt/processing/text.py b/autogpt/processing/text.py index 78eabf45..24851b1c 100644 --- a/autogpt/processing/text.py +++ b/autogpt/processing/text.py @@ -114,8 +114,8 @@ def summarize_text( logger.debug(f"Summarizing with {model}:\n{summarization_prompt.dump()}\n") summary = create_chat_completion( - summarization_prompt, config, temperature=0, max_tokens=500 - ) + prompt=summarization_prompt, config=config, temperature=0, max_tokens=500 + ).content logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n") return summary.strip(), None diff --git a/autogpt/prompts/generator.py b/autogpt/prompts/generator.py index 2a0334bf..3fff9536 100644 --- a/autogpt/prompts/generator.py +++ b/autogpt/prompts/generator.py @@ -1,6 +1,8 @@ """ A module for generating custom prompt strings.""" +import json from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional +from autogpt.config import Config from autogpt.json_utils.utilities import llm_response_schema if TYPE_CHECKING: @@ -127,7 +129,7 @@ class PromptGenerator: else: return "\n".join(f"{i+1}. 
{item}" for i, item in enumerate(items)) - def generate_prompt_string(self) -> str: + def generate_prompt_string(self, config: Config) -> str: """ Generate a prompt string based on the constraints, commands, resources, and performance evaluations. @@ -137,11 +139,26 @@ class PromptGenerator: """ return ( f"Constraints:\n{self._generate_numbered_list(self.constraints)}\n\n" - "Commands:\n" - f"{self._generate_numbered_list(self.commands, item_type='command')}\n\n" + f"{generate_commands(self, config)}" f"Resources:\n{self._generate_numbered_list(self.resources)}\n\n" "Performance Evaluation:\n" f"{self._generate_numbered_list(self.performance_evaluation)}\n\n" "Respond with only valid JSON conforming to the following schema: \n" - f"{llm_response_schema()}\n" + f"{json.dumps(llm_response_schema(config))}\n" ) + + +def generate_commands(self, config: Config) -> str: + """ + Generate a prompt string based on the constraints, commands, resources, + and performance evaluations. + + Returns: + str: The generated prompt string. + """ + if config.openai_functions: + return "" + return ( + "Commands:\n" + f"{self._generate_numbered_list(self.commands, item_type='command')}\n\n" + ) diff --git a/autogpt/setup.py b/autogpt/setup.py index 2fe8b3a9..f17a91e0 100644 --- a/autogpt/setup.py +++ b/autogpt/setup.py @@ -185,7 +185,7 @@ def generate_aiconfig_automatic(user_prompt: str, config: Config) -> AIConfig: ], ), config, - ) + ).content # Debug LLM Output logger.debug(f"AI Config Generator Raw Output: {output}") diff --git a/tests/unit/test_agent_manager.py b/tests/unit/test_agent_manager.py index a372b726..7140db05 100644 --- a/tests/unit/test_agent_manager.py +++ b/tests/unit/test_agent_manager.py @@ -1,7 +1,9 @@ import pytest from autogpt.agent.agent_manager import AgentManager +from autogpt.llm import ChatModelResponse from autogpt.llm.chat import create_chat_completion +from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS @pytest.fixture @@ -27,12 +29,16 @@ def model(): @pytest.fixture(autouse=True) -def mock_create_chat_completion(mocker): +def mock_create_chat_completion(mocker, config): mock_create_chat_completion = mocker.patch( "autogpt.agent.agent_manager.create_chat_completion", wraps=create_chat_completion, ) - mock_create_chat_completion.return_value = "irrelevant" + mock_create_chat_completion.return_value = ChatModelResponse( + model_info=OPEN_AI_CHAT_MODELS[config.fast_llm_model], + content="irrelevant", + function_call={}, + ) return mock_create_chat_completion diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index cb3f539a..9b52cead 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -5,10 +5,13 @@ from pathlib import Path import pytest -from autogpt.models.command import Command +from autogpt.models.command import Command, CommandParameter from autogpt.models.command_registry import CommandRegistry -SIGNATURE = "(arg1: int, arg2: str) -> str" +PARAMETERS = [ + CommandParameter("arg1", "int", description="Argument 1", required=True), + CommandParameter("arg2", "str", description="Argument 2", required=False), +] class TestCommand: @@ -26,13 +29,16 @@ class TestCommand: name="example", description="Example command", method=self.example_command_method, - signature=SIGNATURE, + parameters=PARAMETERS, ) assert cmd.name == "example" assert cmd.description == "Example command" assert cmd.method == self.example_command_method - assert cmd.signature == "(arg1: int, arg2: str) -> str" + assert ( + str(cmd) + == "example: Example command, 
params: (arg1: int, arg2: Optional[str])" + ) def test_command_call(self): """Test that Command(*args) calls and returns the result of method(*args).""" @@ -41,13 +47,14 @@ class TestCommand: name="example", description="Example command", method=self.example_command_method, - signature={ - "prompt": { - "type": "string", - "description": "The prompt used to generate the image", - "required": True, - }, - }, + parameters=[ + CommandParameter( + name="prompt", + type="string", + description="The prompt used to generate the image", + required=True, + ), + ], ) result = cmd(arg1=1, arg2="test") assert result == "1 - test" @@ -58,22 +65,11 @@ class TestCommand: name="example", description="Example command", method=self.example_command_method, - signature=SIGNATURE, + parameters=PARAMETERS, ) with pytest.raises(TypeError): cmd(arg1="invalid", does_not_exist="test") - def test_command_custom_signature(self): - custom_signature = "custom_arg1: int, custom_arg2: str" - cmd = Command( - name="example", - description="Example command", - method=self.example_command_method, - signature=custom_signature, - ) - - assert cmd.signature == custom_signature - class TestCommandRegistry: @staticmethod @@ -87,7 +83,7 @@ class TestCommandRegistry: name="example", description="Example command", method=self.example_command_method, - signature=SIGNATURE, + parameters=PARAMETERS, ) registry.register(cmd) @@ -102,7 +98,7 @@ class TestCommandRegistry: name="example", description="Example command", method=self.example_command_method, - signature=SIGNATURE, + parameters=PARAMETERS, ) registry.register(cmd) @@ -117,7 +113,7 @@ class TestCommandRegistry: name="example", description="Example command", method=self.example_command_method, - signature=SIGNATURE, + parameters=PARAMETERS, ) registry.register(cmd) @@ -139,7 +135,7 @@ class TestCommandRegistry: name="example", description="Example command", method=self.example_command_method, - signature=SIGNATURE, + parameters=PARAMETERS, ) registry.register(cmd) @@ -161,13 +157,13 @@ class TestCommandRegistry: name="example", description="Example command", method=self.example_command_method, - signature=SIGNATURE, + parameters=PARAMETERS, ) registry.register(cmd) command_prompt = registry.command_prompt() - assert f"(arg1: int, arg2: str)" in command_prompt + assert f"(arg1: int, arg2: Optional[str])" in command_prompt def test_import_mock_commands_module(self): """Test that the registry can import a module with mock command plugins.""" diff --git a/tests/unit/test_message_history.py b/tests/unit/test_message_history.py index 14b60895..a3650005 100644 --- a/tests/unit/test_message_history.py +++ b/tests/unit/test_message_history.py @@ -7,7 +7,7 @@ import pytest from autogpt.agent import Agent from autogpt.config import AIConfig from autogpt.config.config import Config -from autogpt.llm.base import ChatSequence, Message +from autogpt.llm.base import ChatModelResponse, ChatSequence, Message from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS from autogpt.llm.utils import count_string_tokens from autogpt.memory.message_history import MessageHistory @@ -45,10 +45,14 @@ def test_message_history_batch_summary(mocker, agent, config): message_count = 0 # Setting the mock output and inputs - mock_summary_text = "I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings." 
+ mock_summary_response = ChatModelResponse( + model_info=OPEN_AI_CHAT_MODELS[model], + content="I executed browse_website command for each of the websites returned from Google search, but none of them have any job openings.", + function_call={}, + ) mock_summary = mocker.patch( "autogpt.memory.message_history.create_chat_completion", - return_value=mock_summary_text, + return_value=mock_summary_response, ) system_prompt = 'You are AIJobSearcher, an AI designed to search for job openings for software engineer role\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\n\nGOALS:\n\n1. Find any job openings for software engineers online\n2. Go through each of the websites and job openings to summarize their requirements and URL, and skip that if you already visit the website\n\nIt takes money to let you run. Your API budget is $5.000\n\nConstraints:\n1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\n2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\n3. No user assistance\n4. Exclusively use the commands listed in double quotes e.g. "command name"\n\nCommands:\n1. google_search: Google Search, args: "query": ""\n2. browse_website: Browse Website, args: "url": "", "question": ""\n3. task_complete: Task Complete (Shutdown), args: "reason": ""\n\nResources:\n1. Internet access for searches and information gathering.\n2. Long Term memory management.\n3. GPT-3.5 powered Agents for delegation of simple tasks.\n4. File output.\n\nPerformance Evaluation:\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n2. Constructively self-criticize your big-picture behavior constantly.\n3. Reflect on past decisions and strategies to refine your approach.\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n5. Write all code to a file.\n\nYou should only respond in JSON format as described below \nResponse Format: \n{\n "thoughts": {\n "text": "thought",\n "reasoning": "reasoning",\n "plan": "- short bulleted\\n- list that conveys\\n- long-term plan",\n "criticism": "constructive self-criticism",\n "speak": "thoughts summary to say to user"\n },\n "command": {\n "name": "command name",\n "args": {\n "arg name": "value"\n }\n }\n} \nEnsure the response can be parsed by Python json.loads' @@ -139,6 +143,6 @@ def test_message_history_batch_summary(mocker, agent, config): assert new_summary_message == Message( role="system", content="This reminds you of these events from your past: \n" - + mock_summary_text, + + mock_summary_response.content, type=None, ) diff --git a/tests/unit/test_prompt_generator.py b/tests/unit/test_prompt_generator.py index 1fa1754d..c5ffaf78 100644 --- a/tests/unit/test_prompt_generator.py +++ b/tests/unit/test_prompt_generator.py @@ -1,115 +1,152 @@ -from unittest import TestCase - from autogpt.prompts.generator import PromptGenerator -class TestPromptGenerator(TestCase): +def test_add_constraint(): """ - Test cases for the PromptGenerator class, which is responsible for generating - prompts for the AI with constraints, commands, resources, and performance evaluations. + Test if the add_constraint() method adds a constraint to the generator's constraints list. 
+ """ + constraint = "Constraint1" + generator = PromptGenerator() + generator.add_constraint(constraint) + assert constraint in generator.constraints + + +def test_add_command(): + """ + Test if the add_command() method adds a command to the generator's commands list. + """ + command_label = "Command Label" + command_name = "command_name" + args = {"arg1": "value1", "arg2": "value2"} + generator = PromptGenerator() + generator.add_command(command_label, command_name, args) + command = { + "label": command_label, + "name": command_name, + "args": args, + "function": None, + } + assert command in generator.commands + + +def test_add_resource(): + """ + Test if the add_resource() method adds a resource to the generator's resources list. + """ + resource = "Resource1" + generator = PromptGenerator() + generator.add_resource(resource) + assert resource in generator.resources + + +def test_add_performance_evaluation(): + """ + Test if the add_performance_evaluation() method adds an evaluation to the generator's + performance_evaluation list. + """ + evaluation = "Evaluation1" + generator = PromptGenerator() + generator.add_performance_evaluation(evaluation) + assert evaluation in generator.performance_evaluation + + +def test_generate_prompt_string(config): + """ + Test if the generate_prompt_string() method generates a prompt string with all the added + constraints, commands, resources, and evaluations. """ - @classmethod - def setUpClass(cls): - """ - Set up the initial state for each test method by creating an instance of PromptGenerator. - """ - cls.generator = PromptGenerator() + # Define the test data + constraints = ["Constraint1", "Constraint2"] + commands = [ + { + "label": "Command1", + "name": "command_name1", + "args": {"arg1": "value1"}, + }, + { + "label": "Command2", + "name": "command_name2", + "args": {}, + }, + ] + resources = ["Resource1", "Resource2"] + evaluations = ["Evaluation1", "Evaluation2"] - # Test whether the add_constraint() method adds a constraint to the generator's constraints list - def test_add_constraint(self): - """ - Test if the add_constraint() method adds a constraint to the generator's constraints list. - """ - constraint = "Constraint1" - self.generator.add_constraint(constraint) - self.assertIn(constraint, self.generator.constraints) + # Add test data to the generator + generator = PromptGenerator() + for constraint in constraints: + generator.add_constraint(constraint) + for command in commands: + generator.add_command(command["label"], command["name"], command["args"]) + for resource in resources: + generator.add_resource(resource) + for evaluation in evaluations: + generator.add_performance_evaluation(evaluation) - # Test whether the add_command() method adds a command to the generator's commands list - def test_add_command(self): - """ - Test if the add_command() method adds a command to the generator's commands list. - """ - command_label = "Command Label" - command_name = "command_name" - args = {"arg1": "value1", "arg2": "value2"} - self.generator.add_command(command_label, command_name, args) - command = { - "label": command_label, - "name": command_name, - "args": args, - "function": None, - } - self.assertIn(command, self.generator.commands) + # Generate the prompt string and verify its correctness + prompt_string = generator.generate_prompt_string(config) + assert prompt_string is not None - def test_add_resource(self): - """ - Test if the add_resource() method adds a resource to the generator's resources list. 
- """ - resource = "Resource1" - self.generator.add_resource(resource) - self.assertIn(resource, self.generator.resources) + # Check if all constraints, commands, resources, and evaluations are present in the prompt string + for constraint in constraints: + assert constraint in prompt_string + for command in commands: + assert command["name"] in prompt_string + for key, value in command["args"].items(): + assert f'"{key}": "{value}"' in prompt_string + for resource in resources: + assert resource in prompt_string + for evaluation in evaluations: + assert evaluation in prompt_string - def test_add_performance_evaluation(self): - """ - Test if the add_performance_evaluation() method adds an evaluation to the generator's - performance_evaluation list. - """ - evaluation = "Evaluation1" - self.generator.add_performance_evaluation(evaluation) - self.assertIn(evaluation, self.generator.performance_evaluation) - def test_generate_prompt_string(self): - """ - Test if the generate_prompt_string() method generates a prompt string with all the added - constraints, commands, resources, and evaluations. - """ - # Define the test data - constraints = ["Constraint1", "Constraint2"] - commands = [ - { - "label": "Command1", - "name": "command_name1", - "args": {"arg1": "value1"}, - }, - { - "label": "Command2", - "name": "command_name2", - "args": {}, - }, - ] - resources = ["Resource1", "Resource2"] - evaluations = ["Evaluation1", "Evaluation2"] +def test_generate_prompt_string(config): + """ + Test if the generate_prompt_string() method generates a prompt string with all the added + constraints, commands, resources, and evaluations. + """ - # Add test data to the generator - for constraint in constraints: - self.generator.add_constraint(constraint) - for command in commands: - self.generator.add_command( - command["label"], command["name"], command["args"] - ) - for resource in resources: - self.generator.add_resource(resource) - for evaluation in evaluations: - self.generator.add_performance_evaluation(evaluation) + # Define the test data + constraints = ["Constraint1", "Constraint2"] + commands = [ + { + "label": "Command1", + "name": "command_name1", + "args": {"arg1": "value1"}, + }, + { + "label": "Command2", + "name": "command_name2", + "args": {}, + }, + ] + resources = ["Resource1", "Resource2"] + evaluations = ["Evaluation1", "Evaluation2"] - # Generate the prompt string and verify its correctness - prompt_string = self.generator.generate_prompt_string() - self.assertIsNotNone(prompt_string) + # Add test data to the generator + generator = PromptGenerator() + for constraint in constraints: + generator.add_constraint(constraint) + for command in commands: + generator.add_command(command["label"], command["name"], command["args"]) + for resource in resources: + generator.add_resource(resource) + for evaluation in evaluations: + generator.add_performance_evaluation(evaluation) - # Check if all constraints, commands, resources, and evaluations are present in the prompt string - for constraint in constraints: - self.assertIn(constraint, prompt_string) - for command in commands: - self.assertIn(command["name"], prompt_string) - for key, value in command["args"].items(): - self.assertIn(f'"{key}": "{value}"', prompt_string) - for resource in resources: - self.assertIn(resource, prompt_string) - for evaluation in evaluations: - self.assertIn(evaluation, prompt_string) + # Generate the prompt string and verify its correctness + prompt_string = generator.generate_prompt_string(config) + assert 
prompt_string is not None - self.assertIn("constraints", prompt_string.lower()) - self.assertIn("commands", prompt_string.lower()) - self.assertIn("resources", prompt_string.lower()) - self.assertIn("performance evaluation", prompt_string.lower()) + # Check if all constraints, commands, resources, and evaluations are present in the prompt string + for constraint in constraints: + assert constraint in prompt_string + for command in commands: + assert command["name"] in prompt_string + for key, value in command["args"].items(): + assert f'"{key}": "{value}"' in prompt_string + for resource in resources: + assert resource in prompt_string + for evaluation in evaluations: + assert evaluation in prompt_string
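
Illustration: how a Command becomes an OpenAI function spec. The patch maps each
registered Command to an OpenAIFunctionSpec and serializes it through the
__dict__ property added in autogpt/llm/providers/openai.py. The sketch below is
not part of the patch and assumes it has been applied; the browse_website
command, its description, and its parameter details are illustrative
assumptions only.

    # Minimal sketch of OpenAIFunctionSpec serialization (illustrative names).
    from autogpt.llm.providers.openai import OpenAIFunctionSpec

    spec = OpenAIFunctionSpec(
        name="browse_website",
        description="Browse a website and answer a question about it",
        parameters={
            "url": OpenAIFunctionSpec.ParameterSpec(
                name="url",
                type="string",
                description="The URL to visit",
                required=True,
            ),
            "question": OpenAIFunctionSpec.ParameterSpec(
                name="question",
                type="string",
                description="What to look for on the page",
            ),
        },
    )

    # spec.__dict__ yields the OpenAI-consumable form:
    # {
    #     "name": "browse_website",
    #     "description": "Browse a website and answer a question about it",
    #     "parameters": {
    #         "type": "object",
    #         "properties": {
    #             "url": {"type": "string", "description": "The URL to visit"},
    #             "question": {"type": "string", "description": "What to look for on the page"},
    #         },
    #         "required": ["url"],
    #     },
    # }

When config.openai_functions is enabled, create_chat_completion() forwards a
list of these dicts as the functions keyword argument of the OpenAI API call.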
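
Illustration: how get_command() folds a function_call back into the command
JSON. With OPENAI_FUNCTIONS=True the model replies with a function_call instead
of a "command" object, and the updated get_command() in autogpt/app.py rebuilds
the same {"name": ..., "args": ...} shape from it. A minimal sketch, with
illustrative argument values:

    import json

    from autogpt.llm.providers.openai import OpenAIFunctionCall

    # A function call as it would arrive in ChatModelResponse.function_call;
    # the arguments field is a stringified, unverified JSON object.
    function_call = OpenAIFunctionCall(
        name="browse_website",
        arguments='{"url": "https://example.com", "question": "What is sold here?"}',
    )

    # get_command() injects it into the reply JSON under the "command" key:
    assistant_reply_json = {}
    assistant_reply_json["command"] = {
        "name": function_call.name,
        "args": json.loads(function_call.arguments),
    }

    assert assistant_reply_json["command"]["args"]["url"] == "https://example.com"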