Agent loop v2: Planning & Task Management (part 1: refactoring) (#4799)

* Move & rename module `agent` -> `agents`

* WIP: abstract agent structure into base class and port Agent

* Move command arg path sanitization to decorator

* Add fallback token limit in llm.utils.create_chat_completion

* Rebase `MessageHistory` class on `ChatSequence` class

* Fix linting

* Consolidate logging modules

* Wham Bam Boom

* Fix tests & linting complaints

* Update Agent class docstring

* Fix Agent import in autogpt.llm.providers.openai

* Fix agent kwarg in test_execute_code.py

* Fix benchmarks.py

* Clean up lingering Agent(ai_name=...) initializations

* Fix agent kwarg

* Make sanitize_path_arg decorator more robust

* Fix linting

* Fix command enabling lambdas

* Use relative paths in file ops logger

* Fix test_execute_python_file_not_found

* Fix Config model validation breaking on .plugins

* Define validator for Config.plugins

* Fix Config model issues

* Fix agent iteration budget in testing

* Fix declaration of context_while_think

* Fix Agent.parse_and_process_response signature

* Fix Agent cycle_budget usages

* Fix budget checking in BaseAgent.__next__

* Fix cycle budget initialization

* Fix function calling in BaseAgent.think()

* Include functions in token length calculation

* Fix Config errors

* Add debug output to patched_api_requestor to investigate HTTP 400 errors

* If this works I'm gonna be sad

* Fix BaseAgent cycle budget logic and document attributes

* Document attributes on `Agent`

* Fix import issues between Agent and MessageHistory

* Improve typing

* Extract application code from the agent (#4982)

* Extract application code from the agent

* Wrap interaction loop in a function and call in benchmarks

* Forgot the important function call

* Add docstrings and inline comments to run loop

* Update typing and docstrings in agent

* Docstring formatting

* Separate prompt construction from on_before_think

* Use `self.default_cycle_instruction` in `Agent.think()`

* Fix formatting

* hot fix the SIGINT handler (#4997)

The signal handler in autogpt/main.py doesn't work properly because of the
clean_input(...) function. This commit remedies the issue, which is mentioned in
3966cdfd69 (r1264278776)
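
Roughly, the fix (see the autogpt/utils.py hunk below) keeps the SIGINT handler registered in main.py and tells prompt_toolkit not to intercept the signal while waiting for input. A minimal sketch, assuming a prompt_toolkit PromptSession as used by autogpt.utils:

    import signal
    from prompt_toolkit import PromptSession
    from prompt_toolkit.formatted_text import ANSI

    session = PromptSession()

    def sigint_handler(signum, frame):
        # Without handle_sigint=False below, prompt_toolkit would swallow
        # Ctrl+C while waiting for input and this handler would never run.
        print("Interrupt signal received. Exiting...")
        raise SystemExit

    signal.signal(signal.SIGINT, sigint_handler)

    # handle_sigint=False leaves SIGINT to the handler registered above.
    answer = session.prompt(ANSI("Input: "), handle_sigint=False)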

* Update the sigint handler to be smart enough to actually work (#4999)

* Update the sigint handler to be smart enough to actually work

* Update autogpt/main.py

Co-authored-by: Reinier van der Leer <github@pwuts.nl>

* Can still use context manager

* Merge in upstream

---------

Co-authored-by: Reinier van der Leer <github@pwuts.nl>

* Fix CI

* Fix initial prompt construction

* Fix off-by-one error

* Allow exit/EXIT to shut down the app

* Remove dead code

---------

Co-authored-by: collijk <collijk@uw.edu>
Co-authored-by: Cyrus <39694513+cyrus-hawk@users.noreply.github.com>
Reinier van der Leer
2023-07-20 17:34:49 +02:00
committed by GitHub
parent 08a1e22973
commit db95d4cb84
24 changed files with 855 additions and 589 deletions

View File

@@ -1,3 +1,4 @@
from .agent import Agent
from .base import AgentThoughts, BaseAgent, CommandArgs, CommandName
__all__ = ["Agent"]
__all__ = ["BaseAgent", "Agent", "CommandName", "CommandArgs", "AgentThoughts"]

View File

@@ -1,315 +1,215 @@
from __future__ import annotations
import json
import signal
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional
from colorama import Fore, Style
if TYPE_CHECKING:
from autogpt.config import AIConfig, Config
from autogpt.llm.base import ChatModelResponse, ChatSequence
from autogpt.memory.vector import VectorMemory
from autogpt.models.command_registry import CommandRegistry
from autogpt.config import Config
from autogpt.config.ai_config import AIConfig
from autogpt.json_utils.utilities import extract_json_from_response, validate_json
from autogpt.llm import ChatModelResponse
from autogpt.llm.chat import chat_with_ai
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
from autogpt.json_utils.utilities import extract_dict_from_response, validate_dict
from autogpt.llm.api_manager import ApiManager
from autogpt.llm.base import Message
from autogpt.llm.utils import count_string_tokens
from autogpt.logs import (
from autogpt.logs import logger
from autogpt.logs.log_cycle import (
FULL_MESSAGE_HISTORY_FILE_NAME,
NEXT_ACTION_FILE_NAME,
USER_INPUT_FILE_NAME,
LogCycleHandler,
logger,
print_assistant_thoughts,
remove_ansi_escape,
)
from autogpt.memory.message_history import MessageHistory
from autogpt.memory.vector import VectorMemory
from autogpt.models.command_registry import CommandRegistry
from autogpt.speech import say_text
from autogpt.spinner import Spinner
from autogpt.utils import clean_input
from autogpt.workspace import Workspace
from .base import AgentThoughts, BaseAgent, CommandArgs, CommandName
class Agent:
"""Agent class for interacting with Auto-GPT.
Attributes:
ai_name: The name of the agent.
memory: The memory object to use.
next_action_count: The number of actions to execute.
system_prompt: The system prompt is the initial prompt that defines everything
the AI needs to know to achieve its task successfully.
Currently, the dynamic and customizable information in the system prompt are
ai_name, description and goals.
triggering_prompt: The last sentence the AI will see before answering.
For Auto-GPT, this prompt is:
Determine exactly one command to use, and respond using the format specified
above:
The triggering prompt is not part of the system prompt because between the
system prompt and the triggering
prompt we have contextual information that can distract the AI and make it
forget that its goal is to find the next task to achieve.
SYSTEM PROMPT
CONTEXTUAL INFORMATION (memory, previous conversations, anything relevant)
TRIGGERING PROMPT
The triggering prompt reminds the AI about its short term meta task
(defining the next task)
"""
class Agent(BaseAgent):
"""Agent class for interacting with Auto-GPT."""
def __init__(
self,
ai_name: str,
memory: VectorMemory,
next_action_count: int,
command_registry: CommandRegistry,
ai_config: AIConfig,
system_prompt: str,
command_registry: CommandRegistry,
memory: VectorMemory,
triggering_prompt: str,
workspace_directory: str | Path,
config: Config,
cycle_budget: Optional[int] = None,
):
self.ai_name = ai_name
super().__init__(
ai_config=ai_config,
command_registry=command_registry,
config=config,
default_cycle_instruction=triggering_prompt,
cycle_budget=cycle_budget,
)
self.memory = memory
self.history = MessageHistory.for_model(config.smart_llm, agent=self)
self.next_action_count = next_action_count
self.command_registry = command_registry
self.config = config
self.ai_config = ai_config
self.system_prompt = system_prompt
self.triggering_prompt = triggering_prompt
"""VectorMemoryProvider used to manage the agent's context (TODO)"""
self.workspace = Workspace(workspace_directory, config.restrict_to_workspace)
"""Workspace that the agent has access to, e.g. for reading/writing files."""
self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
self.cycle_count = 0
"""Timestamp the agent was created; only used for structured debug logging."""
self.log_cycle_handler = LogCycleHandler()
self.smart_token_limit = OPEN_AI_CHAT_MODELS.get(config.smart_llm).max_tokens
"""LogCycleHandler for structured debug logging."""
def start_interaction_loop(self):
# Interaction Loop
self.cycle_count = 0
command_name = None
arguments = None
user_input = ""
def construct_base_prompt(self, *args, **kwargs) -> ChatSequence:
if kwargs.get("prepend_messages") is None:
kwargs["prepend_messages"] = []
# Signal handler for interrupting y -N
def signal_handler(signum, frame):
if self.next_action_count == 0:
sys.exit()
else:
print(
Fore.RED
+ "Interrupt signal received. Stopping continuous command execution."
+ Style.RESET_ALL
)
self.next_action_count = 0
# Clock
kwargs["prepend_messages"].append(
Message("system", f"The current time and date is {time.strftime('%c')}"),
)
signal.signal(signal.SIGINT, signal_handler)
# Add budget information (if any) to prompt
api_manager = ApiManager()
if api_manager.get_total_budget() > 0.0:
remaining_budget = (
api_manager.get_total_budget() - api_manager.get_total_cost()
)
if remaining_budget < 0:
remaining_budget = 0
while True:
# Discontinue if continuous limit is reached
self.cycle_count += 1
self.log_cycle_handler.log_count_within_cycle = 0
budget_msg = Message(
"system",
f"Your remaining API budget is ${remaining_budget:.3f}"
+ (
" BUDGET EXCEEDED! SHUT DOWN!\n\n"
if remaining_budget == 0
else " Budget very nearly exceeded! Shut down gracefully!\n\n"
if remaining_budget < 0.005
else " Budget nearly exceeded. Finish up.\n\n"
if remaining_budget < 0.01
else ""
),
)
logger.debug(budget_msg)
if kwargs.get("append_messages") is None:
kwargs["append_messages"] = []
kwargs["append_messages"].append(budget_msg)
return super().construct_base_prompt(*args, **kwargs)
def on_before_think(self, *args, **kwargs) -> ChatSequence:
prompt = super().on_before_think(*args, **kwargs)
self.log_cycle_handler.log_count_within_cycle = 0
self.log_cycle_handler.log_cycle(
self.ai_config.ai_name,
self.created_at,
self.cycle_count,
self.history.raw(),
FULL_MESSAGE_HISTORY_FILE_NAME,
)
return prompt
def execute(
self,
command_name: str | None,
command_args: dict[str, str] | None,
user_input: str | None,
) -> str:
# Execute command
if command_name is not None and command_name.lower().startswith("error"):
result = f"Could not execute command: {command_name}{command_args}"
elif command_name == "human_feedback":
result = f"Human feedback: {user_input}"
self.log_cycle_handler.log_cycle(
self.ai_config.ai_name,
self.created_at,
self.cycle_count,
[m.raw() for m in self.history],
FULL_MESSAGE_HISTORY_FILE_NAME,
user_input,
USER_INPUT_FILE_NAME,
)
if (
self.config.continuous_mode
and self.config.continuous_limit > 0
and self.cycle_count > self.config.continuous_limit
):
logger.typewriter_log(
"Continuous Limit Reached: ",
Fore.YELLOW,
f"{self.config.continuous_limit}",
)
break
# Send message to AI, get response
with Spinner("Thinking... ", plain_output=self.config.plain_output):
assistant_reply = chat_with_ai(
self.config,
self,
self.system_prompt,
self.triggering_prompt,
self.smart_token_limit,
self.config.smart_llm,
)
try:
assistant_reply_json = extract_json_from_response(
assistant_reply.content
)
validate_json(assistant_reply_json, self.config)
except json.JSONDecodeError as e:
logger.error(f"Exception while validating assistant reply JSON: {e}")
assistant_reply_json = {}
else:
for plugin in self.config.plugins:
if not plugin.can_handle_pre_command():
continue
command_name, arguments = plugin.pre_command(command_name, command_args)
command_result = execute_command(
command_name=command_name,
arguments=command_args,
agent=self,
)
result = f"Command {command_name} returned: " f"{command_result}"
result_tlength = count_string_tokens(str(command_result), self.llm.name)
memory_tlength = count_string_tokens(
str(self.history.summary_message()), self.llm.name
)
if result_tlength + memory_tlength > self.send_token_limit:
result = f"Failure: command {command_name} returned too much output. \
Do not execute this command again with the same arguments."
for plugin in self.config.plugins:
if not plugin.can_handle_post_planning():
if not plugin.can_handle_post_command():
continue
assistant_reply_json = plugin.post_planning(assistant_reply_json)
result = plugin.post_command(command_name, result)
# Check if there's a result from the command; append it to the message
if result is None:
self.history.add("system", "Unable to execute command", "action_result")
else:
self.history.add("system", result, "action_result")
# Print Assistant thoughts
if assistant_reply_json != {}:
# Get command name and arguments
try:
print_assistant_thoughts(
self.ai_name, assistant_reply_json, self.config
)
command_name, arguments = extract_command(
assistant_reply_json, assistant_reply, self.config
)
if self.config.speak_mode:
say_text(f"I want to execute {command_name}", self.config)
return result
except Exception as e:
logger.error("Error: \n", str(e))
self.log_cycle_handler.log_cycle(
self.ai_config.ai_name,
self.created_at,
self.cycle_count,
assistant_reply_json,
NEXT_ACTION_FILE_NAME,
def parse_and_process_response(
self, llm_response: ChatModelResponse, *args, **kwargs
) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
if not llm_response.content:
raise SyntaxError("Assistant response has no text content")
assistant_reply_dict = extract_dict_from_response(llm_response.content)
valid, errors = validate_dict(assistant_reply_dict, self.config)
if not valid:
raise SyntaxError(
"Validation of response failed:\n "
+ ";\n ".join([str(e) for e in errors])
)
# First log new-line so user can differentiate sections better in console
logger.typewriter_log("\n")
logger.typewriter_log(
"NEXT ACTION: ",
Fore.CYAN,
f"COMMAND = {Fore.CYAN}{remove_ansi_escape(command_name)}{Style.RESET_ALL} "
f"ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}",
)
for plugin in self.config.plugins:
if not plugin.can_handle_post_planning():
continue
assistant_reply_dict = plugin.post_planning(assistant_reply_dict)
if not self.config.continuous_mode and self.next_action_count == 0:
# ### GET USER AUTHORIZATION TO EXECUTE COMMAND ###
# Get key press: Prompt the user to press enter to continue or escape
# to exit
self.user_input = ""
logger.info(
f"Enter '{self.config.authorise_key}' to authorise command, "
f"'{self.config.authorise_key} -N' to run N continuous commands, "
f"'{self.config.exit_key}' to exit program, or enter feedback for "
f"{self.ai_name}..."
)
while True:
if self.config.chat_messages_enabled:
console_input = clean_input(
self.config, "Waiting for your response..."
)
else:
console_input = clean_input(
self.config, Fore.MAGENTA + "Input:" + Style.RESET_ALL
)
if console_input.lower().strip() == self.config.authorise_key:
user_input = "GENERATE NEXT COMMAND JSON"
break
elif console_input.lower().strip() == "":
logger.warn("Invalid input format.")
continue
elif console_input.lower().startswith(
f"{self.config.authorise_key} -"
):
try:
self.next_action_count = abs(
int(console_input.split(" ")[1])
)
user_input = "GENERATE NEXT COMMAND JSON"
except ValueError:
logger.warn(
f"Invalid input format. Please enter '{self.config.authorise_key} -n' "
"where n is the number of continuous tasks."
)
continue
break
elif console_input.lower() == self.config.exit_key:
user_input = "EXIT"
break
else:
user_input = console_input
command_name = "human_feedback"
self.log_cycle_handler.log_cycle(
self.ai_config.ai_name,
self.created_at,
self.cycle_count,
user_input,
USER_INPUT_FILE_NAME,
)
break
response = None, None, assistant_reply_dict
if user_input == "GENERATE NEXT COMMAND JSON":
logger.typewriter_log(
"-=-=-=-=-=-=-= COMMAND AUTHORISED BY USER -=-=-=-=-=-=-=",
Fore.MAGENTA,
"",
)
elif user_input == "EXIT":
logger.info("Exiting...")
break
else:
# First log new-line so user can differentiate sections better in console
logger.typewriter_log("\n")
# Print authorized commands left value
logger.typewriter_log(
f"{Fore.CYAN}AUTHORISED COMMANDS LEFT: {Style.RESET_ALL}{self.next_action_count}"
# Print Assistant thoughts
if assistant_reply_dict != {}:
# Get command name and arguments
try:
command_name, arguments = extract_command(
assistant_reply_dict, llm_response, self.config
)
response = command_name, arguments, assistant_reply_dict
except Exception as e:
logger.error("Error: \n", str(e))
# Execute command
if command_name is not None and command_name.lower().startswith("error"):
result = f"Could not execute command: {arguments}"
elif command_name == "human_feedback":
result = f"Human feedback: {user_input}"
else:
for plugin in self.config.plugins:
if not plugin.can_handle_pre_command():
continue
command_name, arguments = plugin.pre_command(
command_name, arguments
)
command_result = execute_command(
command_name=command_name,
arguments=arguments,
agent=self,
)
result = f"Command {command_name} returned: " f"{command_result}"
result_tlength = count_string_tokens(
str(command_result), self.config.smart_llm
)
memory_tlength = count_string_tokens(
str(self.history.summary_message()), self.config.smart_llm
)
if result_tlength + memory_tlength + 600 > self.smart_token_limit:
result = f"Failure: command {command_name} returned too much output. \
Do not execute this command again with the same arguments."
for plugin in self.config.plugins:
if not plugin.can_handle_post_command():
continue
result = plugin.post_command(command_name, result)
if self.next_action_count > 0:
self.next_action_count -= 1
# Check if there's a result from the command; append it to the message
# history
if result is not None:
self.history.add("system", result, "action_result")
logger.typewriter_log("SYSTEM: ", Fore.YELLOW, result)
else:
self.history.add("system", "Unable to execute command", "action_result")
logger.typewriter_log(
"SYSTEM: ", Fore.YELLOW, "Unable to execute command"
)
self.log_cycle_handler.log_cycle(
self.ai_config.ai_name,
self.created_at,
self.cycle_count,
assistant_reply_dict,
NEXT_ACTION_FILE_NAME,
)
return response
def extract_command(
assistant_reply_json: dict, assistant_reply: ChatModelResponse, config: Config
):
) -> tuple[str, dict[str, str]]:
"""Parse the response and return the command name and arguments
Args:
@@ -327,27 +227,29 @@ def extract_command(
"""
if config.openai_functions:
if assistant_reply.function_call is None:
return "Error:", "No 'function_call' in assistant reply"
return "Error:", {"message": "No 'function_call' in assistant reply"}
assistant_reply_json["command"] = {
"name": assistant_reply.function_call.name,
"args": json.loads(assistant_reply.function_call.arguments),
}
try:
if "command" not in assistant_reply_json:
return "Error:", "Missing 'command' object in JSON"
return "Error:", {"message": "Missing 'command' object in JSON"}
if not isinstance(assistant_reply_json, dict):
return (
"Error:",
f"The previous message sent was not a dictionary {assistant_reply_json}",
{
"message": f"The previous message sent was not a dictionary {assistant_reply_json}"
},
)
command = assistant_reply_json["command"]
if not isinstance(command, dict):
return "Error:", "'command' object is not a dictionary"
return "Error:", {"message": "'command' object is not a dictionary"}
if "name" not in command:
return "Error:", "Missing 'name' field in 'command' object"
return "Error:", {"message": "Missing 'name' field in 'command' object"}
command_name = command["name"]
@@ -356,17 +258,17 @@ def extract_command(
return command_name, arguments
except json.decoder.JSONDecodeError:
return "Error:", "Invalid JSON"
return "Error:", {"message": "Invalid JSON"}
# All other errors, return "Error: + error message"
except Exception as e:
return "Error:", str(e)
return "Error:", {"message": str(e)}
def execute_command(
command_name: str,
arguments: dict[str, str],
agent: Agent,
):
) -> Any:
"""Execute the command and return the result
Args:
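
The prompt layout described in the removed docstring above (system prompt, then contextual information, then the triggering prompt) is what the new BaseAgent reproduces in construct_prompt. A rough sketch of that layout using the ChatSequence and Message types from this PR; the helper below is illustrative, not the actual implementation:

    from autogpt.llm.base import ChatSequence, Message

    def sketch_prompt(
        model: str, system_prompt: str, context: list[Message], triggering_prompt: str
    ) -> ChatSequence:
        # 1. SYSTEM PROMPT: personality, goals, resources, constraints
        prompt = ChatSequence.for_model(model, [Message("system", system_prompt)])
        # 2. CONTEXTUAL INFORMATION: running summary + truncated message history
        for message in context:
            prompt.append(message)
        # 3. TRIGGERING PROMPT: reminds the model to pick exactly one command
        prompt.append(Message("user", triggering_prompt))
        return prompt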

autogpt/agents/base.py (new file, 318 lines added)
View File

@@ -0,0 +1,318 @@
from __future__ import annotations
from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING, Any, Optional
if TYPE_CHECKING:
from autogpt.config import AIConfig, Config
from autogpt.models.command_registry import CommandRegistry
from autogpt.llm.base import ChatModelResponse, ChatSequence, Message
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS, get_openai_command_specs
from autogpt.llm.utils import count_message_tokens, create_chat_completion
from autogpt.logs import logger
from autogpt.memory.message_history import MessageHistory
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
CommandName = str
CommandArgs = dict[str, str]
AgentThoughts = dict[str, Any]
class BaseAgent(metaclass=ABCMeta):
"""Base class for all Auto-GPT agents."""
def __init__(
self,
ai_config: AIConfig,
command_registry: CommandRegistry,
config: Config,
big_brain: bool = True,
default_cycle_instruction: str = DEFAULT_TRIGGERING_PROMPT,
cycle_budget: Optional[int] = 1,
send_token_limit: Optional[int] = None,
summary_max_tlength: Optional[int] = None,
):
self.ai_config = ai_config
"""The AIConfig or "personality" object associated with this agent."""
self.command_registry = command_registry
"""The registry containing all commands available to the agent."""
self.config = config
"""The applicable application configuration."""
self.big_brain = big_brain
"""
Whether this agent uses the configured smart LLM (default) to think,
as opposed to the configured fast LLM.
"""
self.default_cycle_instruction = default_cycle_instruction
"""The default instruction passed to the AI for a thinking cycle."""
self.cycle_budget = cycle_budget
"""
The number of cycles that the agent is allowed to run unsupervised.
`None` for unlimited continuous execution,
`1` to require user approval for every step,
`0` to stop the agent.
"""
self.cycles_remaining = cycle_budget
"""The number of cycles remaining within the `cycle_budget`."""
self.cycle_count = 0
"""The number of cycles that the agent has run since its initialization."""
self.system_prompt = ai_config.construct_full_prompt(config)
"""
The system prompt sets up the AI's personality and explains its goals,
available resources, and restrictions.
"""
llm_name = self.config.smart_llm if self.big_brain else self.config.fast_llm
self.llm = OPEN_AI_CHAT_MODELS[llm_name]
"""The LLM that the agent uses to think."""
self.send_token_limit = send_token_limit or self.llm.max_tokens * 3 // 4
"""
The token limit for prompt construction. Should leave room for the completion;
defaults to 75% of `llm.max_tokens`.
"""
self.history = MessageHistory(
self.llm,
max_summary_tlength=summary_max_tlength or self.send_token_limit // 6,
)
def think(
self,
instruction: Optional[str] = None,
) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
"""Runs the agent for one cycle.
Params:
instruction: The instruction to put at the end of the prompt.
Returns:
The command name and arguments, if any, and the agent's thoughts.
"""
instruction = instruction or self.default_cycle_instruction
prompt: ChatSequence = self.construct_prompt(instruction)
prompt = self.on_before_think(prompt, instruction)
raw_response = create_chat_completion(
prompt,
self.config,
functions=get_openai_command_specs(self.command_registry)
if self.config.openai_functions
else None,
)
self.cycle_count += 1
return self.on_response(raw_response, prompt, instruction)
@abstractmethod
def execute(
self,
command_name: str | None,
command_args: dict[str, str] | None,
user_input: str | None,
) -> str:
"""Executes the given command, if any, and returns the agent's response.
Params:
command_name: The name of the command to execute, if any.
command_args: The arguments to pass to the command, if any.
user_input: The user's input, if any.
Returns:
The results of the command.
"""
...
def construct_base_prompt(
self,
prepend_messages: list[Message] = [],
append_messages: list[Message] = [],
reserve_tokens: int = 0,
) -> ChatSequence:
"""Constructs and returns a prompt with the following structure:
1. System prompt
2. `prepend_messages`
3. Message history of the agent, truncated & prepended with running summary as needed
4. `append_messages`
Params:
prepend_messages: Messages to insert between the system prompt and message history
append_messages: Messages to insert after the message history
reserve_tokens: Number of tokens to reserve for content that is added later
"""
prompt = ChatSequence.for_model(
self.llm.name,
[Message("system", self.system_prompt)] + prepend_messages,
)
# Reserve tokens for messages to be appended later, if any
reserve_tokens += self.history.max_summary_tlength
if append_messages:
reserve_tokens += count_message_tokens(append_messages, self.llm.name)
# Fill message history, up to a margin of reserved_tokens.
# Trim remaining historical messages and add them to the running summary.
history_start_index = len(prompt)
trimmed_history = add_history_upto_token_limit(
prompt, self.history, self.send_token_limit - reserve_tokens
)
if trimmed_history:
new_summary_msg, _ = self.history.trim_messages(list(prompt), self.config)
prompt.insert(history_start_index, new_summary_msg)
if append_messages:
prompt.extend(append_messages)
return prompt
def construct_prompt(self, cycle_instruction: str) -> ChatSequence:
"""Constructs and returns a prompt with the following structure:
1. System prompt
2. Message history of the agent, truncated & prepended with running summary as needed
3. `cycle_instruction`
Params:
cycle_instruction: The final instruction for a thinking cycle
"""
if not cycle_instruction:
raise ValueError("No instruction given")
cycle_instruction_msg = Message("user", cycle_instruction)
cycle_instruction_tlength = count_message_tokens(
cycle_instruction_msg, self.llm.name
)
prompt = self.construct_base_prompt(reserve_tokens=cycle_instruction_tlength)
# ADD user input message ("triggering prompt")
prompt.append(cycle_instruction_msg)
return prompt
def on_before_think(self, prompt: ChatSequence, instruction: str) -> ChatSequence:
"""Called after constructing the prompt but before executing it.
Calls the `on_planning` hook of any enabled and capable plugins, adding their
output to the prompt.
Params:
instruction: The instruction for the current cycle, also used in constructing the prompt
Returns:
The prompt to execute
"""
current_tokens_used = prompt.token_length
plugin_count = len(self.config.plugins)
for i, plugin in enumerate(self.config.plugins):
if not plugin.can_handle_on_planning():
continue
plugin_response = plugin.on_planning(
self.ai_config.prompt_generator, prompt.raw()
)
if not plugin_response or plugin_response == "":
continue
message_to_add = Message("system", plugin_response)
tokens_to_add = count_message_tokens(message_to_add, self.llm.name)
if current_tokens_used + tokens_to_add > self.send_token_limit:
logger.debug(f"Plugin response too long, skipping: {plugin_response}")
logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
break
prompt.insert(
-1, message_to_add
) # HACK: assumes cycle instruction to be at the end
current_tokens_used += tokens_to_add
return prompt
def on_response(
self, llm_response: ChatModelResponse, prompt: ChatSequence, instruction: str
) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
"""Called upon receiving a response from the chat model.
Adds the last/newest message in the prompt and the response to `history`,
and calls `self.parse_and_process_response()` to do the rest.
Params:
llm_response: The raw response from the chat model
prompt: The prompt that was executed
instruction: The instruction for the current cycle, also used in constructing the prompt
Returns:
The parsed command name and command args, if any, and the agent thoughts.
"""
# Save assistant reply to message history
self.history.append(prompt[-1])
self.history.add(
"assistant", llm_response.content, "ai_response"
) # FIXME: support function calls
try:
return self.parse_and_process_response(llm_response, prompt, instruction)
except SyntaxError as e:
logger.error(f"Response could not be parsed: {e}")
# TODO: tune this message
self.history.add(
"system",
f"Your response could not be parsed: {e}"
"\n\nRemember to only respond using the specified format above!",
)
return None, None, {}
# TODO: update memory/context
@abstractmethod
def parse_and_process_response(
self, llm_response: ChatModelResponse, prompt: ChatSequence, instruction: str
) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
"""Validate, parse & process the LLM's response.
Must be implemented by derivative classes: no base implementation is provided,
since the implementation depends on the role of the derivative Agent.
Params:
llm_response: The raw response from the chat model
prompt: The prompt that was executed
instruction: The instruction for the current cycle, also used in constructing the prompt
Returns:
The parsed command name and command args, if any, and the agent thoughts.
"""
pass
def add_history_upto_token_limit(
prompt: ChatSequence, history: MessageHistory, t_limit: int
) -> list[Message]:
current_prompt_length = prompt.token_length
insertion_index = len(prompt)
limit_reached = False
trimmed_messages: list[Message] = []
for cycle in reversed(list(history.per_cycle())):
messages_to_add = [msg for msg in cycle if msg is not None]
tokens_to_add = count_message_tokens(messages_to_add, prompt.model.name)
if current_prompt_length + tokens_to_add > t_limit:
limit_reached = True
if not limit_reached:
# Add the most recent message to the start of the chain,
# after the system prompts.
prompt.insert(insertion_index, *messages_to_add)
current_prompt_length += tokens_to_add
else:
trimmed_messages = messages_to_add + trimmed_messages
return trimmed_messages
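
As an illustration of the contract this base class defines, a minimal hypothetical subclass might look as follows; EchoAgent and its "echo" command are made up for this sketch, the real derivative is Agent in autogpt/agents/agent.py above:

    from __future__ import annotations

    from autogpt.agents.base import AgentThoughts, BaseAgent, CommandArgs, CommandName
    from autogpt.llm.base import ChatModelResponse, ChatSequence

    class EchoAgent(BaseAgent):
        """Hypothetical agent that treats the whole LLM reply as a single 'echo' command."""

        def parse_and_process_response(
            self, llm_response: ChatModelResponse, prompt: ChatSequence, instruction: str
        ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
            if not llm_response.content:
                raise SyntaxError("Assistant response has no text content")
            # No JSON schema here; just wrap the raw reply text as a command.
            return "echo", {"text": llm_response.content}, {}

        def execute(
            self,
            command_name: str | None,
            command_args: dict[str, str] | None,
            user_input: str | None,
        ) -> str:
            if command_name == "echo" and command_args:
                return command_args["text"]
            return f"Unknown command: {command_name}"

    # One supervised cycle: think() builds the prompt, queries the LLM and parses
    # the reply via parse_and_process_response(); execute() then runs the command.
    #   command_name, command_args, thoughts = agent.think()
    #   result = agent.execute(command_name, command_args, user_input=None)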

View File

@@ -2,7 +2,7 @@
import ast
import json
import os.path
from typing import Any
from typing import Any, Literal
from jsonschema import Draft7Validator
@@ -12,7 +12,7 @@ from autogpt.logs import logger
LLM_DEFAULT_RESPONSE_FORMAT = "llm_response_format_1"
def extract_json_from_response(response_content: str) -> dict:
def extract_dict_from_response(response_content: str) -> dict[str, Any]:
# Sometimes the response includes the JSON in a code block with ```
if response_content.startswith("```") and response_content.endswith("```"):
# Discard the first and last ```, then re-join in case the response naturally included ```
@@ -33,16 +33,19 @@ def llm_response_schema(
) -> dict[str, Any]:
filename = os.path.join(os.path.dirname(__file__), f"{schema_name}.json")
with open(filename, "r") as f:
json_schema = json.load(f)
try:
json_schema = json.load(f)
except Exception as e:
raise RuntimeError(f"Failed to load JSON schema: {e}")
if config.openai_functions:
del json_schema["properties"]["command"]
json_schema["required"].remove("command")
return json_schema
def validate_json(
json_object: object, config: Config, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT
) -> bool:
def validate_dict(
object: object, config: Config, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT
) -> tuple[Literal[True], None] | tuple[Literal[False], list]:
"""
:type schema_name: object
:param schema_name: str
@@ -50,24 +53,23 @@ def validate_json(
Returns:
bool: Whether the json_object is valid or not
list: Errors found in the json_object, or None if the object is valid
"""
schema = llm_response_schema(config, schema_name)
validator = Draft7Validator(schema)
if errors := sorted(validator.iter_errors(json_object), key=lambda e: e.path):
if errors := sorted(validator.iter_errors(object), key=lambda e: e.path):
for error in errors:
logger.debug(f"JSON Validation Error: {error}")
if config.debug_mode:
logger.error(
json.dumps(json_object, indent=4)
) # Replace 'json_object' with the variable containing the JSON data
logger.error(json.dumps(object, indent=4))
logger.error("The following issues were found:")
for error in errors:
logger.error(f"Error: {error.message}")
return False
return False, errors
logger.debug("The JSON object is valid.")
return True
return True, None
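
A typical call site for the new signature, mirroring Agent.parse_and_process_response in the agent.py hunk above:

    valid, errors = validate_dict(assistant_reply_dict, config)
    if not valid:
        # `errors` is the list of jsonschema validation errors found
        raise SyntaxError(
            "Validation of response failed:\n  "
            + ";\n  ".join(str(e) for e in errors)
        )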

View File

@@ -1,6 +1,7 @@
from autogpt.llm.base import (
ChatModelInfo,
ChatModelResponse,
ChatSequence,
EmbeddingModelInfo,
EmbeddingModelResponse,
LLMResponse,
@@ -10,6 +11,7 @@ from autogpt.llm.base import (
__all__ = [
"Message",
"ChatSequence",
"ModelInfo",
"ChatModelInfo",
"EmbeddingModelInfo",

View File

@@ -1,203 +0,0 @@
from __future__ import annotations
import time
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from autogpt.agents.agent import Agent
from autogpt.config import Config
from autogpt.llm.api_manager import ApiManager
from autogpt.llm.base import ChatSequence, Message
from autogpt.llm.providers.openai import (
count_openai_functions_tokens,
get_openai_command_specs,
)
from autogpt.llm.utils import count_message_tokens, create_chat_completion
from autogpt.logs import CURRENT_CONTEXT_FILE_NAME, logger
# TODO: Change debug from hardcode to argument
def chat_with_ai(
config: Config,
agent: Agent,
system_prompt: str,
triggering_prompt: str,
token_limit: int,
model: str | None = None,
):
"""
Interact with the OpenAI API, sending the prompt, user input,
message history, and permanent memory.
Args:
config (Config): The config to use.
agent (Agent): The agent to use.
system_prompt (str): The prompt explaining the rules to the AI.
triggering_prompt (str): The input from the user.
token_limit (int): The maximum number of tokens allowed in the API call.
model (str, optional): The model to use. By default, the config.smart_llm will be used.
Returns:
str: The AI's response.
"""
if model is None:
model = config.smart_llm
# Reserve 1000 tokens for the response
logger.debug(f"Token limit: {token_limit}")
send_token_limit = token_limit - 1000
# if len(agent.history) == 0:
# relevant_memory = ""
# else:
# recent_history = agent.history[-5:]
# shuffle(recent_history)
# relevant_memories = agent.memory.get_relevant(
# str(recent_history), 5
# )
# if relevant_memories:
# shuffle(relevant_memories)
# relevant_memory = str(relevant_memories)
# logger.debug(f"Memory Stats: {agent.memory.get_stats()}")
relevant_memory = []
message_sequence = ChatSequence.for_model(
model,
[
Message("system", system_prompt),
Message("system", f"The current time and date is {time.strftime('%c')}"),
# Message(
# "system",
# f"This reminds you of these events from your past:\n{relevant_memory}\n\n",
# ),
],
)
# Count the currently used tokens
current_tokens_used = message_sequence.token_length
insertion_index = len(message_sequence)
# Account for tokens used by OpenAI functions
openai_functions = None
if agent.config.openai_functions:
openai_functions = get_openai_command_specs(agent.command_registry)
functions_tlength = count_openai_functions_tokens(openai_functions, model)
current_tokens_used += functions_tlength
logger.debug(f"OpenAI Functions take up {functions_tlength} tokens in API call")
# Account for user input (appended later)
user_input_msg = Message("user", triggering_prompt)
current_tokens_used += count_message_tokens(user_input_msg, model)
current_tokens_used += agent.history.max_summary_tlength # Reserve space
current_tokens_used += 500 # Reserve space for the openai functions TODO improve
# Add historical Messages until the token limit is reached
# or there are no more messages to add.
for cycle in reversed(list(agent.history.per_cycle())):
messages_to_add = [msg for msg in cycle if msg is not None]
tokens_to_add = count_message_tokens(messages_to_add, model)
if current_tokens_used + tokens_to_add > send_token_limit:
break
# Add the most recent message to the start of the chain,
# after the system prompts.
message_sequence.insert(insertion_index, *messages_to_add)
current_tokens_used += tokens_to_add
# Update & add summary of trimmed messages
if len(agent.history) > 0:
new_summary_message, trimmed_messages = agent.history.trim_messages(
current_message_chain=list(message_sequence), config=agent.config
)
tokens_to_add = count_message_tokens(new_summary_message, model)
message_sequence.insert(insertion_index, new_summary_message)
current_tokens_used += tokens_to_add - agent.history.max_summary_tlength
# FIXME: uncomment when memory is back in use
# memory_store = get_memory(config)
# for _, ai_msg, result_msg in agent.history.per_cycle(trimmed_messages):
# memory_to_add = MemoryItem.from_ai_action(ai_msg, result_msg)
# logger.debug(f"Storing the following memory:\n{memory_to_add.dump()}")
# memory_store.add(memory_to_add)
api_manager = ApiManager()
# inform the AI about its remaining budget (if it has one)
if api_manager.get_total_budget() > 0.0:
remaining_budget = api_manager.get_total_budget() - api_manager.get_total_cost()
if remaining_budget < 0:
remaining_budget = 0
budget_message = f"Your remaining API budget is ${remaining_budget:.3f}" + (
" BUDGET EXCEEDED! SHUT DOWN!\n\n"
if remaining_budget == 0
else " Budget very nearly exceeded! Shut down gracefully!\n\n"
if remaining_budget < 0.005
else " Budget nearly exceeded. Finish up.\n\n"
if remaining_budget < 0.01
else "\n\n"
)
logger.debug(budget_message)
message_sequence.add("system", budget_message)
current_tokens_used += count_message_tokens(message_sequence[-1], model)
# Append user input, the length of this is accounted for above
message_sequence.append(user_input_msg)
plugin_count = len(config.plugins)
for i, plugin in enumerate(config.plugins):
if not plugin.can_handle_on_planning():
continue
plugin_response = plugin.on_planning(
agent.ai_config.prompt_generator, message_sequence.raw()
)
if not plugin_response or plugin_response == "":
continue
tokens_to_add = count_message_tokens(Message("system", plugin_response), model)
if current_tokens_used + tokens_to_add > send_token_limit:
logger.debug(f"Plugin response too long, skipping: {plugin_response}")
logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
break
message_sequence.add("system", plugin_response)
current_tokens_used += tokens_to_add
# Calculate remaining tokens
tokens_remaining = token_limit - current_tokens_used
# assert tokens_remaining >= 0, "Tokens remaining is negative.
# This should never happen, please submit a bug report at
# https://www.github.com/Torantulino/Auto-GPT"
# Debug print the current context
logger.debug(f"Token limit: {token_limit}")
logger.debug(f"Send Token Count: {current_tokens_used}")
logger.debug(f"Tokens remaining for response: {tokens_remaining}")
logger.debug("------------ CONTEXT SENT TO AI ---------------")
for message in message_sequence:
# Skip printing the prompt
if message.role == "system" and message.content == system_prompt:
continue
logger.debug(f"{message.role.capitalize()}: {message.content}")
logger.debug("")
logger.debug("----------- END OF CONTEXT ----------------")
agent.log_cycle_handler.log_cycle(
agent.ai_name,
agent.created_at,
agent.cycle_count,
message_sequence.raw(),
CURRENT_CONTEXT_FILE_NAME,
)
# TODO: use a model defined elsewhere, so that model can contain
# temperature and other settings we care about
assistant_reply = create_chat_completion(
prompt=message_sequence,
config=agent.config,
functions=openai_functions,
max_tokens=tokens_remaining,
)
# Update full message history
agent.history.append(user_input_msg)
agent.history.add("assistant", assistant_reply.content, "ai_response")
return assistant_reply

View File

@@ -53,7 +53,7 @@ OPEN_AI_CHAT_MODELS = {
name="gpt-4-0613",
prompt_token_cost=0.03,
completion_token_cost=0.06,
max_tokens=8192,
max_tokens=8191,
),
ChatModelInfo(
name="gpt-4-32k-0314",

View File

@@ -1,20 +1,27 @@
"""The application entry point. Can be invoked by a CLI or any other front end application."""
import enum
import logging
import math
import signal
import sys
from pathlib import Path
from types import FrameType
from typing import Optional
from colorama import Fore, Style
from autogpt.agents import Agent
from autogpt.config.config import ConfigBuilder, check_openai_api_key
from autogpt.agents import Agent, AgentThoughts, CommandArgs, CommandName
from autogpt.config import AIConfig, Config, ConfigBuilder, check_openai_api_key
from autogpt.configurator import create_config
from autogpt.logs import logger
from autogpt.logs import logger, print_assistant_thoughts, remove_ansi_escape
from autogpt.memory.vector import get_memory
from autogpt.models.command_registry import CommandRegistry
from autogpt.plugins import scan_plugins
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT, construct_main_ai_config
from autogpt.speech import say_text
from autogpt.spinner import Spinner
from autogpt.utils import (
clean_input,
get_current_git_branch,
get_latest_bulletin,
get_legal_warning,
@@ -166,10 +173,7 @@ def run_auto_gpt(
goals=ai_goals,
)
ai_config.command_registry = command_registry
ai_name = ai_config.ai_name
# print(prompt)
# Initialize variables
next_action_count = 0
# add chat plugins capable of report to logger
if config.chat_messages_enabled:
@@ -186,19 +190,269 @@ def run_auto_gpt(
"Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}"
)
logger.typewriter_log("Using Browser:", Fore.GREEN, config.selenium_web_browser)
system_prompt = ai_config.construct_full_prompt(config)
if config.debug_mode:
logger.typewriter_log("Prompt:", Fore.GREEN, system_prompt)
agent = Agent(
ai_name=ai_name,
memory=memory,
next_action_count=next_action_count,
command_registry=command_registry,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace_directory,
ai_config=ai_config,
config=config,
)
agent.start_interaction_loop()
run_interaction_loop(agent)
def _get_cycle_budget(continuous_mode: bool, continuous_limit: int) -> int | None:
# Translate from the continuous_mode/continuous_limit config
# to a cycle_budget (maximum number of cycles to run without checking in with the
# user) and a count of cycles_remaining before we check in.
if continuous_mode:
cycle_budget = continuous_limit if continuous_limit else math.inf
else:
cycle_budget = 1
return cycle_budget
class UserFeedback(str, enum.Enum):
"""Enum for user feedback."""
AUTHORIZE = "GENERATE NEXT COMMAND JSON"
EXIT = "EXIT"
TEXT = "TEXT"
def run_interaction_loop(
agent: Agent,
) -> None:
"""Run the main interaction loop for the agent.
Args:
agent: The agent to run the interaction loop for.
Returns:
None
"""
# These contain both application config and agent config, so grab them here.
config = agent.config
ai_config = agent.ai_config
logger.debug(f"{ai_config.ai_name} System Prompt: {agent.system_prompt}")
cycle_budget = cycles_remaining = _get_cycle_budget(
config.continuous_mode, config.continuous_limit
)
spinner = Spinner("Thinking...", plain_output=config.plain_output)
def graceful_agent_interrupt(signum: int, frame: Optional[FrameType]) -> None:
nonlocal cycle_budget, cycles_remaining, spinner
if cycles_remaining in [0, 1, math.inf]:
logger.typewriter_log(
"Interrupt signal received. Stopping continuous command execution "
"immediately.",
Fore.RED,
)
sys.exit()
else:
restart_spinner = spinner.running
if spinner.running:
spinner.stop()
logger.typewriter_log(
"Interrupt signal received. Stopping continuous command execution.",
Fore.RED,
)
cycles_remaining = 1
if restart_spinner:
spinner.start()
# Set up an interrupt signal for the agent.
signal.signal(signal.SIGINT, graceful_agent_interrupt)
#########################
# Application Main Loop #
#########################
while cycles_remaining > 0:
logger.debug(f"Cycle budget: {cycle_budget}; remaining: {cycles_remaining}")
########
# Plan #
########
# Have the agent determine the next action to take.
with spinner:
command_name, command_args, assistant_reply_dict = agent.think()
###############
# Update User #
###############
# Print the assistant's thoughts and the next command to the user.
update_user(config, ai_config, command_name, command_args, assistant_reply_dict)
##################
# Get user input #
##################
if cycles_remaining == 1: # Last cycle
user_feedback, user_input, new_cycles_remaining = get_user_feedback(
config,
ai_config,
)
if user_feedback == UserFeedback.AUTHORIZE:
if new_cycles_remaining is not None:
# Case 1: User is altering the cycle budget.
if cycle_budget > 1:
cycle_budget = new_cycles_remaining + 1
# Case 2: User is running iteratively and
# has initiated a one-time continuous cycle
cycles_remaining = new_cycles_remaining + 1
else:
# Case 1: Continuous iteration was interrupted -> resume
if cycle_budget > 1:
logger.typewriter_log(
"RESUMING CONTINUOUS EXECUTION: ",
Fore.MAGENTA,
f"The cycle budget is {cycle_budget}.",
)
# Case 2: The agent used up its cycle budget -> reset
cycles_remaining = cycle_budget + 1
logger.typewriter_log(
"-=-=-=-=-=-=-= COMMAND AUTHORISED BY USER -=-=-=-=-=-=-=",
Fore.MAGENTA,
"",
)
elif user_feedback == UserFeedback.EXIT:
logger.typewriter_log("Exiting...", Fore.YELLOW)
exit()
else: # user_feedback == UserFeedback.TEXT
command_name = "human_feedback"
else:
user_input = None
# First log new-line so user can differentiate sections better in console
logger.typewriter_log("\n")
if cycles_remaining != math.inf:
# Print authorized commands left value
logger.typewriter_log(
"AUTHORISED COMMANDS LEFT: ", Fore.CYAN, f"{cycles_remaining}"
)
###################
# Execute Command #
###################
# Decrement the cycle counter first to reduce the likelihood of a SIGINT
# happening during command execution, setting the cycles remaining to 1,
# and then having the decrement set it to 0, exiting the application.
if command_name != "human_feedback":
cycles_remaining -= 1
result = agent.execute(command_name, command_args, user_input)
if result is not None:
logger.typewriter_log("SYSTEM: ", Fore.YELLOW, result)
else:
logger.typewriter_log("SYSTEM: ", Fore.YELLOW, "Unable to execute command")
def update_user(
config: Config,
ai_config: AIConfig,
command_name: CommandName | None,
command_args: CommandArgs | None,
assistant_reply_dict: AgentThoughts,
) -> None:
"""Prints the assistant's thoughts and the next command to the user.
Args:
config: The program's configuration.
ai_config: The AI's configuration.
command_name: The name of the command to execute.
command_args: The arguments for the command.
assistant_reply_dict: The assistant's reply.
"""
print_assistant_thoughts(ai_config.ai_name, assistant_reply_dict, config)
if command_name is not None:
if config.speak_mode:
say_text(f"I want to execute {command_name}", config)
# First log new-line so user can differentiate sections better in console
logger.typewriter_log("\n")
logger.typewriter_log(
"NEXT ACTION: ",
Fore.CYAN,
f"COMMAND = {Fore.CYAN}{remove_ansi_escape(command_name)}{Style.RESET_ALL} "
f"ARGUMENTS = {Fore.CYAN}{command_args}{Style.RESET_ALL}",
)
elif command_name.lower().startswith("error"):
logger.typewriter_log(
"ERROR: ",
Fore.RED,
f"The Agent failed to select an action. " f"Error message: {command_name}",
)
else:
logger.typewriter_log(
"NO ACTION SELECTED: ",
Fore.RED,
f"The Agent failed to select an action.",
)
def get_user_feedback(
config: Config,
ai_config: AIConfig,
) -> tuple[UserFeedback, str, int | None]:
"""Gets the user's feedback on the assistant's reply.
Args:
config: The program's configuration.
ai_config: The AI's configuration.
Returns:
A tuple of the user's feedback, the user's input, and the number of
cycles remaining if the user has initiated a continuous cycle.
"""
# ### GET USER AUTHORIZATION TO EXECUTE COMMAND ###
# Get key press: Prompt the user to press enter to continue or escape
# to exit
logger.info(
f"Enter '{config.authorise_key}' to authorise command, "
f"'{config.authorise_key} -N' to run N continuous commands, "
f"'{config.exit_key}' to exit program, or enter feedback for "
f"{ai_config.ai_name}..."
)
user_feedback = None
user_input = ""
new_cycles_remaining = None
while user_feedback is None:
# Get input from user
if config.chat_messages_enabled:
console_input = clean_input(config, "Waiting for your response...")
else:
console_input = clean_input(
config, Fore.MAGENTA + "Input:" + Style.RESET_ALL
)
# Parse user input
if console_input.lower().strip() == config.authorise_key:
user_feedback = UserFeedback.AUTHORIZE
elif console_input.lower().strip() == "":
logger.warn("Invalid input format.")
elif console_input.lower().startswith(f"{config.authorise_key} -"):
try:
user_feedback = UserFeedback.AUTHORIZE
new_cycles_remaining = abs(int(console_input.split(" ")[1]))
except ValueError:
logger.warn(
f"Invalid input format. "
f"Please enter '{config.authorise_key} -N'"
" where N is the number of continuous tasks."
)
elif console_input.lower() in [config.exit_key, "exit"]:
user_feedback = UserFeedback.EXIT
else:
user_feedback = UserFeedback.TEXT
user_input = console_input
return user_feedback, user_input, new_cycles_remaining
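
For reference, the cycle budget that replaces next_action_count maps from the config as follows; a restatement of _get_cycle_budget above, assuming it is imported from autogpt.main:

    import math

    from autogpt.main import _get_cycle_budget

    assert _get_cycle_budget(continuous_mode=False, continuous_limit=0) == 1        # confirm every cycle
    assert _get_cycle_budget(continuous_mode=True, continuous_limit=0) == math.inf  # run until interrupted
    assert _get_cycle_budget(continuous_mode=True, continuous_limit=5) == 5         # 5 unsupervised cycles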

View File

@@ -3,13 +3,13 @@ from __future__ import annotations
import copy
import json
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING, Iterator, Optional
if TYPE_CHECKING:
from autogpt.agents import Agent
from autogpt.agents import Agent, BaseAgent
from autogpt.config import Config
from autogpt.config import Config
from autogpt.json_utils.utilities import extract_json_from_response
from autogpt.json_utils.utilities import extract_dict_from_response
from autogpt.llm.base import ChatSequence, Message
from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS
from autogpt.llm.utils import (
@@ -17,13 +17,18 @@ from autogpt.llm.utils import (
count_string_tokens,
create_chat_completion,
)
from autogpt.logs import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME, logger
from autogpt.logs import (
PROMPT_SUMMARY_FILE_NAME,
SUMMARY_FILE_NAME,
LogCycleHandler,
logger,
)
@dataclass
class MessageHistory(ChatSequence):
max_summary_tlength: int = 500
agent: Optional[Agent] = None
agent: Optional[BaseAgent | Agent] = None
summary: str = "I was created"
last_trimmed_index: int = 0
@@ -80,7 +85,9 @@ Latest Development:
return new_summary_message, new_messages_not_in_chain
def per_cycle(self, messages: list[Message] | None = None):
def per_cycle(
self, messages: Optional[list[Message]] = None
) -> Iterator[tuple[Message | None, Message, Message]]:
"""
Yields:
Message: a message containing user input
@@ -98,7 +105,7 @@ Latest Development:
result_message = messages[i + 1]
try:
assert (
extract_json_from_response(ai_message.content) != {}
extract_dict_from_response(ai_message.content) != {}
), "AI response is not a valid JSON object"
assert result_message.type == "action_result"
@@ -153,7 +160,7 @@ Latest Development:
# Remove "thoughts" dictionary from "content"
try:
content_dict = extract_json_from_response(event.content)
content_dict = extract_dict_from_response(event.content)
if "thoughts" in content_dict:
del content_dict["thoughts"]
event.content = json.dumps(content_dict)
@@ -177,7 +184,7 @@ Latest Development:
)
max_input_tokens = summ_model.max_tokens - max_summary_length
summary_tlength = count_string_tokens(self.summary, summ_model.name)
batch = []
batch: list[Message] = []
batch_tlength = 0
# TODO: Put a cap on length of total new events and drop some previous events to
@@ -190,7 +197,7 @@ Latest Development:
> max_input_tokens - prompt_template_length - summary_tlength
):
# The batch is full. Summarize it and start a new one.
self.summarize_batch(batch, config, max_summary_length)
self._update_summary_with_batch(batch, config, max_summary_length)
summary_tlength = count_string_tokens(self.summary, summ_model.name)
batch = [event]
batch_tlength = event_tlength
@@ -200,19 +207,25 @@ Latest Development:
if batch:
# There's an unprocessed batch. Summarize it.
self.summarize_batch(batch, config, max_summary_length)
self._update_summary_with_batch(batch, config, max_summary_length)
return self.summary_message()
def summarize_batch(
def _update_summary_with_batch(
self, new_events_batch: list[Message], config: Config, max_output_length: int
):
) -> None:
prompt = MessageHistory.SUMMARIZATION_PROMPT.format(
summary=self.summary, new_events=new_events_batch
)
prompt = ChatSequence.for_model(config.fast_llm, [Message("user", prompt)])
if self.agent:
if (
self.agent is not None
and hasattr(self.agent, "created_at")
and isinstance(
getattr(self.agent, "log_cycle_handler", None), LogCycleHandler
)
):
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_config.ai_name,
self.agent.created_at,
@@ -225,7 +238,13 @@ Latest Development:
prompt, config, max_tokens=max_output_length
).content
if self.agent:
if (
self.agent is not None
and hasattr(self.agent, "created_at")
and isinstance(
getattr(self.agent, "log_cycle_handler", None), LogCycleHandler
)
):
self.agent.log_cycle_handler.log_cycle(
self.agent.ai_config.ai_name,
self.agent.created_at,
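
For context, the batching that feeds _update_summary_with_batch works roughly like this simplified sketch; the real update_running_summary also tracks the summary's own token length and logs each cycle, and max_batch_tokens stands in for its derived limit:

    from autogpt.config import Config
    from autogpt.llm.base import Message
    from autogpt.llm.utils import count_string_tokens
    from autogpt.memory.message_history import MessageHistory

    def summarize_in_batches(
        history: MessageHistory,
        new_events: list[Message],
        config: Config,
        max_batch_tokens: int,
        max_summary_length: int,
    ) -> None:
        batch: list[Message] = []
        batch_tlength = 0
        for event in new_events:
            event_tlength = count_string_tokens(str(event), config.fast_llm)
            if batch and batch_tlength + event_tlength > max_batch_tokens:
                # The batch is full. Summarize it and start a new one.
                history._update_summary_with_batch(batch, config, max_summary_length)
                batch, batch_tlength = [event], event_tlength
            else:
                batch.append(event)
                batch_tlength += event_tlength
        if batch:
            # There's an unprocessed batch. Summarize it.
            history._update_summary_with_batch(batch, config, max_summary_length)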

View File

@@ -9,7 +9,7 @@ from autogpt import utils
from autogpt.config import Config
from autogpt.config.ai_config import AIConfig
from autogpt.llm.base import ChatSequence, Message
from autogpt.llm.chat import create_chat_completion
from autogpt.llm.utils import create_chat_completion
from autogpt.logs import logger
from autogpt.prompts.default_prompts import (
DEFAULT_SYSTEM_PROMPT_AICONFIG_AUTOMATIC,

View File

@@ -42,12 +42,21 @@ class Spinner:
sys.stdout.write(f"{next(self.spinner)} {self.message}\r")
sys.stdout.flush()
def __enter__(self):
"""Start the spinner"""
def start(self):
self.running = True
self.spinner_thread = threading.Thread(target=self.spin)
self.spinner_thread.start()
def stop(self):
self.running = False
if self.spinner_thread is not None:
self.spinner_thread.join()
sys.stdout.write(f"\r{' ' * (len(self.message) + 2)}\r")
sys.stdout.flush()
def __enter__(self):
"""Start the spinner"""
self.start()
return self
def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
@@ -58,19 +67,4 @@ class Spinner:
exc_value (Exception): The exception value.
exc_traceback (Exception): The exception traceback.
"""
self.running = False
if self.spinner_thread is not None:
self.spinner_thread.join()
sys.stdout.write(f"\r{' ' * (len(self.message) + 2)}\r")
sys.stdout.flush()
def update_message(self, new_message, delay=0.1):
"""Update the spinner message
Args:
new_message (str): New message to display.
delay (float): The delay in seconds between each spinner update.
"""
self.delay = delay
self.message = new_message
if self.plain_output:
self.print_message()
self.stop()
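
Splitting start()/stop() out of the context-manager methods is what lets the new SIGINT handler in autogpt/main.py pause the spinner, print a warning, and resume; roughly:

    from autogpt.spinner import Spinner

    spinner = Spinner("Thinking...", plain_output=False)

    def on_interrupt() -> None:
        # Pause the spin thread so the warning isn't overwritten, then resume
        # only if the spinner was running when the interrupt arrived.
        restart = spinner.running
        if restart:
            spinner.stop()
        print("Interrupt signal received. Stopping continuous command execution.")
        if restart:
            spinner.start()

    # The context-manager form still works; it now just delegates to start()/stop().
    with spinner:
        pass  # e.g. agent.think()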

View File

@@ -55,7 +55,11 @@ def clean_input(config: Config, prompt: str = "", talk=False):
# ask for input, default when just pressing Enter is y
logger.info("Asking user via keyboard...")
answer = session.prompt(ANSI(prompt))
# handle_sigint must be set to False so that the signal handler in
# autogpt/main.py can work properly. This refers to
# https://github.com/Significant-Gravitas/Auto-GPT/pull/4799/files/3966cdfd694c2a80c0333823c3bc3da090f85ed3#r1264278776
answer = session.prompt(ANSI(prompt), handle_sigint=False)
return answer
except KeyboardInterrupt:
logger.info("You interrupted Auto-GPT")

View File

@@ -1,6 +1,6 @@
from autogpt.agents import Agent
from autogpt.config import AIConfig, Config, ConfigBuilder
from autogpt.main import COMMAND_CATEGORIES
from autogpt.main import COMMAND_CATEGORIES, run_interaction_loop
from autogpt.memory.vector import get_memory
from autogpt.models.command_registry import CommandRegistry
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
@@ -9,7 +9,7 @@ from autogpt.workspace import Workspace
def run_task(task) -> None:
agent = bootstrap_agent(task)
agent.start_interaction_loop()
run_interaction_loop(agent)
def bootstrap_agent(task):
@@ -28,15 +28,11 @@ def bootstrap_agent(task):
ai_goals=[task.user_input],
)
ai_config.command_registry = command_registry
system_prompt = ai_config.construct_full_prompt(config)
return Agent(
ai_name="Auto-GPT",
memory=get_memory(config),
command_registry=command_registry,
ai_config=ai_config,
config=config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=str(workspace_directory_path),
)

View File

@@ -54,14 +54,10 @@ def kubernetes_agent(
system_prompt = ai_config.construct_full_prompt()
agent_test_config.set_continuous_mode(False)
agent = Agent(
# We also give the AI a name
ai_name="Kubernetes-Demo",
memory=memory_json_file,
full_message_history=[],
command_registry=command_registry,
config=ai_config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace.root,
)

View File

@@ -57,7 +57,7 @@ def test_debug_code_challenge_a(
output = execute_python_file(
get_workspace_path(workspace, TEST_FILE_PATH),
dummy_agent,
agent=dummy_agent,
)
assert "error" not in output.lower(), f"Errors found in output: {output}!"

View File

@@ -38,7 +38,7 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None:
yield from input_sequence
gen = input_generator()
monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen))
monkeypatch.setattr("autogpt.utils.session.prompt", lambda _, **kwargs: next(gen))
def setup_mock_log_cycle_agent_name(

View File

@@ -6,9 +6,8 @@ import pytest
import yaml
from pytest_mock import MockerFixture
from autogpt.agents.agent import Agent
from autogpt.agents import Agent
from autogpt.config import AIConfig, Config, ConfigBuilder
from autogpt.config.ai_config import AIConfig
from autogpt.llm.api_manager import ApiManager
from autogpt.logs import logger
from autogpt.memory.vector import get_memory
@@ -98,16 +97,11 @@ def agent(config: Config, workspace: Workspace) -> Agent:
memory_json_file = get_memory(config)
memory_json_file.clear()
system_prompt = ai_config.construct_full_prompt(config)
return Agent(
ai_name=ai_config.ai_name,
memory=memory_json_file,
command_registry=command_registry,
ai_config=ai_config,
config=config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace.root,
)

View File

@@ -33,13 +33,10 @@ def dummy_agent(config: Config, memory_json_file, workspace: Workspace):
ai_config.command_registry = command_registry
agent = Agent(
ai_name="Dummy Agent",
memory=memory_json_file,
command_registry=command_registry,
ai_config=ai_config,
config=config,
next_action_count=0,
system_prompt="dummy_prompt",
triggering_prompt="dummy triggering prompt",
workspace_directory=workspace.root,
)

View File

@@ -37,7 +37,7 @@ def test_execute_python_file(python_test_file: str, random_string: str, agent: A
def test_execute_python_code(random_code: str, random_string: str, agent: Agent):
ai_name = agent.ai_name
ai_name = agent.ai_config.ai_name
result: str = sut.execute_python_code(random_code, "test_code", agent=agent)
assert result.replace("\r", "") == f"Hello {random_string}!\n"
@@ -65,7 +65,7 @@ def test_execute_python_code_disallows_name_arg_path_traversal(
def test_execute_python_code_overwrites_file(random_code: str, agent: Agent):
ai_name = agent.ai_name
ai_name = agent.ai_config.ai_name
destination = os.path.join(
agent.config.workspace_path, ai_name, "executed_code", "test_code.py"
)

View File

@@ -2,9 +2,10 @@ from autogpt.agents.agent import Agent, execute_command
def test_agent_initialization(agent: Agent):
assert agent.ai_name == "Base"
assert agent.ai_config.ai_name == "Base"
assert agent.history.messages == []
assert agent.next_action_count == 0
assert agent.cycle_budget is None
assert "You are Base" in agent.system_prompt
def test_execute_command_plugin(agent: Agent):

View File

@@ -15,23 +15,17 @@ from autogpt.memory.message_history import MessageHistory
@pytest.fixture
def agent(config: Config):
ai_name = "Test AI"
memory = MagicMock()
next_action_count = 0
command_registry = MagicMock()
ai_config = AIConfig(ai_name=ai_name)
system_prompt = "System prompt"
ai_config = AIConfig(ai_name="Test AI")
triggering_prompt = "Triggering prompt"
workspace_directory = "workspace_directory"
agent = Agent(
ai_name=ai_name,
memory=memory,
next_action_count=next_action_count,
command_registry=command_registry,
ai_config=ai_config,
config=config,
system_prompt=system_prompt,
triggering_prompt=triggering_prompt,
workspace_directory=workspace_directory,
)
@@ -39,7 +33,7 @@ def agent(config: Config):
def test_message_history_batch_summary(mocker, agent: Agent, config: Config):
history = MessageHistory.for_model(agent.config.smart_llm, agent=agent)
history = MessageHistory(agent.llm, agent=agent)
model = config.fast_llm
message_tlength = 0
message_count = 0

View File

@@ -47,24 +47,11 @@ def test_spinner_stops_spinning():
"""Tests that the spinner starts spinning and stops spinning without errors."""
with Spinner() as spinner:
time.sleep(1)
spinner.update_message(ALMOST_DONE_MESSAGE)
time.sleep(1)
assert spinner.running == False
def test_spinner_updates_message_and_still_spins():
"""Tests that the spinner message can be updated while the spinner is running and the spinner continues spinning."""
with Spinner() as spinner:
assert spinner.running == True
time.sleep(1)
spinner.update_message(ALMOST_DONE_MESSAGE)
time.sleep(1)
assert spinner.message == ALMOST_DONE_MESSAGE
assert spinner.running == False
assert not spinner.running
def test_spinner_can_be_used_as_context_manager():
"""Tests that the spinner can be used as a context manager."""
with Spinner() as spinner:
assert spinner.running == True
assert spinner.running == False
assert spinner.running
assert not spinner.running

View File

@@ -5,7 +5,7 @@ import pytest
import requests
from autogpt.config import Config
from autogpt.json_utils.utilities import extract_json_from_response, validate_json
from autogpt.json_utils.utilities import extract_dict_from_response, validate_dict
from autogpt.utils import (
get_bulletin_from_web,
get_current_git_branch,
@@ -187,22 +187,26 @@ def test_get_current_git_branch_failure(mock_repo):
def test_validate_json_valid(valid_json_response, config: Config):
assert validate_json(valid_json_response, config)
valid, errors = validate_dict(valid_json_response, config)
assert valid
assert errors is None
def test_validate_json_invalid(invalid_json_response, config: Config):
assert not validate_json(valid_json_response, config)
valid, errors = validate_dict(valid_json_response, config)
assert not valid
assert errors is not None
def test_extract_json_from_response(valid_json_response: dict):
emulated_response_from_openai = str(valid_json_response)
assert (
extract_json_from_response(emulated_response_from_openai) == valid_json_response
extract_dict_from_response(emulated_response_from_openai) == valid_json_response
)
def test_extract_json_from_response_wrapped_in_code_block(valid_json_response: dict):
emulated_response_from_openai = "```" + str(valid_json_response) + "```"
assert (
extract_json_from_response(emulated_response_from_openai) == valid_json_response
extract_dict_from_response(emulated_response_from_openai) == valid_json_response
)

View File

@@ -72,6 +72,10 @@ def patched_api_requestor(mocker: MockerFixture):
headers["AGENT-MODE"] = os.environ.get("AGENT_MODE")
headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE")
print(
f"[DEBUG] Outgoing API request: {headers}\n{data.decode() if data else None}"
)
# Add hash header for cheap & fast matching on cassette playback
headers["X-Content-Hash"] = sha256(
freeze_request_body(data), usedforsecurity=False