Link all challenges to benchmark python hook (#4786)

2025-12-22 08:24:26 +01:00 · 2023-06-24 06:20:58 -07:00
parent 307f6e50ad
commit cfdb24efac
13 changed files with 218 additions and 402 deletions
--- a/benchmarks.py
+++ b/benchmarks.py
@@ -1,9 +1,10 @@
 from autogpt.agent import Agent
 from autogpt.config import AIConfig, Config
 from autogpt.main import COMMAND_CATEGORIES
 from autogpt.memory.vector import get_memory
 from autogpt.models.command_registry import CommandRegistry
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 from autogpt.workspace import Workspace
 from tests.integration.agent_factory import get_command_registry
 def run_task(task) -> None:
@@ -39,3 +40,13 @@ def bootstrap_agent(task):
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=str(workspace_directory_path),
    )
 def get_command_registry(config):
    """
    Build a CommandRegistry holding every enabled command category.

    A category counts as enabled when it appears in COMMAND_CATEGORIES and is
    not listed in ``config.disabled_command_categories``.

    Args:
        config: Object exposing ``disabled_command_categories``; presumably the
            autogpt ``Config`` (the parameter is not annotated, confirm against
            callers).

    Returns:
        The newly created CommandRegistry, after ``import_commands`` has been
        called once per enabled category.
    """
    command_registry = CommandRegistry()
    # Filtering COMMAND_CATEGORIES (rather than building from the disabled
    # list) keeps the categories in their declared order.
    enabled_command_categories = [
        x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories
    ]
    for command_category in enabled_command_categories:
        command_registry.import_commands(command_category)
    return command_registry
--- a/tests/challenges/basic_abilities/test_browse_website.py
+++ b/tests/challenges/basic_abilities/test_browse_website.py
@@ -1,25 +1,34 @@
 import pytest
-from autogpt.agent import Agent
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import run_interaction_loop
+from tests.challenges.utils import run_challenge
 CYCLE_COUNT = 2
 USER_INPUTS = [
    "Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'\nWrite the price of the book to a file named 'browse_website.txt'.'\nUse the task_complete command to complete the task.\nDo not use any other commands."
 ]
@challenge()
 def test_browse_website(
    browser_agent: Agent,
    patched_api_requestor: None,
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
-    file_path = browser_agent.workspace.get_path("browse_website.txt")
+    run_challenge(
-    run_interaction_loop(
+        challenge_name,
-        monkeypatch, browser_agent, CYCLE_COUNT, challenge_name, level_to_run
+        level_to_run,
        monkeypatch,
        USER_INPUTS[level_to_run - 1],
        CYCLE_COUNT,
    )
-    # content = read_file(file_path, config)
+    file_path = workspace.get_path("browse_website.txt")
-    content = open(file_path, encoding="utf-8").read()
+
    with open(file_path, "r") as file:
        content = file.read()
    assert "£25.89" in content, f"Expected £25.89, got {content}"
--- a/tests/challenges/basic_abilities/test_write_file.py
+++ b/tests/challenges/basic_abilities/test_write_file.py
@@ -1,14 +1,8 @@
 import pytest
 from autogpt.workspace import Workspace
 from benchmarks import run_task
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.schema import Task
+from tests.challenges.utils import get_workspace_path, run_challenge
 from tests.challenges.utils import (
    get_workspace_path,
    setup_mock_input,
    setup_mock_log_cycle_agent_name,
 )
 CYCLE_COUNT_PER_LEVEL = [1, 1]
 EXPECTED_OUTPUTS_PER_LEVEL = [
@@ -23,18 +17,20 @@ USER_INPUTS = [
@challenge()
 def test_write_file(
    workspace: Workspace,
    patched_api_requestor: None,
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
-    user_input = USER_INPUTS[level_to_run - 1]
+    run_challenge(
-    setup_mock_input(monkeypatch, CYCLE_COUNT_PER_LEVEL[level_to_run - 1])
+        challenge_name,
-    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
+        level_to_run,
-    task = Task(user_input=user_input)
+        monkeypatch,
-    run_task(task)
+        USER_INPUTS[level_to_run - 1],
        CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
    )
    expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]
--- a/tests/challenges/debug_code/test_debug_code_challenge_a.py
+++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py
@@ -5,11 +5,12 @@ from pytest_mock import MockerFixture
 from autogpt.agent import Agent
 from autogpt.commands.execute_code import execute_python_file
 from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
 from tests.challenges.utils import (
    copy_file_into_workspace,
-    get_workspace_path_from_agent,
+    get_workspace_path,
-    run_interaction_loop,
+    run_challenge,
 )
 CYCLE_COUNT = 5
@@ -17,15 +18,22 @@ EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
 DIRECTORY_PATH = Path(__file__).parent / "data"
 CODE_FILE_PATH = "code.py"
 TEST_FILE_PATH = "test.py"
 USER_INPUTS = [
    "1- Run test.py using the execute_python_file command.\n2- Read code.py using the read_file command.\n3- Modify code.py using the write_to_file command.\nRepeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
    "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
    "Make test.py run without errors.",
 ]
@challenge()
 def test_debug_code_challenge_a(
-    debug_code_agents: Agent,
+    dummy_agent: Agent,
    monkeypatch: pytest.MonkeyPatch,
    patched_api_requestor: MockerFixture,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    Test whether the agent can debug a simple code snippet.
@@ -35,18 +43,21 @@ def test_debug_code_challenge_a(
    :patched_api_requestor: Sends api requests to our API CI pipeline
    :level_to_run: The level to run.
    """
    debug_code_agent = debug_code_agents[level_to_run - 1]
-    copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH)
+    copy_file_into_workspace(workspace, DIRECTORY_PATH, CODE_FILE_PATH)
-    copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH)
+    copy_file_into_workspace(workspace, DIRECTORY_PATH, TEST_FILE_PATH)
-    run_interaction_loop(
+    run_challenge(
-        monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run
+        challenge_name,
        level_to_run,
        monkeypatch,
        USER_INPUTS[level_to_run - 1],
        CYCLE_COUNT,
    )
    output = execute_python_file(
-        get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
+        get_workspace_path(workspace, TEST_FILE_PATH),
-        debug_code_agent,
+        dummy_agent,
    )
    assert "error" not in output.lower(), f"Errors found in output: {output}!"
--- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py
+++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py
@@ -1,24 +1,30 @@
 import pytest
 from pytest_mock import MockerFixture
-from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
 from tests.challenges.basic_abilities.test_browse_website import USER_INPUTS
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
 CYCLE_COUNT = 3
 EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
 from autogpt.agent import Agent
 OUTPUT_LOCATION = "output.txt"
 USER_INPUTS = [
    "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
    "Write to a file called output.txt containing tesla's revenue in 2022.",
    "Write to a file called output.txt containing tesla's revenue every year since its creation.",
 ]
@challenge()
 def test_information_retrieval_challenge_a(
    information_retrieval_agents: Agent,
    monkeypatch: pytest.MonkeyPatch,
    patched_api_requestor: MockerFixture,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.
@@ -26,19 +32,17 @@ def test_information_retrieval_challenge_a(
    :param get_company_revenue_agent: The agent to test.
    :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
    """
-    information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
+    run_challenge(
    run_interaction_loop(
        monkeypatch,
        information_retrieval_agent,
        CYCLE_COUNT,
        challenge_name,
        level_to_run,
        monkeypatch,
        USER_INPUTS[level_to_run - 1],
        CYCLE_COUNT,
    )
-    file_path = get_workspace_path_from_agent(
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
-        information_retrieval_agent, OUTPUT_LOCATION
+    with open(file_path, "r") as file:
-    )
+        content = file.read()
    content = read_file(file_path, information_retrieval_agent)
    expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
    for revenue in expected_revenues:
        assert (
--- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py
+++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py
@@ -1,24 +1,25 @@
 import contextlib
 import pytest
 from pytest_mock import MockerFixture
-from autogpt.agent import Agent
+from autogpt.workspace import Workspace
 from autogpt.commands.file_operations import read_file
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
 CYCLE_COUNT = 3
 OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
 USER_INPUTS = [
    "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt."
 ]
@challenge()
 def test_information_retrieval_challenge_b(
    get_nobel_prize_agent: Agent,
    monkeypatch: pytest.MonkeyPatch,
    patched_api_requestor: MockerFixture,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    Test the challenge_b function in a given agent by mocking user inputs and checking the output file content.
@@ -29,17 +30,18 @@ def test_information_retrieval_challenge_b(
    :param level_to_run: The level to run.
    """
-    with contextlib.suppress(SystemExit):
+    run_challenge(
-        run_interaction_loop(
+        challenge_name,
-            monkeypatch,
+        level_to_run,
-            get_nobel_prize_agent,
+        monkeypatch,
-            CYCLE_COUNT,
+        USER_INPUTS[level_to_run - 1],
-            challenge_name,
+        CYCLE_COUNT,
-            level_to_run,
+    )
        )
    file_path = get_workspace_path_from_agent(get_nobel_prize_agent, OUTPUT_LOCATION)
-    content = read_file(file_path, get_nobel_prize_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
    with open(file_path, "r") as file:
        content = file.read()
    assert "Andre Geim" in content, "Expected the file to contain Andre Geim"
    assert (
        "Konstantin Novoselov" in content
--- a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py
+++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py
@@ -1,23 +1,26 @@
 from typing import Any, Dict
 import pytest
 import yaml
 from pytest_mock import MockerFixture
-from autogpt.agent import Agent
+from autogpt.workspace import Workspace
 from autogpt.commands.file_operations import read_file
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
 CYCLE_COUNT = 3
 OUTPUT_LOCATION = "kube.yaml"
 USER_INPUTS = ["Write a simple kubernetes deployment file and save it as a kube.yaml."]
@challenge()
 def test_kubernetes_template_challenge_a(
    kubernetes_agent: Agent,
    monkeypatch: pytest.MonkeyPatch,
    patched_api_requestor: MockerFixture,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    Test the challenge_a function in a given agent by mocking user inputs
@@ -28,16 +31,23 @@ def test_kubernetes_template_challenge_a(
        monkeypatch (pytest.MonkeyPatch)
        level_to_run (int)
    """
-    run_interaction_loop(
+    run_challenge(
-        monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run
+        challenge_name,
        level_to_run,
        monkeypatch,
        USER_INPUTS[level_to_run - 1],
        CYCLE_COUNT,
    )
-    file_path = get_workspace_path_from_agent(kubernetes_agent, OUTPUT_LOCATION)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
-    content = read_file(file_path, kubernetes_agent)
+    with open(file_path, "r") as file:
        content_string = file.read()
    for word in ["apiVersion", "kind", "metadata", "spec"]:
-        assert word in content, f"Expected the file to contain {word}"
+        assert word in content_string, f"Expected the file to contain {word}"
-    content = yaml.safe_load(content)
+    yaml_as_dict: Dict[str, Any] = yaml.safe_load(content_string)
    for word in ["Service", "Deployment", "Pod"]:
-        assert word in content["kind"], f"Expected the file to contain {word}"
+        assert word in yaml_as_dict.get(
            "kind", ""
        ), f"Expected the file to contain {word}"
--- a/tests/challenges/memory/test_memory_challenge_a.py
+++ b/tests/challenges/memory/test_memory_challenge_a.py
@@ -1,49 +1,48 @@
 import pytest
 from pytest_mock import MockerFixture
-from autogpt.agent import Agent
+from autogpt.workspace import Workspace
 from autogpt.commands.file_operations import read_file, write_to_file
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
 OUTPUT_LOCATION = "output.txt"
 USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
 def test_memory_challenge_a(
    memory_management_agent: Agent,
    patched_api_requestor: MockerFixture,
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    The agent reads a file containing a task_id. Then, it reads a series of other files.
    After reading 'n' files, the agent must write the task_id into a new file.
    Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
        patched_api_requestor (MockerFixture)
        monkeypatch (pytest.MonkeyPatch)
        level_to_run (int)
    """
    task_id = "2314"
-    create_instructions_files(memory_management_agent, level_to_run, task_id)
+    create_instructions_files(workspace, level_to_run, task_id)
-    run_interaction_loop(
+    run_challenge(
-        monkeypatch,
+        challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
        memory_management_agent,
        level_to_run + 2,
        challenge_name,
        level_to_run,
    )
-    file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
-    content = read_file(file_path, memory_management_agent)
+    with open(file_path, "r") as file:
        content = file.read()
    assert task_id in content, f"Expected the file to contain {task_id}"
 def create_instructions_files(
-    memory_management_agent: Agent,
+    workspace: Workspace,
    num_files: int,
    task_id: str,
    base_filename: str = "instructions_",
@@ -51,7 +50,7 @@ def create_instructions_files(
    """
    Creates a series of instructions files for the memory challenge.
    Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
        num_files (int)
        task_id (str)
        base_filename (str, optional)
@@ -59,8 +58,9 @@ def create_instructions_files(
    for i in range(1, num_files + 1):
        content = generate_content(i, task_id, base_filename, num_files)
        file_name = f"{base_filename}{i}.txt"
-        file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
+        file_path = get_workspace_path(workspace, file_name)
-        write_to_file(file_path, content, memory_management_agent)
+        with open(file_path, "w") as file:
            file.write(content)
 def generate_content(
--- a/tests/challenges/memory/test_memory_challenge_b.py
+++ b/tests/challenges/memory/test_memory_challenge_b.py
@@ -1,56 +1,50 @@
 import pytest
 from pytest_mock import MockerFixture
-from autogpt.agent import Agent
+from autogpt.workspace import Workspace
 from autogpt.commands.file_operations import read_file, write_to_file
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import (
+from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
    generate_noise,
    get_workspace_path_from_agent,
    run_interaction_loop,
 )
 NOISE = 1000
 OUTPUT_LOCATION = "output.txt"
 USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
 def test_memory_challenge_b(
    memory_management_agent: Agent,
    patched_api_requestor: MockerFixture,
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
    the agent must write all the task_ids into a new file, filtering out the noise.
    Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
        patched_api_requestor (MockerFixture)
        monkeypatch (pytest.MonkeyPatch)
        level_to_run (int)
    """
    task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
-    create_instructions_files(memory_management_agent, level_to_run, task_ids)
+    create_instructions_files(workspace, level_to_run, task_ids)
-    run_interaction_loop(
+    run_challenge(
-        monkeypatch,
+        challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
        memory_management_agent,
        level_to_run + 2,
        challenge_name,
        level_to_run,
    )
-    file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
-    content = read_file(file_path, memory_management_agent)
+    with open(file_path, "r") as file:
        content = file.read()
    for task_id in task_ids:
        assert task_id in content, f"Expected the file to contain {task_id}"
 def create_instructions_files(
-    memory_management_agent: Agent,
+    workspace: Workspace,
    level: int,
    task_ids: list,
    base_filename: str = "instructions_",
@@ -59,7 +53,7 @@ def create_instructions_files(
    Creates a series of instructions files for the memory challenge.
    Args:
        level:
-        memory_management_agent (Agent)
+        workspace (Workspace)
        num_files (int)
        task_ids (list)
        base_filename (str, optional)
@@ -67,9 +61,10 @@ def create_instructions_files(
    for i in range(1, level + 1):
        content = generate_content(i, task_ids, base_filename, level)
        file_name = f"{base_filename}{i}.txt"
-        file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
+        file_path = get_workspace_path(workspace, file_name)
-        write_to_file(file_path, content, memory_management_agent)
+        with open(file_path, "w") as file:
            file.write(content)
 def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str:
--- a/tests/challenges/memory/test_memory_challenge_c.py
+++ b/tests/challenges/memory/test_memory_challenge_c.py
@@ -1,26 +1,24 @@
 import pytest
 from pytest_mock import MockerFixture
-from autogpt.agent import Agent
+from autogpt.commands.file_operations import read_file
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import (
+from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
    generate_noise,
    get_workspace_path_from_agent,
    run_interaction_loop,
 )
 NOISE = 1200
 OUTPUT_LOCATION = "output.txt"
 USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
 def test_memory_challenge_c(
    memory_management_agent: Agent,
    patched_api_requestor: MockerFixture,
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    Instead of reading task Ids from files as with the previous challenges, the agent now must remember
@@ -28,7 +26,7 @@ def test_memory_challenge_c(
    after seeing several of them.
    Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
        patched_api_requestor (MockerFixture)
        monkeypatch (pytest.MonkeyPatch)
        level_to_run (int)
@@ -48,26 +46,23 @@ def test_memory_challenge_c(
    level_silly_phrases = silly_phrases[:level_to_run]
    create_instructions_files(
-        memory_management_agent,
+        workspace,
        level_to_run,
        level_silly_phrases,
    )
-    run_interaction_loop(
+    run_challenge(
-        monkeypatch,
+        challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
        memory_management_agent,
        level_to_run + 2,
        challenge_name,
        level_to_run,
    )
-    file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
+
-    content = read_file(file_path, agent=memory_management_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
    content = read_file(file_path, agent=workspace)
    for phrase in level_silly_phrases:
        assert phrase in content, f"Expected the file to contain {phrase}"
 def create_instructions_files(
-    memory_management_agent: Agent,
+    workspace: Workspace,
    level: int,
    task_ids: list,
    base_filename: str = "instructions_",
@@ -76,7 +71,7 @@ def create_instructions_files(
    Creates a series of instructions files for the memory challenge.
    Args:
        level:
-        memory_management_agent (Agent)
+        workspace (Workspace)
        num_files (int)
        task_ids (list)
        base_filename (str, optional)
@@ -84,8 +79,9 @@ def create_instructions_files(
    for i in range(1, level + 1):
        content = generate_content(i, task_ids, base_filename, level)
        file_name = f"{base_filename}{i}.txt"
-        file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
+        file_path = get_workspace_path(workspace, file_name)
-        write_to_file(file_path, content, memory_management_agent)
+        with open(file_path, "w") as file:
            file.write(content)
 def generate_content(
--- a/tests/challenges/memory/test_memory_challenge_d.py
+++ b/tests/challenges/memory/test_memory_challenge_d.py
@@ -4,28 +4,30 @@ from typing import Dict
 import pytest
 from pytest_mock import MockerFixture
-from autogpt.agent import Agent
+from autogpt.commands.file_operations import read_file
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge
 LEVEL_CURRENTLY_BEATEN = 1
 MAX_LEVEL = 5
 OUTPUT_LOCATION = "output.txt"
 USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
 def test_memory_challenge_d(
    memory_management_agent: Agent,
    patched_api_requestor: MockerFixture,
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    The agent is given a series of events and must remember the respective beliefs of the characters.
    Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
        user_selected_level (int)
    """
    sally_anne_test_phrases = [
@@ -36,19 +38,14 @@ def test_memory_challenge_d(
        "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).",
    ]
    level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run]
-    create_instructions_files(
+    create_instructions_files(workspace, level_to_run, level_sally_anne_test_phrases)
-        memory_management_agent, level_to_run, level_sally_anne_test_phrases
+    run_challenge(
        challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
    )
    run_interaction_loop(
        monkeypatch,
        memory_management_agent,
        level_to_run + 2,
        challenge_name,
        level_to_run,
    )
    file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
-    content = read_file(file_path, memory_management_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
    content = read_file(file_path, workspace)
    check_beliefs(content, level_to_run)
@@ -176,7 +173,7 @@ def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]:
 def create_instructions_files(
-    memory_management_agent: Agent,
+    workspace: Workspace,
    level: int,
    test_phrases: list,
    base_filename: str = "instructions_",
@@ -185,15 +182,16 @@ def create_instructions_files(
    Creates a series of instructions files for the memory challenge.
    Args:
        level:
-        memory_management_agent (Agent)
+        workspace (Workspace)
        test_phrases (list)
        base_filename (str, optional)
    """
    for i in range(1, level + 1):
        content = generate_content(i, test_phrases, base_filename, level)
        file_name = f"{base_filename}{i}.txt"
-        file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
+        file_path = get_workspace_path(workspace, file_name)
-        write_to_file(file_path, content, memory_management_agent)
+        with open(file_path, "w") as file:
            file.write(content)
 def generate_content(
--- a/tests/challenges/utils.py
+++ b/tests/challenges/utils.py
@@ -6,9 +6,10 @@ from typing import Any, Generator
 import pytest
 from autogpt.agent import Agent
 from autogpt.log_cycle.log_cycle import LogCycleHandler
 from autogpt.workspace import Workspace
 from benchmarks import run_task
 from tests.challenges.schema import Task
 def generate_noise(noise_size: int) -> str:
@@ -40,20 +41,6 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None:
    monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen))
 def run_interaction_loop(
    monkeypatch: pytest.MonkeyPatch,
    agent: Agent,
    cycle_count: int,
    challenge_name: str,
    level_to_run: int,
 ) -> None:
    """
    Start the interaction loop of an already-built agent after installing the
    test mocks.

    NOTE(review): the enclosing hunk header (-40,20 +41,6) suggests this helper
    is among the lines removed by this commit; confirm before relying on it.

    Args:
        monkeypatch (pytest.MonkeyPatch): Passed to both setup helpers.
        agent (Agent): Agent whose ``start_interaction_loop`` is invoked.
        cycle_count (int): Forwarded to ``setup_mock_input``.
        challenge_name (str): Forwarded to ``setup_mock_log_cycle_agent_name``.
        level_to_run (int): Forwarded to ``setup_mock_log_cycle_agent_name``.
    """
    setup_mock_input(monkeypatch, cycle_count)
    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
    # A SystemExit raised inside the loop is swallowed so the caller keeps
    # running instead of terminating.
    with contextlib.suppress(SystemExit):
        agent.start_interaction_loop()
 def setup_mock_log_cycle_agent_name(
    monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int
 ) -> None:
@@ -69,13 +56,23 @@ def get_workspace_path(workspace: Workspace, file_name: str) -> str:
    return str(workspace.get_path(file_name))
 def get_workspace_path_from_agent(agent: Agent, file_name: str) -> str:
    """Return ``agent.workspace.get_path(file_name)`` converted to ``str``."""
    return str(agent.workspace.get_path(file_name))
 def copy_file_into_workspace(
-    agent: Agent, directory_path: Path, file_path: str
+    workspace: Workspace, directory_path: Path, file_path: str
 ) -> None:
-    workspace_code_file_path = get_workspace_path_from_agent(agent, file_path)
+    workspace_code_file_path = get_workspace_path(workspace, file_path)
    code_file_path = directory_path / file_path
    shutil.copy(code_file_path, workspace_code_file_path)
 def run_challenge(
    challenge_name: str,
    level_to_run: int,
    monkeypatch: pytest.MonkeyPatch,
    user_input: str,
    cycle_count: int,
 ) -> None:
    """
    Run one challenge by wrapping ``user_input`` in a ``Task`` and passing it
    to ``run_task`` (imported from ``benchmarks``), after installing the test
    mocks.

    Args:
        challenge_name (str): Forwarded to ``setup_mock_log_cycle_agent_name``.
        level_to_run (int): Forwarded to ``setup_mock_log_cycle_agent_name``.
        monkeypatch (pytest.MonkeyPatch): Passed to both setup helpers.
        user_input (str): Becomes the ``user_input`` field of the ``Task``.
        cycle_count (int): Forwarded to ``setup_mock_input``.
    """
    setup_mock_input(monkeypatch, cycle_count)
    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
    task = Task(user_input=user_input)
    # A SystemExit raised while the task runs is swallowed so the caller keeps
    # running instead of terminating.
    with contextlib.suppress(SystemExit):
        run_task(task)
--- a/tests/integration/agent_factory.py
+++ b/tests/integration/agent_factory.py
@@ -2,259 +2,46 @@ import pytest
 from autogpt.agent import Agent
 from autogpt.config import AIConfig, Config
-from autogpt.main import COMMAND_CATEGORIES
+from autogpt.memory.vector import get_memory
 from autogpt.memory.vector import NoMemory, get_memory
 from autogpt.models.command_registry import CommandRegistry
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 from autogpt.workspace import Workspace
@pytest.fixture
-def agent_test_config(config: Config):
+def memory_json_file(config: Config):
-    config.set_continuous_mode(False)
+    was_memory_backend = config.memory_backend
    config.set_temperature(0)
    config.plain_output = True
    return config
-
+    config.set_memory_backend("json_file")
-@pytest.fixture
+    memory = get_memory(config)
 def memory_json_file(agent_test_config: Config):
    was_memory_backend = agent_test_config.memory_backend
    agent_test_config.set_memory_backend("json_file")
    memory = get_memory(agent_test_config)
    memory.clear()
    yield memory
-    agent_test_config.set_memory_backend(was_memory_backend)
+    config.set_memory_backend(was_memory_backend)
@pytest.fixture
-def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace):
+def dummy_agent(config: Config, memory_json_file, workspace: Workspace):
    # Fixture that builds and returns an Agent: it registers the command
    # modules listed below, wraps them in an AIConfig, and passes both to the
    # Agent constructor together with the workspace root.
    # NOTE(review): the unchanged lines below still reference
    # `agent_test_config` and `memory_none`, which the added signature does
    # not provide - presumably an artifact of how this diff was rendered;
    # confirm against the applied file.
    command_registry = CommandRegistry()
    command_registry.import_commands("autogpt.commands.file_operations")
    command_registry.import_commands("autogpt.commands.web_selenium")
    command_registry.import_commands("autogpt.app")
    command_registry.import_commands("autogpt.commands.task_statuses")
    ai_config = AIConfig(
-        ai_name="browse_website-GPT",
+        ai_name="Dummy Agent",
-        ai_role="an AI designed to use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html, answer the question 'What is the price of the book?' and write the price to a file named \"browse_website.txt\", and use the task_complete command to complete the task.",
+        ai_role="Dummy Role",
        ai_goals=[
-            "Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'",
+            "Dummy Task",
            'Write the price of the book to a file named "browse_website.txt".',
            "Use the task_complete command to complete the task.",
            "Do not use any other commands.",
        ],
    )
    # The registry is attached to the AIConfig before the full prompt is built.
    ai_config.command_registry = command_registry
    system_prompt = ai_config.construct_full_prompt(agent_test_config)
    agent = Agent(
-        ai_name="",
+        ai_name="Dummy Agent",
        memory=memory_none,
        command_registry=command_registry,
        ai_config=ai_config,
        config=agent_test_config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )
    return agent
@pytest.fixture
 def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
    # Fixture that returns an Agent whose AIConfig goals are to read
    # instructions_1.txt with read_file and follow the instructions in it.
    # The command registry comes from get_command_registry(agent_test_config).
    # NOTE(review): the added lines below use `config` and literal dummy
    # prompts, but this signature only provides `agent_test_config` - these
    # -/+ pairs may belong to a different fixture in the rendered diff;
    # confirm against the applied file.
    command_registry = get_command_registry(agent_test_config)
    ai_config = AIConfig(
        ai_name="Follow-Instructions-GPT",
        ai_role="an AI designed to read the instructions_1.txt file using the read_file method and follow the instructions in the file.",
        ai_goals=[
            "Use the command read_file to read the instructions_1.txt file",
            "Follow the instructions in the instructions_1.txt file",
        ],
    )
    ai_config.command_registry = command_registry
    system_prompt = ai_config.construct_full_prompt(agent_test_config)
    agent = Agent(
        ai_name="Follow-Instructions-GPT",
        memory=memory_json_file,
        command_registry=command_registry,
        ai_config=ai_config,
-        config=agent_test_config,
+        config=config,
        next_action_count=0,
-        system_prompt=system_prompt,
+        system_prompt="dummy_prompt",
-        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+        triggering_prompt="dummy triggering prompt",
        workspace_directory=workspace.root,
    )
    return agent
@pytest.fixture
 def information_retrieval_agents(
    agent_test_config, memory_json_file, workspace: Workspace
 ):
    agents = []
    command_registry = get_command_registry(agent_test_config)
    ai_goals = [
        "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
        "Write to a file called output.txt containing tesla's revenue in 2022.",
        "Write to a file called output.txt containing tesla's revenue every year since its creation.",
    ]
    for ai_goal in ai_goals:
        ai_config = AIConfig(
            ai_name="Information Retrieval Agent",
            ai_role="an autonomous agent that specializes in retrieving information.",
            ai_goals=[ai_goal],
        )
        ai_config.command_registry = command_registry
        system_prompt = ai_config.construct_full_prompt(agent_test_config)
        agent_test_config.set_continuous_mode(False)
        agents.append(
            Agent(
                ai_name="Information Retrieval Agent",
                memory=memory_json_file,
                command_registry=command_registry,
                ai_config=ai_config,
                config=agent_test_config,
                next_action_count=0,
                system_prompt=system_prompt,
                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
                workspace_directory=workspace.root,
            )
        )
    return agents
@pytest.fixture
 def kubernetes_agent(
    agent_test_config: Config, memory_json_file: NoMemory, workspace: Workspace
 ) -> Agent:
    command_registry = CommandRegistry()
    command_registry.import_commands("autogpt.commands.file_operations")
    command_registry.import_commands("autogpt.app")
    ai_config = AIConfig(
        ai_name="Kubernetes",
        ai_role="an autonomous agent that specializes in creating Kubernetes deployment templates.",
        ai_goals=[
            "Write a simple kubernetes deployment file and save it as a kube.yaml.",
            # You should make a simple nginx web server that uses docker and exposes the port 80.
        ],
    )
    ai_config.command_registry = command_registry
    system_prompt = ai_config.construct_full_prompt(agent_test_config)
    agent_test_config.set_continuous_mode(False)
    agent = Agent(
        ai_name="Kubernetes-Demo",
        memory=memory_json_file,
        command_registry=command_registry,
        ai_config=ai_config,
        config=agent_test_config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )
    return agent
@pytest.fixture
 def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Workspace):
    command_registry = CommandRegistry()
    command_registry.import_commands("autogpt.commands.file_operations")
    command_registry.import_commands("autogpt.app")
    command_registry.import_commands("autogpt.commands.web_selenium")
    ai_config = AIConfig(
        ai_name="Get-PhysicsNobelPrize",
        ai_role="An autonomous agent that specializes in physics history.",
        ai_goals=[
            "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt.",
        ],
    )
    ai_config.command_registry = command_registry
    system_prompt = ai_config.construct_full_prompt(agent_test_config)
    agent_test_config.set_continuous_mode(False)
    agent = Agent(
        ai_name="Get-PhysicsNobelPrize",
        memory=memory_json_file,
        command_registry=command_registry,
        ai_config=ai_config,
        config=agent_test_config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )
    return agent
@pytest.fixture
 def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace):
    agents = []
    goals = [
        [
            "1- Run test.py using the execute_python_file command.",
            "2- Read code.py using the read_file command.",
            "3- Modify code.py using the write_to_file command."
            "Repeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
        ],
        [
            "1- Run test.py.",
            "2- Read code.py.",
            "3- Modify code.py."
            "Repeat step 1, 2 and 3 until test.py runs without errors.",
        ],
        ["1- Make test.py run without errors."],
    ]
    for goal in goals:
        ai_config = AIConfig(
            ai_name="Debug Code Agent",
            ai_role="an autonomous agent that specializes in debugging python code",
            ai_goals=goal,
        )
        command_registry = get_command_registry(agent_test_config)
        ai_config.command_registry = command_registry
        system_prompt = ai_config.construct_full_prompt(agent_test_config)
        agent_test_config.set_continuous_mode(False)
        agents.append(
            Agent(
                ai_name="Debug Code Agent",
                memory=memory_json_file,
                command_registry=command_registry,
                ai_config=ai_config,
                config=agent_test_config,
                next_action_count=0,
                system_prompt=system_prompt,
                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
                workspace_directory=workspace.root,
            )
        )
    return agents
 def get_command_registry(agent_test_config):
    """Build a CommandRegistry holding every command category that is not disabled.

    Categories are taken from ``COMMAND_CATEGORIES`` in order; any category
    listed in ``agent_test_config.disabled_command_categories`` is skipped.
    """
    registry = CommandRegistry()

    def _is_enabled(category):
        return category not in agent_test_config.disabled_command_categories

    # Materialise the enabled list before importing anything, so that
    # filtering is finished before the first import_commands call.
    for category in list(filter(_is_enabled, COMMAND_CATEGORIES)):
        registry.import_commands(category)
    return registry