Link all challenges to benchmark python hook (#4786)

merwanehamadi
2023-06-24 06:20:58 -07:00
committed by GitHub
parent 307f6e50ad
commit cfdb24efac
13 changed files with 218 additions and 402 deletions

View File

@@ -1,9 +1,10 @@
 from autogpt.agent import Agent
 from autogpt.config import AIConfig, Config
+from autogpt.main import COMMAND_CATEGORIES
 from autogpt.memory.vector import get_memory
+from autogpt.models.command_registry import CommandRegistry
 from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 from autogpt.workspace import Workspace
-from tests.integration.agent_factory import get_command_registry


 def run_task(task) -> None:
@@ -39,3 +40,13 @@ def bootstrap_agent(task):
         triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
         workspace_directory=str(workspace_directory_path),
     )
+
+
+def get_command_registry(config):
+    command_registry = CommandRegistry()
+    enabled_command_categories = [
+        x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories
+    ]
+    for command_category in enabled_command_categories:
+        command_registry.import_commands(command_category)
+    return command_registry
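Note (not part of this commit): with get_command_registry now living next to run_task, the benchmark hook builds its own command registry instead of importing one from the test fixtures. A rough usage sketch, assuming AutoGPT's global Config singleton has already been set up by its normal startup code:

from autogpt.config import Config
from benchmarks import get_command_registry

config = Config()
# Imports every COMMAND_CATEGORIES module that is not listed in
# config.disabled_command_categories.
command_registry = get_command_registry(config)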

View File

@@ -1,25 +1,34 @@
 import pytest

-from autogpt.agent import Agent
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import run_interaction_loop
+from tests.challenges.utils import run_challenge

 CYCLE_COUNT = 2
+USER_INPUTS = [
+    "Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'\nWrite the price of the book to a file named 'browse_website.txt'.'\nUse the task_complete command to complete the task.\nDo not use any other commands."
+]


 @challenge()
 def test_browse_website(
-    browser_agent: Agent,
     patched_api_requestor: None,
     monkeypatch: pytest.MonkeyPatch,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
-    file_path = browser_agent.workspace.get_path("browse_website.txt")
-    run_interaction_loop(
-        monkeypatch, browser_agent, CYCLE_COUNT, challenge_name, level_to_run
+    run_challenge(
+        challenge_name,
+        level_to_run,
+        monkeypatch,
+        USER_INPUTS[level_to_run - 1],
+        CYCLE_COUNT,
     )
-    # content = read_file(file_path, config)
-    content = open(file_path, encoding="utf-8").read()
+    file_path = workspace.get_path("browse_website.txt")
+    with open(file_path, "r") as file:
+        content = file.read()

     assert "£25.89" in content, f"Expected £25.89, got {content}"

View File

@@ -1,14 +1,8 @@
 import pytest

 from autogpt.workspace import Workspace
-from benchmarks import run_task
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.schema import Task
-from tests.challenges.utils import (
-    get_workspace_path,
-    setup_mock_input,
-    setup_mock_log_cycle_agent_name,
-)
+from tests.challenges.utils import get_workspace_path, run_challenge

 CYCLE_COUNT_PER_LEVEL = [1, 1]
 EXPECTED_OUTPUTS_PER_LEVEL = [
@@ -23,18 +17,20 @@ USER_INPUTS = [
 @challenge()
 def test_write_file(
-    workspace: Workspace,
     patched_api_requestor: None,
     monkeypatch: pytest.MonkeyPatch,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
     patched_make_workspace: pytest.fixture,
 ) -> None:
-    user_input = USER_INPUTS[level_to_run - 1]
-    setup_mock_input(monkeypatch, CYCLE_COUNT_PER_LEVEL[level_to_run - 1])
-    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
-    task = Task(user_input=user_input)
-    run_task(task)
+    run_challenge(
+        challenge_name,
+        level_to_run,
+        monkeypatch,
+        USER_INPUTS[level_to_run - 1],
+        CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
+    )

     expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]

View File

@@ -5,11 +5,12 @@ from pytest_mock import MockerFixture
 from autogpt.agent import Agent
 from autogpt.commands.execute_code import execute_python_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
 from tests.challenges.utils import (
     copy_file_into_workspace,
-    get_workspace_path_from_agent,
-    run_interaction_loop,
+    get_workspace_path,
+    run_challenge,
 )

 CYCLE_COUNT = 5
@@ -17,15 +18,22 @@ EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
 DIRECTORY_PATH = Path(__file__).parent / "data"
 CODE_FILE_PATH = "code.py"
 TEST_FILE_PATH = "test.py"
+USER_INPUTS = [
+    "1- Run test.py using the execute_python_file command.\n2- Read code.py using the read_file command.\n3- Modify code.py using the write_to_file command.\nRepeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
+    "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
+    "Make test.py run without errors.",
+]


 @challenge()
 def test_debug_code_challenge_a(
-    debug_code_agents: Agent,
+    dummy_agent: Agent,
     monkeypatch: pytest.MonkeyPatch,
     patched_api_requestor: MockerFixture,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
     """
     Test whether the agent can debug a simple code snippet.
@@ -35,18 +43,21 @@ def test_debug_code_challenge_a(
     :patched_api_requestor: Sends api requests to our API CI pipeline
     :level_to_run: The level to run.
     """
-    debug_code_agent = debug_code_agents[level_to_run - 1]
-    copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH)
-    copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH)
+    copy_file_into_workspace(workspace, DIRECTORY_PATH, CODE_FILE_PATH)
+    copy_file_into_workspace(workspace, DIRECTORY_PATH, TEST_FILE_PATH)

-    run_interaction_loop(
-        monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run
+    run_challenge(
+        challenge_name,
+        level_to_run,
+        monkeypatch,
+        USER_INPUTS[level_to_run - 1],
+        CYCLE_COUNT,
     )

     output = execute_python_file(
-        get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
-        debug_code_agent,
+        get_workspace_path(workspace, TEST_FILE_PATH),
+        dummy_agent,
     )

     assert "error" not in output.lower(), f"Errors found in output: {output}!"

View File

@@ -1,24 +1,30 @@
 import pytest
 from pytest_mock import MockerFixture

-from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
+from tests.challenges.basic_abilities.test_browse_website import USER_INPUTS
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge

 CYCLE_COUNT = 3
 EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
-from autogpt.agent import Agent

 OUTPUT_LOCATION = "output.txt"
+USER_INPUTS = [
+    "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
+    "Write to a file called output.txt containing tesla's revenue in 2022.",
+    "Write to a file called output.txt containing tesla's revenue every year since its creation.",
+]


 @challenge()
 def test_information_retrieval_challenge_a(
-    information_retrieval_agents: Agent,
     monkeypatch: pytest.MonkeyPatch,
     patched_api_requestor: MockerFixture,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
     """
     Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.
@@ -26,19 +32,17 @@ def test_information_retrieval_challenge_a(
     :param get_company_revenue_agent: The agent to test.
     :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
     """
-    information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
-    run_interaction_loop(
-        monkeypatch,
-        information_retrieval_agent,
-        CYCLE_COUNT,
+    run_challenge(
         challenge_name,
         level_to_run,
+        monkeypatch,
+        USER_INPUTS[level_to_run - 1],
+        CYCLE_COUNT,
     )

-    file_path = get_workspace_path_from_agent(
-        information_retrieval_agent, OUTPUT_LOCATION
-    )
-    content = read_file(file_path, information_retrieval_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+    with open(file_path, "r") as file:
+        content = file.read()
     expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
     for revenue in expected_revenues:
         assert (

View File

@@ -1,24 +1,25 @@
-import contextlib
-
 import pytest
 from pytest_mock import MockerFixture

-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge

 CYCLE_COUNT = 3
 OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
+USER_INPUTS = [
+    "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt."
+]


 @challenge()
 def test_information_retrieval_challenge_b(
-    get_nobel_prize_agent: Agent,
     monkeypatch: pytest.MonkeyPatch,
     patched_api_requestor: MockerFixture,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
     """
     Test the challenge_b function in a given agent by mocking user inputs and checking the output file content.
@@ -29,17 +30,18 @@ def test_information_retrieval_challenge_b(
     :param level_to_run: The level to run.
     """
-    with contextlib.suppress(SystemExit):
-        run_interaction_loop(
-            monkeypatch,
-            get_nobel_prize_agent,
-            CYCLE_COUNT,
-            challenge_name,
-            level_to_run,
-        )
+    run_challenge(
+        challenge_name,
+        level_to_run,
+        monkeypatch,
+        USER_INPUTS[level_to_run - 1],
+        CYCLE_COUNT,
+    )

-    file_path = get_workspace_path_from_agent(get_nobel_prize_agent, OUTPUT_LOCATION)
-    content = read_file(file_path, get_nobel_prize_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+    with open(file_path, "r") as file:
+        content = file.read()
     assert "Andre Geim" in content, "Expected the file to contain Andre Geim"
     assert (
         "Konstantin Novoselov" in content

View File

@@ -1,23 +1,26 @@
+from typing import Any, Dict
+
 import pytest
 import yaml
 from pytest_mock import MockerFixture

-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge

 CYCLE_COUNT = 3
 OUTPUT_LOCATION = "kube.yaml"
+USER_INPUTS = ["Write a simple kubernetes deployment file and save it as a kube.yaml."]


 @challenge()
 def test_kubernetes_template_challenge_a(
-    kubernetes_agent: Agent,
     monkeypatch: pytest.MonkeyPatch,
     patched_api_requestor: MockerFixture,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
     """
     Test the challenge_a function in a given agent by mocking user inputs
@@ -28,16 +31,23 @@ def test_kubernetes_template_challenge_a(
         monkeypatch (pytest.MonkeyPatch)
         level_to_run (int)
     """
-    run_interaction_loop(
-        monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run
+    run_challenge(
+        challenge_name,
+        level_to_run,
+        monkeypatch,
+        USER_INPUTS[level_to_run - 1],
+        CYCLE_COUNT,
     )

-    file_path = get_workspace_path_from_agent(kubernetes_agent, OUTPUT_LOCATION)
-    content = read_file(file_path, kubernetes_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+    with open(file_path, "r") as file:
+        content_string = file.read()

     for word in ["apiVersion", "kind", "metadata", "spec"]:
-        assert word in content, f"Expected the file to contain {word}"
+        assert word in content_string, f"Expected the file to contain {word}"

-    content = yaml.safe_load(content)
+    yaml_as_dict: Dict[str, Any] = yaml.safe_load(content_string)
     for word in ["Service", "Deployment", "Pod"]:
-        assert word in content["kind"], f"Expected the file to contain {word}"
+        assert word in yaml_as_dict.get(
+            "kind", ""
+        ), f"Expected the file to contain {word}"

View File

@@ -1,49 +1,48 @@
 import pytest
 from pytest_mock import MockerFixture

-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge

 OUTPUT_LOCATION = "output.txt"
+USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"


 @challenge()
 def test_memory_challenge_a(
-    memory_management_agent: Agent,
     patched_api_requestor: MockerFixture,
     monkeypatch: pytest.MonkeyPatch,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
     """
     The agent reads a file containing a task_id. Then, it reads a series of other files.
     After reading 'n' files, the agent must write the task_id into a new file.
     Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
         patched_api_requestor (MockerFixture)
         monkeypatch (pytest.MonkeyPatch)
         level_to_run (int)
     """
     task_id = "2314"
-    create_instructions_files(memory_management_agent, level_to_run, task_id)
+    create_instructions_files(workspace, level_to_run, task_id)

-    run_interaction_loop(
-        monkeypatch,
-        memory_management_agent,
-        level_to_run + 2,
-        challenge_name,
-        level_to_run,
-    )
+    run_challenge(
+        challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
+    )

-    file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
-    content = read_file(file_path, memory_management_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+    with open(file_path, "r") as file:
+        content = file.read()
     assert task_id in content, f"Expected the file to contain {task_id}"


 def create_instructions_files(
-    memory_management_agent: Agent,
+    workspace: Workspace,
     num_files: int,
     task_id: str,
     base_filename: str = "instructions_",
@@ -51,7 +50,7 @@ def create_instructions_files(
     """
     Creates a series of instructions files for the memory challenge.
     Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
         num_files (int)
         task_id (str)
         base_filename (str, optional)
@@ -59,8 +58,9 @@ def create_instructions_files(
     for i in range(1, num_files + 1):
         content = generate_content(i, task_id, base_filename, num_files)
         file_name = f"{base_filename}{i}.txt"
-        file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
-        write_to_file(file_path, content, memory_management_agent)
+        file_path = get_workspace_path(workspace, file_name)
+        with open(file_path, "w") as file:
+            file.write(content)


 def generate_content(
def generate_content( def generate_content(

View File

@@ -1,56 +1,50 @@
 import pytest
 from pytest_mock import MockerFixture

-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import (
-    generate_noise,
-    get_workspace_path_from_agent,
-    run_interaction_loop,
-)
+from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge

 NOISE = 1000
 OUTPUT_LOCATION = "output.txt"
+USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"


 @challenge()
 def test_memory_challenge_b(
-    memory_management_agent: Agent,
     patched_api_requestor: MockerFixture,
     monkeypatch: pytest.MonkeyPatch,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
     """
     The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
     the agent must write all the task_ids into a new file, filtering out the noise.
     Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
         patched_api_requestor (MockerFixture)
         monkeypatch (pytest.MonkeyPatch)
         level_to_run (int)
     """
     task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
-    create_instructions_files(memory_management_agent, level_to_run, task_ids)
+    create_instructions_files(workspace, level_to_run, task_ids)

-    run_interaction_loop(
-        monkeypatch,
-        memory_management_agent,
-        level_to_run + 2,
-        challenge_name,
-        level_to_run,
-    )
+    run_challenge(
+        challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
+    )

-    file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
-    content = read_file(file_path, memory_management_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+    with open(file_path, "r") as file:
+        content = file.read()
     for task_id in task_ids:
         assert task_id in content, f"Expected the file to contain {task_id}"


 def create_instructions_files(
-    memory_management_agent: Agent,
+    workspace: Workspace,
     level: int,
     task_ids: list,
     base_filename: str = "instructions_",
@@ -59,7 +53,7 @@ def create_instructions_files(
     Creates a series of instructions files for the memory challenge.
     Args:
         level:
-        memory_management_agent (Agent)
+        workspace (Workspace)
         num_files (int)
         task_ids (list)
         base_filename (str, optional)
@@ -67,9 +61,10 @@ def create_instructions_files(
     for i in range(1, level + 1):
         content = generate_content(i, task_ids, base_filename, level)
         file_name = f"{base_filename}{i}.txt"
-        file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
-        write_to_file(file_path, content, memory_management_agent)
+        file_path = get_workspace_path(workspace, file_name)
+        with open(file_path, "w") as file:
+            file.write(content)


 def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str:

View File

@@ -1,26 +1,24 @@
 import pytest
 from pytest_mock import MockerFixture

-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import (
-    generate_noise,
-    get_workspace_path_from_agent,
-    run_interaction_loop,
-)
+from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge

 NOISE = 1200
 OUTPUT_LOCATION = "output.txt"
+USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"


 @challenge()
 def test_memory_challenge_c(
-    memory_management_agent: Agent,
     patched_api_requestor: MockerFixture,
     monkeypatch: pytest.MonkeyPatch,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
     """
     Instead of reading task Ids from files as with the previous challenges, the agent now must remember
@@ -28,7 +26,7 @@ def test_memory_challenge_c(
     after seeing several of them.
     Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
         patched_api_requestor (MockerFixture)
         monkeypatch (pytest.MonkeyPatch)
         level_to_run (int)
@@ -48,26 +46,23 @@ def test_memory_challenge_c(
     level_silly_phrases = silly_phrases[:level_to_run]
     create_instructions_files(
-        memory_management_agent,
+        workspace,
         level_to_run,
         level_silly_phrases,
     )

-    run_interaction_loop(
-        monkeypatch,
-        memory_management_agent,
-        level_to_run + 2,
-        challenge_name,
-        level_to_run,
-    )
+    run_challenge(
+        challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
+    )

-    file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
-    content = read_file(file_path, agent=memory_management_agent)
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+    content = read_file(file_path, agent=workspace)
     for phrase in level_silly_phrases:
         assert phrase in content, f"Expected the file to contain {phrase}"


 def create_instructions_files(
-    memory_management_agent: Agent,
+    workspace: Workspace,
     level: int,
     task_ids: list,
     base_filename: str = "instructions_",
@@ -76,7 +71,7 @@ def create_instructions_files(
     Creates a series of instructions files for the memory challenge.
     Args:
         level:
-        memory_management_agent (Agent)
+        workspace (Workspace)
         num_files (int)
         task_ids (list)
         base_filename (str, optional)
@@ -84,8 +79,9 @@ def create_instructions_files(
     for i in range(1, level + 1):
         content = generate_content(i, task_ids, base_filename, level)
         file_name = f"{base_filename}{i}.txt"
-        file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
-        write_to_file(file_path, content, memory_management_agent)
+        file_path = get_workspace_path(workspace, file_name)
+        with open(file_path, "w") as file:
+            file.write(content)


 def generate_content(

View File

@@ -4,28 +4,30 @@ from typing import Dict
 import pytest
 from pytest_mock import MockerFixture

-from autogpt.agent import Agent
-from autogpt.commands.file_operations import read_file, write_to_file
+from autogpt.commands.file_operations import read_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
-from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
+from tests.challenges.utils import get_workspace_path, run_challenge

 LEVEL_CURRENTLY_BEATEN = 1
 MAX_LEVEL = 5
 OUTPUT_LOCATION = "output.txt"
+USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"


 @challenge()
 def test_memory_challenge_d(
-    memory_management_agent: Agent,
     patched_api_requestor: MockerFixture,
     monkeypatch: pytest.MonkeyPatch,
     level_to_run: int,
     challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
     """
     The agent is given a series of events and must remember the respective beliefs of the characters.
     Args:
-        memory_management_agent (Agent)
+        workspace (Workspace)
         user_selected_level (int)
     """
     sally_anne_test_phrases = [
@@ -36,19 +38,14 @@ def test_memory_challenge_d(
         "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).",
     ]
     level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run]
-    create_instructions_files(
-        memory_management_agent, level_to_run, level_sally_anne_test_phrases
-    )
-    run_interaction_loop(
-        monkeypatch,
-        memory_management_agent,
-        level_to_run + 2,
-        challenge_name,
-        level_to_run,
-    )
-    file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
-    content = read_file(file_path, memory_management_agent)
+    create_instructions_files(workspace, level_to_run, level_sally_anne_test_phrases)
+    run_challenge(
+        challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
+    )
+
+    file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
+    content = read_file(file_path, workspace)
     check_beliefs(content, level_to_run)
@@ -176,7 +173,7 @@ def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]:
 def create_instructions_files(
-    memory_management_agent: Agent,
+    workspace: Workspace,
     level: int,
     test_phrases: list,
     base_filename: str = "instructions_",
@@ -185,15 +182,16 @@ def create_instructions_files(
     Creates a series of instructions files for the memory challenge.
     Args:
         level:
-        memory_management_agent (Agent)
+        workspace (Workspace)
         test_phrases (list)
         base_filename (str, optional)
     """
     for i in range(1, level + 1):
         content = generate_content(i, test_phrases, base_filename, level)
         file_name = f"{base_filename}{i}.txt"
-        file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
-        write_to_file(file_path, content, memory_management_agent)
+        file_path = get_workspace_path(workspace, file_name)
+        with open(file_path, "w") as file:
+            file.write(content)


 def generate_content(
def generate_content( def generate_content(

View File

@@ -6,9 +6,10 @@ from typing import Any, Generator
 import pytest

-from autogpt.agent import Agent
 from autogpt.log_cycle.log_cycle import LogCycleHandler
 from autogpt.workspace import Workspace
+from benchmarks import run_task
+from tests.challenges.schema import Task


 def generate_noise(noise_size: int) -> str:
@@ -40,20 +41,6 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None:
     monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen))


-def run_interaction_loop(
-    monkeypatch: pytest.MonkeyPatch,
-    agent: Agent,
-    cycle_count: int,
-    challenge_name: str,
-    level_to_run: int,
-) -> None:
-    setup_mock_input(monkeypatch, cycle_count)
-    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
-    with contextlib.suppress(SystemExit):
-        agent.start_interaction_loop()
-
-
 def setup_mock_log_cycle_agent_name(
     monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int
 ) -> None:
@@ -69,13 +56,23 @@ def get_workspace_path(workspace: Workspace, file_name: str) -> str:
     return str(workspace.get_path(file_name))


-def get_workspace_path_from_agent(agent: Agent, file_name: str) -> str:
-    return str(agent.workspace.get_path(file_name))
-
-
 def copy_file_into_workspace(
-    agent: Agent, directory_path: Path, file_path: str
+    workspace: Workspace, directory_path: Path, file_path: str
 ) -> None:
-    workspace_code_file_path = get_workspace_path_from_agent(agent, file_path)
+    workspace_code_file_path = get_workspace_path(workspace, file_path)
     code_file_path = directory_path / file_path
     shutil.copy(code_file_path, workspace_code_file_path)
+
+
+def run_challenge(
+    challenge_name: str,
+    level_to_run: int,
+    monkeypatch: pytest.MonkeyPatch,
+    user_input: str,
+    cycle_count: int,
+) -> None:
+    setup_mock_input(monkeypatch, cycle_count)
+    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
+    task = Task(user_input=user_input)
+    with contextlib.suppress(SystemExit):
+        run_task(task)
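Note (not part of this commit): run_challenge is now the single entry point every challenge goes through: it mocks the interactive prompt for cycle_count cycles, tags the log cycle with the challenge name and level, wraps the prompt string in a Task, and hands it to the benchmark's run_task hook. A hypothetical new challenge written against it might look like:

import pytest

from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path, run_challenge

# Hypothetical prompt; real challenges define one entry per level.
USER_INPUTS = ["Write 'hello world' to a file named hello.txt, then use the task_complete command."]


@challenge()
def test_hello_world(
    patched_api_requestor: None,
    monkeypatch: pytest.MonkeyPatch,
    level_to_run: int,
    challenge_name: str,
    workspace: Workspace,
    patched_make_workspace: pytest.fixture,
) -> None:
    run_challenge(
        challenge_name, level_to_run, monkeypatch, USER_INPUTS[level_to_run - 1], 2
    )

    with open(get_workspace_path(workspace, "hello.txt")) as file:
        assert "hello world" in file.read().lower()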

View File

@@ -2,259 +2,46 @@ import pytest
 from autogpt.agent import Agent
 from autogpt.config import AIConfig, Config
-from autogpt.main import COMMAND_CATEGORIES
-from autogpt.memory.vector import NoMemory, get_memory
+from autogpt.memory.vector import get_memory
 from autogpt.models.command_registry import CommandRegistry
-from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
 from autogpt.workspace import Workspace


 @pytest.fixture
-def agent_test_config(config: Config):
-    config.set_continuous_mode(False)
-    config.set_temperature(0)
-    config.plain_output = True
-    return config
-
-
-@pytest.fixture
-def memory_json_file(agent_test_config: Config):
-    was_memory_backend = agent_test_config.memory_backend
-
-    agent_test_config.set_memory_backend("json_file")
-    memory = get_memory(agent_test_config)
+def memory_json_file(config: Config):
+    was_memory_backend = config.memory_backend
+
+    config.set_memory_backend("json_file")
+    memory = get_memory(config)
     memory.clear()
     yield memory

-    agent_test_config.set_memory_backend(was_memory_backend)
+    config.set_memory_backend(was_memory_backend)


 @pytest.fixture
-def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace):
+def dummy_agent(config: Config, memory_json_file, workspace: Workspace):
     command_registry = CommandRegistry()
-    command_registry.import_commands("autogpt.commands.file_operations")
-    command_registry.import_commands("autogpt.commands.web_selenium")
-    command_registry.import_commands("autogpt.app")
-    command_registry.import_commands("autogpt.commands.task_statuses")

     ai_config = AIConfig(
-        ai_name="browse_website-GPT",
-        ai_role="an AI designed to use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html, answer the question 'What is the price of the book?' and write the price to a file named \"browse_website.txt\", and use the task_complete command to complete the task.",
+        ai_name="Dummy Agent",
+        ai_role="Dummy Role",
         ai_goals=[
-            "Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'",
-            'Write the price of the book to a file named "browse_website.txt".',
-            "Use the task_complete command to complete the task.",
-            "Do not use any other commands.",
+            "Dummy Task",
         ],
     )
     ai_config.command_registry = command_registry
-    system_prompt = ai_config.construct_full_prompt(agent_test_config)

     agent = Agent(
-        ai_name="",
-        memory=memory_none,
+        ai_name="Dummy Agent",
+        memory=memory_json_file,
         command_registry=command_registry,
         ai_config=ai_config,
-        config=agent_test_config,
+        config=config,
         next_action_count=0,
-        system_prompt=system_prompt,
-        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+        system_prompt="dummy_prompt",
+        triggering_prompt="dummy triggering prompt",
        workspace_directory=workspace.root,
     )

     return agent
-
-
-@pytest.fixture
-def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
-    command_registry = get_command_registry(agent_test_config)
-
-    ai_config = AIConfig(
-        ai_name="Follow-Instructions-GPT",
-        ai_role="an AI designed to read the instructions_1.txt file using the read_file method and follow the instructions in the file.",
-        ai_goals=[
-            "Use the command read_file to read the instructions_1.txt file",
-            "Follow the instructions in the instructions_1.txt file",
-        ],
-    )
-    ai_config.command_registry = command_registry
-    system_prompt = ai_config.construct_full_prompt(agent_test_config)
-
-    agent = Agent(
-        ai_name="Follow-Instructions-GPT",
-        memory=memory_json_file,
-        command_registry=command_registry,
-        ai_config=ai_config,
-        config=agent_test_config,
-        next_action_count=0,
-        system_prompt=system_prompt,
-        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
-        workspace_directory=workspace.root,
-    )
-
-    return agent
-
-
-@pytest.fixture
-def information_retrieval_agents(
-    agent_test_config, memory_json_file, workspace: Workspace
-):
-    agents = []
-    command_registry = get_command_registry(agent_test_config)
-
-    ai_goals = [
-        "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
-        "Write to a file called output.txt containing tesla's revenue in 2022.",
-        "Write to a file called output.txt containing tesla's revenue every year since its creation.",
-    ]
-    for ai_goal in ai_goals:
-        ai_config = AIConfig(
-            ai_name="Information Retrieval Agent",
-            ai_role="an autonomous agent that specializes in retrieving information.",
-            ai_goals=[ai_goal],
-        )
-        ai_config.command_registry = command_registry
-        system_prompt = ai_config.construct_full_prompt(agent_test_config)
-
-        agent_test_config.set_continuous_mode(False)
-        agents.append(
-            Agent(
-                ai_name="Information Retrieval Agent",
-                memory=memory_json_file,
-                command_registry=command_registry,
-                ai_config=ai_config,
-                config=agent_test_config,
-                next_action_count=0,
-                system_prompt=system_prompt,
-                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
-                workspace_directory=workspace.root,
-            )
-        )
-    return agents
-
-
-@pytest.fixture
-def kubernetes_agent(
-    agent_test_config: Config, memory_json_file: NoMemory, workspace: Workspace
-) -> Agent:
-    command_registry = CommandRegistry()
-    command_registry.import_commands("autogpt.commands.file_operations")
-    command_registry.import_commands("autogpt.app")
-
-    ai_config = AIConfig(
-        ai_name="Kubernetes",
-        ai_role="an autonomous agent that specializes in creating Kubernetes deployment templates.",
-        ai_goals=[
-            "Write a simple kubernetes deployment file and save it as a kube.yaml.",
-            # You should make a simple nginx web server that uses docker and exposes the port 80.
-        ],
-    )
-    ai_config.command_registry = command_registry
-
-    system_prompt = ai_config.construct_full_prompt(agent_test_config)
-    agent_test_config.set_continuous_mode(False)
-
-    agent = Agent(
-        ai_name="Kubernetes-Demo",
-        memory=memory_json_file,
-        command_registry=command_registry,
-        ai_config=ai_config,
-        config=agent_test_config,
-        next_action_count=0,
-        system_prompt=system_prompt,
-        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
-        workspace_directory=workspace.root,
-    )
-
-    return agent
-
-
-@pytest.fixture
-def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Workspace):
-    command_registry = CommandRegistry()
-    command_registry.import_commands("autogpt.commands.file_operations")
-    command_registry.import_commands("autogpt.app")
-    command_registry.import_commands("autogpt.commands.web_selenium")
-
-    ai_config = AIConfig(
-        ai_name="Get-PhysicsNobelPrize",
-        ai_role="An autonomous agent that specializes in physics history.",
-        ai_goals=[
-            "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt.",
-        ],
-    )
-    ai_config.command_registry = command_registry
-
-    system_prompt = ai_config.construct_full_prompt(agent_test_config)
-    agent_test_config.set_continuous_mode(False)
-
-    agent = Agent(
-        ai_name="Get-PhysicsNobelPrize",
-        memory=memory_json_file,
-        command_registry=command_registry,
-        ai_config=ai_config,
-        config=agent_test_config,
-        next_action_count=0,
-        system_prompt=system_prompt,
-        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
-        workspace_directory=workspace.root,
-    )
-
-    return agent
-
-
-@pytest.fixture
-def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace):
-    agents = []
-    goals = [
-        [
-            "1- Run test.py using the execute_python_file command.",
-            "2- Read code.py using the read_file command.",
-            "3- Modify code.py using the write_to_file command."
-            "Repeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
-        ],
-        [
-            "1- Run test.py.",
-            "2- Read code.py.",
-            "3- Modify code.py."
-            "Repeat step 1, 2 and 3 until test.py runs without errors.",
-        ],
-        ["1- Make test.py run without errors."],
-    ]
-
-    for goal in goals:
-        ai_config = AIConfig(
-            ai_name="Debug Code Agent",
-            ai_role="an autonomous agent that specializes in debugging python code",
-            ai_goals=goal,
-        )
-        command_registry = get_command_registry(agent_test_config)
-        ai_config.command_registry = command_registry
-        system_prompt = ai_config.construct_full_prompt(agent_test_config)
-        agent_test_config.set_continuous_mode(False)
-        agents.append(
-            Agent(
-                ai_name="Debug Code Agent",
-                memory=memory_json_file,
-                command_registry=command_registry,
-                ai_config=ai_config,
-                config=agent_test_config,
-                next_action_count=0,
-                system_prompt=system_prompt,
-                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
-                workspace_directory=workspace.root,
-            )
-        )
-    return agents
-
-
-def get_command_registry(agent_test_config):
-    command_registry = CommandRegistry()
-    enabled_command_categories = [
-        x
-        for x in COMMAND_CATEGORIES
-        if x not in agent_test_config.disabled_command_categories
-    ]
-    for command_category in enabled_command_categories:
-        command_registry.import_commands(command_category)
-    return command_registry
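Note (not part of this commit): after this change the only challenge that still requests an Agent fixture is the debug-code challenge, and only because execute_python_file takes an agent argument; every other challenge drives the agent built by the benchmark hook through run_task. A sketch of that remaining usage, with a hypothetical helper name:

from autogpt.commands.execute_code import execute_python_file
from tests.challenges.utils import get_workspace_path


def run_workspace_tests(workspace, dummy_agent):
    # dummy_agent exists solely to satisfy execute_python_file's signature.
    return execute_python_file(get_workspace_path(workspace, "test.py"), dummy_agent)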