mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-22 08:24:26 +01:00
Link all challenges to benchmark python hook (#4786)
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.config import AIConfig, Config
|
||||
from autogpt.main import COMMAND_CATEGORIES
|
||||
from autogpt.memory.vector import get_memory
|
||||
from autogpt.models.command_registry import CommandRegistry
|
||||
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.integration.agent_factory import get_command_registry
|
||||
|
||||
|
||||
def run_task(task) -> None:
|
||||
@@ -39,3 +40,13 @@ def bootstrap_agent(task):
|
||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
||||
workspace_directory=str(workspace_directory_path),
|
||||
)
|
||||
|
||||
|
||||
def get_command_registry(config):
    """Create a CommandRegistry loaded with every enabled command category.

    A category from ``COMMAND_CATEGORIES`` counts as enabled when it is not
    listed in ``config.disabled_command_categories``.

    Args:
        config: Object exposing ``disabled_command_categories``.

    Returns:
        The populated ``CommandRegistry`` instance.
    """
    registry = CommandRegistry()
    # Select first, import afterwards, so filtering is finished before any
    # category module is imported.
    selected = []
    for category in COMMAND_CATEGORIES:
        if category not in config.disabled_command_categories:
            selected.append(category)
    for category in selected:
        registry.import_commands(category)
    return registry
|
||||
|
||||
@@ -1,25 +1,34 @@
|
||||
import pytest
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import run_interaction_loop
|
||||
from tests.challenges.utils import run_challenge
|
||||
|
||||
CYCLE_COUNT = 2
|
||||
USER_INPUTS = [
|
||||
"Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'\nWrite the price of the book to a file named 'browse_website.txt'.'\nUse the task_complete command to complete the task.\nDo not use any other commands."
|
||||
]
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_browse_website(
|
||||
browser_agent: Agent,
|
||||
patched_api_requestor: None,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
file_path = browser_agent.workspace.get_path("browse_website.txt")
|
||||
run_interaction_loop(
|
||||
monkeypatch, browser_agent, CYCLE_COUNT, challenge_name, level_to_run
|
||||
run_challenge(
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
monkeypatch,
|
||||
USER_INPUTS[level_to_run - 1],
|
||||
CYCLE_COUNT,
|
||||
)
|
||||
|
||||
# content = read_file(file_path, config)
|
||||
content = open(file_path, encoding="utf-8").read()
|
||||
file_path = workspace.get_path("browse_website.txt")
|
||||
|
||||
with open(file_path, "r") as file:
|
||||
content = file.read()
|
||||
assert "£25.89" in content, f"Expected £25.89, got {content}"
|
||||
|
||||
@@ -1,14 +1,8 @@
|
||||
import pytest
|
||||
|
||||
from autogpt.workspace import Workspace
|
||||
from benchmarks import run_task
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.schema import Task
|
||||
from tests.challenges.utils import (
|
||||
get_workspace_path,
|
||||
setup_mock_input,
|
||||
setup_mock_log_cycle_agent_name,
|
||||
)
|
||||
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||
|
||||
CYCLE_COUNT_PER_LEVEL = [1, 1]
|
||||
EXPECTED_OUTPUTS_PER_LEVEL = [
|
||||
@@ -23,18 +17,20 @@ USER_INPUTS = [
|
||||
|
||||
@challenge()
|
||||
def test_write_file(
|
||||
workspace: Workspace,
|
||||
patched_api_requestor: None,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
user_input = USER_INPUTS[level_to_run - 1]
|
||||
setup_mock_input(monkeypatch, CYCLE_COUNT_PER_LEVEL[level_to_run - 1])
|
||||
setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
|
||||
task = Task(user_input=user_input)
|
||||
run_task(task)
|
||||
run_challenge(
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
monkeypatch,
|
||||
USER_INPUTS[level_to_run - 1],
|
||||
CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
|
||||
)
|
||||
|
||||
expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]
|
||||
|
||||
|
||||
@@ -5,11 +5,12 @@ from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.execute_code import execute_python_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import (
|
||||
copy_file_into_workspace,
|
||||
get_workspace_path_from_agent,
|
||||
run_interaction_loop,
|
||||
get_workspace_path,
|
||||
run_challenge,
|
||||
)
|
||||
|
||||
CYCLE_COUNT = 5
|
||||
@@ -17,15 +18,22 @@ EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
|
||||
DIRECTORY_PATH = Path(__file__).parent / "data"
|
||||
CODE_FILE_PATH = "code.py"
|
||||
TEST_FILE_PATH = "test.py"
|
||||
USER_INPUTS = [
|
||||
"1- Run test.py using the execute_python_file command.\n2- Read code.py using the read_file command.\n3- Modify code.py using the write_to_file command.\nRepeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
|
||||
"1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
|
||||
"Make test.py run without errors.",
|
||||
]
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_debug_code_challenge_a(
|
||||
debug_code_agents: Agent,
|
||||
dummy_agent: Agent,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
patched_api_requestor: MockerFixture,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test whether the agent can debug a simple code snippet.
|
||||
@@ -35,18 +43,21 @@ def test_debug_code_challenge_a(
|
||||
:patched_api_requestor: Sends api requests to our API CI pipeline
|
||||
:level_to_run: The level to run.
|
||||
"""
|
||||
debug_code_agent = debug_code_agents[level_to_run - 1]
|
||||
|
||||
copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH)
|
||||
copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH)
|
||||
copy_file_into_workspace(workspace, DIRECTORY_PATH, CODE_FILE_PATH)
|
||||
copy_file_into_workspace(workspace, DIRECTORY_PATH, TEST_FILE_PATH)
|
||||
|
||||
run_interaction_loop(
|
||||
monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run
|
||||
run_challenge(
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
monkeypatch,
|
||||
USER_INPUTS[level_to_run - 1],
|
||||
CYCLE_COUNT,
|
||||
)
|
||||
|
||||
output = execute_python_file(
|
||||
get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
|
||||
debug_code_agent,
|
||||
get_workspace_path(workspace, TEST_FILE_PATH),
|
||||
dummy_agent,
|
||||
)
|
||||
|
||||
assert "error" not in output.lower(), f"Errors found in output: {output}!"
|
||||
|
||||
@@ -1,24 +1,30 @@
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.commands.file_operations import read_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.basic_abilities.test_browse_website import USER_INPUTS
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
||||
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||
|
||||
CYCLE_COUNT = 3
|
||||
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
|
||||
from autogpt.agent import Agent
|
||||
|
||||
OUTPUT_LOCATION = "output.txt"
|
||||
USER_INPUTS = [
|
||||
"Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
|
||||
"Write to a file called output.txt containing tesla's revenue in 2022.",
|
||||
"Write to a file called output.txt containing tesla's revenue every year since its creation.",
|
||||
]
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_information_retrieval_challenge_a(
|
||||
information_retrieval_agents: Agent,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
patched_api_requestor: MockerFixture,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.
|
||||
@@ -26,19 +32,17 @@ def test_information_retrieval_challenge_a(
|
||||
:param get_company_revenue_agent: The agent to test.
|
||||
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
|
||||
"""
|
||||
information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
|
||||
run_interaction_loop(
|
||||
monkeypatch,
|
||||
information_retrieval_agent,
|
||||
CYCLE_COUNT,
|
||||
run_challenge(
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
monkeypatch,
|
||||
USER_INPUTS[level_to_run - 1],
|
||||
CYCLE_COUNT,
|
||||
)
|
||||
|
||||
file_path = get_workspace_path_from_agent(
|
||||
information_retrieval_agent, OUTPUT_LOCATION
|
||||
)
|
||||
content = read_file(file_path, information_retrieval_agent)
|
||||
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||
with open(file_path, "r") as file:
|
||||
content = file.read()
|
||||
expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
|
||||
for revenue in expected_revenues:
|
||||
assert (
|
||||
|
||||
@@ -1,24 +1,25 @@
|
||||
import contextlib
|
||||
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.file_operations import read_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
||||
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||
|
||||
CYCLE_COUNT = 3
|
||||
OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
|
||||
USER_INPUTS = [
|
||||
"Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt."
|
||||
]
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_information_retrieval_challenge_b(
|
||||
get_nobel_prize_agent: Agent,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
patched_api_requestor: MockerFixture,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test the challenge_b function in a given agent by mocking user inputs and checking the output file content.
|
||||
@@ -29,17 +30,18 @@ def test_information_retrieval_challenge_b(
|
||||
:param level_to_run: The level to run.
|
||||
"""
|
||||
|
||||
with contextlib.suppress(SystemExit):
|
||||
run_interaction_loop(
|
||||
monkeypatch,
|
||||
get_nobel_prize_agent,
|
||||
CYCLE_COUNT,
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
)
|
||||
file_path = get_workspace_path_from_agent(get_nobel_prize_agent, OUTPUT_LOCATION)
|
||||
run_challenge(
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
monkeypatch,
|
||||
USER_INPUTS[level_to_run - 1],
|
||||
CYCLE_COUNT,
|
||||
)
|
||||
|
||||
content = read_file(file_path, get_nobel_prize_agent)
|
||||
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||
|
||||
with open(file_path, "r") as file:
|
||||
content = file.read()
|
||||
assert "Andre Geim" in content, "Expected the file to contain Andre Geim"
|
||||
assert (
|
||||
"Konstantin Novoselov" in content
|
||||
|
||||
@@ -1,23 +1,26 @@
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.file_operations import read_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
||||
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||
|
||||
CYCLE_COUNT = 3
|
||||
OUTPUT_LOCATION = "kube.yaml"
|
||||
USER_INPUTS = ["Write a simple kubernetes deployment file and save it as a kube.yaml."]
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_kubernetes_template_challenge_a(
|
||||
kubernetes_agent: Agent,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
patched_api_requestor: MockerFixture,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test the challenge_a function in a given agent by mocking user inputs
|
||||
@@ -28,16 +31,23 @@ def test_kubernetes_template_challenge_a(
|
||||
monkeypatch (pytest.MonkeyPatch)
|
||||
level_to_run (int)
|
||||
"""
|
||||
run_interaction_loop(
|
||||
monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run
|
||||
run_challenge(
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
monkeypatch,
|
||||
USER_INPUTS[level_to_run - 1],
|
||||
CYCLE_COUNT,
|
||||
)
|
||||
|
||||
file_path = get_workspace_path_from_agent(kubernetes_agent, OUTPUT_LOCATION)
|
||||
content = read_file(file_path, kubernetes_agent)
|
||||
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||
with open(file_path, "r") as file:
|
||||
content_string = file.read()
|
||||
|
||||
for word in ["apiVersion", "kind", "metadata", "spec"]:
|
||||
assert word in content, f"Expected the file to contain {word}"
|
||||
assert word in content_string, f"Expected the file to contain {word}"
|
||||
|
||||
content = yaml.safe_load(content)
|
||||
yaml_as_dict: Dict[str, Any] = yaml.safe_load(content_string)
|
||||
for word in ["Service", "Deployment", "Pod"]:
|
||||
assert word in content["kind"], f"Expected the file to contain {word}"
|
||||
assert word in yaml_as_dict.get(
|
||||
"kind", ""
|
||||
), f"Expected the file to contain {word}"
|
||||
|
||||
@@ -1,49 +1,48 @@
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.file_operations import read_file, write_to_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
||||
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||
|
||||
OUTPUT_LOCATION = "output.txt"
|
||||
|
||||
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_memory_challenge_a(
|
||||
memory_management_agent: Agent,
|
||||
patched_api_requestor: MockerFixture,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
The agent reads a file containing a task_id. Then, it reads a series of other files.
|
||||
After reading 'n' files, the agent must write the task_id into a new file.
|
||||
Args:
|
||||
memory_management_agent (Agent)
|
||||
workspace (Workspace)
|
||||
patched_api_requestor (MockerFixture)
|
||||
monkeypatch (pytest.MonkeyPatch)
|
||||
level_to_run (int)
|
||||
"""
|
||||
task_id = "2314"
|
||||
create_instructions_files(memory_management_agent, level_to_run, task_id)
|
||||
create_instructions_files(workspace, level_to_run, task_id)
|
||||
|
||||
run_interaction_loop(
|
||||
monkeypatch,
|
||||
memory_management_agent,
|
||||
level_to_run + 2,
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
run_challenge(
|
||||
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
|
||||
)
|
||||
|
||||
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
|
||||
content = read_file(file_path, memory_management_agent)
|
||||
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||
with open(file_path, "r") as file:
|
||||
content = file.read()
|
||||
assert task_id in content, f"Expected the file to contain {task_id}"
|
||||
|
||||
|
||||
def create_instructions_files(
|
||||
memory_management_agent: Agent,
|
||||
workspace: Workspace,
|
||||
num_files: int,
|
||||
task_id: str,
|
||||
base_filename: str = "instructions_",
|
||||
@@ -51,7 +50,7 @@ def create_instructions_files(
|
||||
"""
|
||||
Creates a series of instructions files for the memory challenge.
|
||||
Args:
|
||||
memory_management_agent (Agent)
|
||||
workspace (Workspace)
|
||||
num_files (int)
|
||||
task_id (str)
|
||||
base_filename (str, optional)
|
||||
@@ -59,8 +58,9 @@ def create_instructions_files(
|
||||
for i in range(1, num_files + 1):
|
||||
content = generate_content(i, task_id, base_filename, num_files)
|
||||
file_name = f"{base_filename}{i}.txt"
|
||||
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
|
||||
write_to_file(file_path, content, memory_management_agent)
|
||||
file_path = get_workspace_path(workspace, file_name)
|
||||
with open(file_path, "w") as file:
|
||||
file.write(content)
|
||||
|
||||
|
||||
def generate_content(
|
||||
|
||||
@@ -1,56 +1,50 @@
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.file_operations import read_file, write_to_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import (
|
||||
generate_noise,
|
||||
get_workspace_path_from_agent,
|
||||
run_interaction_loop,
|
||||
)
|
||||
from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
|
||||
|
||||
NOISE = 1000
|
||||
OUTPUT_LOCATION = "output.txt"
|
||||
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_memory_challenge_b(
|
||||
memory_management_agent: Agent,
|
||||
patched_api_requestor: MockerFixture,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
|
||||
the agent must write all the task_ids into a new file, filtering out the noise.
|
||||
|
||||
Args:
|
||||
memory_management_agent (Agent)
|
||||
workspace (Workspace)
|
||||
patched_api_requestor (MockerFixture)
|
||||
monkeypatch (pytest.MonkeyPatch)
|
||||
level_to_run (int)
|
||||
"""
|
||||
task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
|
||||
create_instructions_files(memory_management_agent, level_to_run, task_ids)
|
||||
create_instructions_files(workspace, level_to_run, task_ids)
|
||||
|
||||
run_interaction_loop(
|
||||
monkeypatch,
|
||||
memory_management_agent,
|
||||
level_to_run + 2,
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
run_challenge(
|
||||
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
|
||||
)
|
||||
|
||||
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
|
||||
content = read_file(file_path, memory_management_agent)
|
||||
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||
with open(file_path, "r") as file:
|
||||
content = file.read()
|
||||
for task_id in task_ids:
|
||||
assert task_id in content, f"Expected the file to contain {task_id}"
|
||||
|
||||
|
||||
def create_instructions_files(
|
||||
memory_management_agent: Agent,
|
||||
workspace: Workspace,
|
||||
level: int,
|
||||
task_ids: list,
|
||||
base_filename: str = "instructions_",
|
||||
@@ -59,7 +53,7 @@ def create_instructions_files(
|
||||
Creates a series of instructions files for the memory challenge.
|
||||
Args:
|
||||
level:
|
||||
memory_management_agent (Agent)
|
||||
workspace (Workspace)
|
||||
num_files (int)
|
||||
task_ids (list)
|
||||
base_filename (str, optional)
|
||||
@@ -67,9 +61,10 @@ def create_instructions_files(
|
||||
for i in range(1, level + 1):
|
||||
content = generate_content(i, task_ids, base_filename, level)
|
||||
file_name = f"{base_filename}{i}.txt"
|
||||
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
|
||||
file_path = get_workspace_path(workspace, file_name)
|
||||
|
||||
write_to_file(file_path, content, memory_management_agent)
|
||||
with open(file_path, "w") as file:
|
||||
file.write(content)
|
||||
|
||||
|
||||
def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str:
|
||||
|
||||
@@ -1,26 +1,24 @@
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.file_operations import read_file, write_to_file
|
||||
from autogpt.commands.file_operations import read_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import (
|
||||
generate_noise,
|
||||
get_workspace_path_from_agent,
|
||||
run_interaction_loop,
|
||||
)
|
||||
from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
|
||||
|
||||
NOISE = 1200
|
||||
OUTPUT_LOCATION = "output.txt"
|
||||
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_memory_challenge_c(
|
||||
memory_management_agent: Agent,
|
||||
patched_api_requestor: MockerFixture,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
Instead of reading task Ids from files as with the previous challenges, the agent now must remember
|
||||
@@ -28,7 +26,7 @@ def test_memory_challenge_c(
|
||||
after seeing several of them.
|
||||
|
||||
Args:
|
||||
memory_management_agent (Agent)
|
||||
workspace (Workspace)
|
||||
patched_api_requestor (MockerFixture)
|
||||
monkeypatch (pytest.MonkeyPatch)
|
||||
level_to_run (int)
|
||||
@@ -48,26 +46,23 @@ def test_memory_challenge_c(
|
||||
|
||||
level_silly_phrases = silly_phrases[:level_to_run]
|
||||
create_instructions_files(
|
||||
memory_management_agent,
|
||||
workspace,
|
||||
level_to_run,
|
||||
level_silly_phrases,
|
||||
)
|
||||
|
||||
run_interaction_loop(
|
||||
monkeypatch,
|
||||
memory_management_agent,
|
||||
level_to_run + 2,
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
run_challenge(
|
||||
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
|
||||
)
|
||||
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
|
||||
content = read_file(file_path, agent=memory_management_agent)
|
||||
|
||||
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||
content = read_file(file_path, agent=workspace)
|
||||
for phrase in level_silly_phrases:
|
||||
assert phrase in content, f"Expected the file to contain {phrase}"
|
||||
|
||||
|
||||
def create_instructions_files(
|
||||
memory_management_agent: Agent,
|
||||
workspace: Workspace,
|
||||
level: int,
|
||||
task_ids: list,
|
||||
base_filename: str = "instructions_",
|
||||
@@ -76,7 +71,7 @@ def create_instructions_files(
|
||||
Creates a series of instructions files for the memory challenge.
|
||||
Args:
|
||||
level:
|
||||
memory_management_agent (Agent)
|
||||
workspace (Workspace)
|
||||
num_files (int)
|
||||
task_ids (list)
|
||||
base_filename (str, optional)
|
||||
@@ -84,8 +79,9 @@ def create_instructions_files(
|
||||
for i in range(1, level + 1):
|
||||
content = generate_content(i, task_ids, base_filename, level)
|
||||
file_name = f"{base_filename}{i}.txt"
|
||||
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
|
||||
write_to_file(file_path, content, memory_management_agent)
|
||||
file_path = get_workspace_path(workspace, file_name)
|
||||
with open(file_path, "w") as file:
|
||||
file.write(content)
|
||||
|
||||
|
||||
def generate_content(
|
||||
|
||||
@@ -4,28 +4,30 @@ from typing import Dict
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.file_operations import read_file, write_to_file
|
||||
from autogpt.commands.file_operations import read_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
||||
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||
|
||||
LEVEL_CURRENTLY_BEATEN = 1
|
||||
MAX_LEVEL = 5
|
||||
OUTPUT_LOCATION = "output.txt"
|
||||
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_memory_challenge_d(
|
||||
memory_management_agent: Agent,
|
||||
patched_api_requestor: MockerFixture,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
The agent is given a series of events and must remember the respective beliefs of the characters.
|
||||
Args:
|
||||
memory_management_agent (Agent)
|
||||
workspace (Workspace)
|
||||
user_selected_level (int)
|
||||
"""
|
||||
sally_anne_test_phrases = [
|
||||
@@ -36,19 +38,14 @@ def test_memory_challenge_d(
|
||||
"Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).",
|
||||
]
|
||||
level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run]
|
||||
create_instructions_files(
|
||||
memory_management_agent, level_to_run, level_sally_anne_test_phrases
|
||||
create_instructions_files(workspace, level_to_run, level_sally_anne_test_phrases)
|
||||
run_challenge(
|
||||
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
|
||||
)
|
||||
run_interaction_loop(
|
||||
monkeypatch,
|
||||
memory_management_agent,
|
||||
level_to_run + 2,
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
)
|
||||
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
|
||||
|
||||
content = read_file(file_path, memory_management_agent)
|
||||
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||
|
||||
content = read_file(file_path, workspace)
|
||||
check_beliefs(content, level_to_run)
|
||||
|
||||
|
||||
@@ -176,7 +173,7 @@ def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]:
|
||||
|
||||
|
||||
def create_instructions_files(
|
||||
memory_management_agent: Agent,
|
||||
workspace: Workspace,
|
||||
level: int,
|
||||
test_phrases: list,
|
||||
base_filename: str = "instructions_",
|
||||
@@ -185,15 +182,16 @@ def create_instructions_files(
|
||||
Creates a series of instructions files for the memory challenge.
|
||||
Args:
|
||||
level:
|
||||
memory_management_agent (Agent)
|
||||
workspace (Workspace)
|
||||
test_phrases (list)
|
||||
base_filename (str, optional)
|
||||
"""
|
||||
for i in range(1, level + 1):
|
||||
content = generate_content(i, test_phrases, base_filename, level)
|
||||
file_name = f"{base_filename}{i}.txt"
|
||||
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
|
||||
write_to_file(file_path, content, memory_management_agent)
|
||||
file_path = get_workspace_path(workspace, file_name)
|
||||
with open(file_path, "w") as file:
|
||||
file.write(content)
|
||||
|
||||
|
||||
def generate_content(
|
||||
|
||||
@@ -6,9 +6,10 @@ from typing import Any, Generator
|
||||
|
||||
import pytest
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.log_cycle.log_cycle import LogCycleHandler
|
||||
from autogpt.workspace import Workspace
|
||||
from benchmarks import run_task
|
||||
from tests.challenges.schema import Task
|
||||
|
||||
|
||||
def generate_noise(noise_size: int) -> str:
|
||||
@@ -40,20 +41,6 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None:
|
||||
monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen))
|
||||
|
||||
|
||||
def run_interaction_loop(
    monkeypatch: pytest.MonkeyPatch,
    agent: Agent,
    cycle_count: int,
    challenge_name: str,
    level_to_run: int,
) -> None:
    """Drive ``agent`` through its interaction loop with mocked user input.

    Args:
        monkeypatch: pytest monkeypatch fixture handed to the setup helpers.
        agent: Agent whose ``start_interaction_loop`` is invoked.
        cycle_count: Forwarded to ``setup_mock_input``.
        challenge_name: Forwarded to ``setup_mock_log_cycle_agent_name``.
        level_to_run: Forwarded to ``setup_mock_log_cycle_agent_name``.
    """
    setup_mock_input(monkeypatch, cycle_count)
    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)

    # A SystemExit raised by the loop is swallowed so it does not propagate
    # out of the calling test.
    ignore_exit = contextlib.suppress(SystemExit)
    with ignore_exit:
        agent.start_interaction_loop()
|
||||
|
||||
|
||||
def setup_mock_log_cycle_agent_name(
|
||||
monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int
|
||||
) -> None:
|
||||
@@ -69,13 +56,23 @@ def get_workspace_path(workspace: Workspace, file_name: str) -> str:
|
||||
return str(workspace.get_path(file_name))
|
||||
|
||||
|
||||
def get_workspace_path_from_agent(agent: Agent, file_name: str) -> str:
    """Return, as a string, the path of ``file_name`` in ``agent``'s workspace."""
    agent_workspace = agent.workspace
    resolved = agent_workspace.get_path(file_name)
    return str(resolved)
|
||||
|
||||
|
||||
def copy_file_into_workspace(
|
||||
agent: Agent, directory_path: Path, file_path: str
|
||||
workspace: Workspace, directory_path: Path, file_path: str
|
||||
) -> None:
|
||||
workspace_code_file_path = get_workspace_path_from_agent(agent, file_path)
|
||||
workspace_code_file_path = get_workspace_path(workspace, file_path)
|
||||
code_file_path = directory_path / file_path
|
||||
shutil.copy(code_file_path, workspace_code_file_path)
|
||||
|
||||
|
||||
def run_challenge(
    challenge_name: str,
    level_to_run: int,
    monkeypatch: pytest.MonkeyPatch,
    user_input: str,
    cycle_count: int,
) -> None:
    """Run one challenge through the benchmark ``run_task`` hook.

    Args:
        challenge_name: Forwarded to ``setup_mock_log_cycle_agent_name``.
        level_to_run: Forwarded to ``setup_mock_log_cycle_agent_name``.
        monkeypatch: pytest monkeypatch fixture handed to the setup helpers.
        user_input: Text wrapped in a ``Task`` and passed to ``run_task``.
        cycle_count: Forwarded to ``setup_mock_input``.
    """
    setup_mock_input(monkeypatch, cycle_count)
    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)

    # Build the task outside the guarded region; only run_task is shielded.
    challenge_task = Task(user_input=user_input)
    try:
        run_task(challenge_task)
    except SystemExit:
        # A SystemExit raised by the run is deliberately ignored so it does
        # not propagate out of the calling test.
        pass
|
||||
|
||||
@@ -2,259 +2,46 @@ import pytest
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.config import AIConfig, Config
|
||||
from autogpt.main import COMMAND_CATEGORIES
|
||||
from autogpt.memory.vector import NoMemory, get_memory
|
||||
from autogpt.memory.vector import get_memory
|
||||
from autogpt.models.command_registry import CommandRegistry
|
||||
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
|
||||
from autogpt.workspace import Workspace
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def agent_test_config(config: Config):
|
||||
config.set_continuous_mode(False)
|
||||
config.set_temperature(0)
|
||||
config.plain_output = True
|
||||
return config
|
||||
@pytest.fixture
def memory_json_file(config: Config):
    """Yield a cleared memory object while ``config`` uses the "json_file" backend.

    The backend that was configured before the fixture ran is remembered and
    put back once the consuming test has finished.

    NOTE(review): the text this replaces mixed a ``config``-based and an
    ``agent_test_config``-based variant of the same fixture (every statement
    appeared twice, once per name). The ``config``-based variant is kept --
    confirm that is the intended dependency.
    """
    was_memory_backend = config.memory_backend

    config.set_memory_backend("json_file")
    memory = get_memory(config)
    memory.clear()
    yield memory

    # Teardown: restore whatever backend was active before this fixture.
    config.set_memory_backend(was_memory_backend)
|
||||
|
||||
|
||||
@pytest.fixture
def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace):
    """Build an ``Agent`` whose goals are to browse a book page and record its price.

    The command registry imports the file-operations, web-selenium, app and
    task-status command modules. The system prompt comes from
    ``ai_config.construct_full_prompt(agent_test_config)``.
    """
    command_registry = CommandRegistry()
    command_registry.import_commands("autogpt.commands.file_operations")
    command_registry.import_commands("autogpt.commands.web_selenium")
    command_registry.import_commands("autogpt.app")
    command_registry.import_commands("autogpt.commands.task_statuses")

    ai_config = AIConfig(
        ai_name="browse_website-GPT",
        ai_role="an AI designed to use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html, answer the question 'What is the price of the book?' and write the price to a file named \"browse_website.txt\", and use the task_complete command to complete the task.",
        ai_goals=[
            "Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'",
            'Write the price of the book to a file named "browse_website.txt".',
            "Use the task_complete command to complete the task.",
            "Do not use any other commands.",
        ],
    )
    ai_config.command_registry = command_registry

    system_prompt = ai_config.construct_full_prompt(agent_test_config)

    agent = Agent(
        ai_name="",
        memory=memory_none,
        command_registry=command_registry,
        ai_config=ai_config,
        config=agent_test_config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )

    return agent


@pytest.fixture
def dummy_agent(config: Config, memory_json_file, workspace: Workspace):
    """Build a placeholder ``Agent`` with an empty command registry.

    Name, role, goal and both prompts are fixed dummy strings; no command
    modules are imported into the registry.
    """
    command_registry = CommandRegistry()

    ai_config = AIConfig(
        ai_name="Dummy Agent",
        ai_role="Dummy Role",
        ai_goals=[
            "Dummy Task",
        ],
    )
    ai_config.command_registry = command_registry

    agent = Agent(
        ai_name="Dummy Agent",
        memory=memory_json_file,
        command_registry=command_registry,
        ai_config=ai_config,
        config=config,
        next_action_count=0,
        system_prompt="dummy_prompt",
        triggering_prompt="dummy triggering prompt",
        workspace_directory=workspace.root,
    )

    return agent


@pytest.fixture
def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
    """Build an ``Agent`` told to read ``instructions_1.txt`` and follow it.

    The command registry comes from ``get_command_registry(agent_test_config)``
    and the system prompt from ``ai_config.construct_full_prompt``.
    """
    command_registry = get_command_registry(agent_test_config)

    ai_config = AIConfig(
        ai_name="Follow-Instructions-GPT",
        ai_role="an AI designed to read the instructions_1.txt file using the read_file method and follow the instructions in the file.",
        ai_goals=[
            "Use the command read_file to read the instructions_1.txt file",
            "Follow the instructions in the instructions_1.txt file",
        ],
    )
    ai_config.command_registry = command_registry

    system_prompt = ai_config.construct_full_prompt(agent_test_config)

    agent = Agent(
        ai_name="Follow-Instructions-GPT",
        memory=memory_json_file,
        command_registry=command_registry,
        ai_config=ai_config,
        config=agent_test_config,
        next_action_count=0,
        system_prompt=system_prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )

    return agent
|
||||
|
||||
|
||||
@pytest.fixture
def information_retrieval_agents(
    agent_test_config, memory_json_file, workspace: Workspace
):
    """Return a list of three information-retrieval ``Agent`` objects.

    Each agent gets exactly one goal (see ``goal_texts`` below); all of them
    share one command registry obtained from ``get_command_registry``, the
    same memory object and the same workspace root.
    """
    shared_registry = get_command_registry(agent_test_config)

    goal_texts = [
        "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
        "Write to a file called output.txt containing tesla's revenue in 2022.",
        "Write to a file called output.txt containing tesla's revenue every year since its creation.",
    ]

    def _build_agent(goal_text):
        # One AIConfig / prompt / Agent per goal, built in the same order as
        # a plain loop would: config, registry, prompt, continuous mode, agent.
        ai_config = AIConfig(
            ai_name="Information Retrieval Agent",
            ai_role="an autonomous agent that specializes in retrieving information.",
            ai_goals=[goal_text],
        )
        ai_config.command_registry = shared_registry
        prompt = ai_config.construct_full_prompt(agent_test_config)
        agent_test_config.set_continuous_mode(False)
        return Agent(
            ai_name="Information Retrieval Agent",
            memory=memory_json_file,
            command_registry=shared_registry,
            ai_config=ai_config,
            config=agent_test_config,
            next_action_count=0,
            system_prompt=prompt,
            triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
            workspace_directory=workspace.root,
        )

    return [_build_agent(goal_text) for goal_text in goal_texts]
|
||||
|
||||
|
||||
@pytest.fixture
def kubernetes_agent(
    agent_test_config: Config, memory_json_file: NoMemory, workspace: Workspace
) -> Agent:
    """Build an ``Agent`` whose single goal is to write ``kube.yaml``.

    Only the file-operations and app command modules are imported into the
    registry. Continuous mode is switched off on ``agent_test_config`` after
    the system prompt has been constructed.
    """
    # NOTE(review): ``memory_json_file`` is annotated as ``NoMemory`` although
    # the name suggests a JSON-file backed memory -- confirm the intended type.
    registry = CommandRegistry()
    for module_name in ("autogpt.commands.file_operations", "autogpt.app"):
        registry.import_commands(module_name)

    ai_config = AIConfig(
        ai_name="Kubernetes",
        ai_role="an autonomous agent that specializes in creating Kubernetes deployment templates.",
        ai_goals=[
            "Write a simple kubernetes deployment file and save it as a kube.yaml.",
            # You should make a simple nginx web server that uses docker and exposes the port 80.
        ],
    )
    ai_config.command_registry = registry

    prompt = ai_config.construct_full_prompt(agent_test_config)
    agent_test_config.set_continuous_mode(False)

    return Agent(
        ai_name="Kubernetes-Demo",
        memory=memory_json_file,
        command_registry=registry,
        ai_config=ai_config,
        config=agent_test_config,
        next_action_count=0,
        system_prompt=prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Workspace):
    """Build an ``Agent`` asked to write up the 2010 physics Nobel prize winners.

    The registry imports the file-operations, app and web-selenium command
    modules, in that order. Continuous mode is switched off on
    ``agent_test_config`` after the system prompt has been constructed.
    """
    registry = CommandRegistry()
    for module_name in (
        "autogpt.commands.file_operations",
        "autogpt.app",
        "autogpt.commands.web_selenium",
    ):
        registry.import_commands(module_name)

    ai_config = AIConfig(
        ai_name="Get-PhysicsNobelPrize",
        ai_role="An autonomous agent that specializes in physics history.",
        ai_goals=[
            "Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt.",
        ],
    )
    ai_config.command_registry = registry

    prompt = ai_config.construct_full_prompt(agent_test_config)
    agent_test_config.set_continuous_mode(False)

    return Agent(
        ai_name="Get-PhysicsNobelPrize",
        memory=memory_json_file,
        command_registry=registry,
        ai_config=ai_config,
        config=agent_test_config,
        next_action_count=0,
        system_prompt=prompt,
        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
        workspace_directory=workspace.root,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace):
    """Return three "Debug Code Agent" ``Agent`` objects, one per goal list.

    The goal lists go from very explicit (naming the commands to use) to a
    single one-line goal. Each agent gets its own command registry from
    ``get_command_registry`` and shares the memory object and workspace root.
    """
    agents = []
    goals = [
        [
            "1- Run test.py using the execute_python_file command.",
            "2- Read code.py using the read_file command.",
            # The comma after the next literal matters: without it Python
            # concatenates it with the "Repeat ..." literal into one goal
            # ("...command.Repeat step ..."), leaving three goals, not four.
            "3- Modify code.py using the write_to_file command.",
            "Repeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
        ],
        [
            "1- Run test.py.",
            "2- Read code.py.",
            # Same reason as above: keep step 3 and the repeat instruction
            # as separate goals.
            "3- Modify code.py.",
            "Repeat step 1, 2 and 3 until test.py runs without errors.",
        ],
        ["1- Make test.py run without errors."],
    ]

    for goal in goals:
        ai_config = AIConfig(
            ai_name="Debug Code Agent",
            ai_role="an autonomous agent that specializes in debugging python code",
            ai_goals=goal,
        )
        command_registry = get_command_registry(agent_test_config)
        ai_config.command_registry = command_registry
        system_prompt = ai_config.construct_full_prompt(agent_test_config)
        agent_test_config.set_continuous_mode(False)
        agents.append(
            Agent(
                ai_name="Debug Code Agent",
                memory=memory_json_file,
                command_registry=command_registry,
                ai_config=ai_config,
                config=agent_test_config,
                next_action_count=0,
                system_prompt=system_prompt,
                triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
                workspace_directory=workspace.root,
            )
        )
    return agents
|
||||
|
||||
|
||||
def get_command_registry(agent_test_config):
    """Return a ``CommandRegistry`` loaded with every enabled command category.

    A category from ``COMMAND_CATEGORIES`` counts as enabled when it is not
    listed in ``agent_test_config.disabled_command_categories``. The enabled
    categories are imported in ``COMMAND_CATEGORIES`` order.
    """
    registry = CommandRegistry()

    # Decide the full set first, then import, so filtering is finished
    # before any command module is loaded.
    enabled = [
        category
        for category in COMMAND_CATEGORIES
        if category not in agent_test_config.disabled_command_categories
    ]
    for category in enabled:
        registry.import_commands(category)

    return registry
|
||||
|
||||
Reference in New Issue
Block a user