Link all challenges to benchmark python hook (#4786)

This commit is contained in:
merwanehamadi
2023-06-24 06:20:58 -07:00
committed by GitHub
parent 307f6e50ad
commit cfdb24efac
13 changed files with 218 additions and 402 deletions

View File

@@ -1,9 +1,10 @@
from autogpt.agent import Agent
from autogpt.config import AIConfig, Config
from autogpt.main import COMMAND_CATEGORIES
from autogpt.memory.vector import get_memory
from autogpt.models.command_registry import CommandRegistry
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
from autogpt.workspace import Workspace
from tests.integration.agent_factory import get_command_registry
def run_task(task) -> None:
@@ -39,3 +40,13 @@ def bootstrap_agent(task):
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=str(workspace_directory_path),
)
def get_command_registry(config):
    """Build a CommandRegistry holding every command category not disabled in *config*."""
    registry = CommandRegistry()
    disabled = config.disabled_command_categories
    for category in COMMAND_CATEGORIES:
        if category not in disabled:
            registry.import_commands(category)
    return registry

View File

@@ -1,25 +1,34 @@
import pytest
from autogpt.agent import Agent
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import run_interaction_loop
from tests.challenges.utils import run_challenge
CYCLE_COUNT = 2
USER_INPUTS = [
"Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'\nWrite the price of the book to a file named 'browse_website.txt'.'\nUse the task_complete command to complete the task.\nDo not use any other commands."
]
@challenge()
def test_browse_website(
browser_agent: Agent,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
file_path = browser_agent.workspace.get_path("browse_website.txt")
run_interaction_loop(
monkeypatch, browser_agent, CYCLE_COUNT, challenge_name, level_to_run
run_challenge(
challenge_name,
level_to_run,
monkeypatch,
USER_INPUTS[level_to_run - 1],
CYCLE_COUNT,
)
# content = read_file(file_path, config)
content = open(file_path, encoding="utf-8").read()
file_path = workspace.get_path("browse_website.txt")
with open(file_path, "r") as file:
content = file.read()
assert "£25.89" in content, f"Expected £25.89, got {content}"

View File

@@ -1,14 +1,8 @@
import pytest
from autogpt.workspace import Workspace
from benchmarks import run_task
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.schema import Task
from tests.challenges.utils import (
get_workspace_path,
setup_mock_input,
setup_mock_log_cycle_agent_name,
)
from tests.challenges.utils import get_workspace_path, run_challenge
CYCLE_COUNT_PER_LEVEL = [1, 1]
EXPECTED_OUTPUTS_PER_LEVEL = [
@@ -23,18 +17,20 @@ USER_INPUTS = [
@challenge()
def test_write_file(
workspace: Workspace,
patched_api_requestor: None,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
user_input = USER_INPUTS[level_to_run - 1]
setup_mock_input(monkeypatch, CYCLE_COUNT_PER_LEVEL[level_to_run - 1])
setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
task = Task(user_input=user_input)
run_task(task)
run_challenge(
challenge_name,
level_to_run,
monkeypatch,
USER_INPUTS[level_to_run - 1],
CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
)
expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]

View File

@@ -5,11 +5,12 @@ from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.execute_code import execute_python_file
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
copy_file_into_workspace,
get_workspace_path_from_agent,
run_interaction_loop,
get_workspace_path,
run_challenge,
)
CYCLE_COUNT = 5
@@ -17,15 +18,22 @@ EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
DIRECTORY_PATH = Path(__file__).parent / "data"
CODE_FILE_PATH = "code.py"
TEST_FILE_PATH = "test.py"
USER_INPUTS = [
"1- Run test.py using the execute_python_file command.\n2- Read code.py using the read_file command.\n3- Modify code.py using the write_to_file command.\nRepeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
"1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
"Make test.py run without errors.",
]
@challenge()
def test_debug_code_challenge_a(
debug_code_agents: Agent,
dummy_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: MockerFixture,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
"""
Test whether the agent can debug a simple code snippet.
@@ -35,18 +43,21 @@ def test_debug_code_challenge_a(
:patched_api_requestor: Sends api requests to our API CI pipeline
:level_to_run: The level to run.
"""
debug_code_agent = debug_code_agents[level_to_run - 1]
copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH)
copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH)
copy_file_into_workspace(workspace, DIRECTORY_PATH, CODE_FILE_PATH)
copy_file_into_workspace(workspace, DIRECTORY_PATH, TEST_FILE_PATH)
run_interaction_loop(
monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run
run_challenge(
challenge_name,
level_to_run,
monkeypatch,
USER_INPUTS[level_to_run - 1],
CYCLE_COUNT,
)
output = execute_python_file(
get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
debug_code_agent,
get_workspace_path(workspace, TEST_FILE_PATH),
dummy_agent,
)
assert "error" not in output.lower(), f"Errors found in output: {output}!"

View File

@@ -1,24 +1,30 @@
import pytest
from pytest_mock import MockerFixture
from autogpt.commands.file_operations import read_file
from autogpt.workspace import Workspace
from tests.challenges.basic_abilities.test_browse_website import USER_INPUTS
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
from tests.challenges.utils import get_workspace_path, run_challenge
CYCLE_COUNT = 3
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
from autogpt.agent import Agent
OUTPUT_LOCATION = "output.txt"
USER_INPUTS = [
"Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
"Write to a file called output.txt containing tesla's revenue in 2022.",
"Write to a file called output.txt containing tesla's revenue every year since its creation.",
]
@challenge()
def test_information_retrieval_challenge_a(
information_retrieval_agents: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: MockerFixture,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.
@@ -26,19 +32,17 @@ def test_information_retrieval_challenge_a(
:param get_company_revenue_agent: The agent to test.
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
"""
information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
run_interaction_loop(
monkeypatch,
information_retrieval_agent,
CYCLE_COUNT,
run_challenge(
challenge_name,
level_to_run,
monkeypatch,
USER_INPUTS[level_to_run - 1],
CYCLE_COUNT,
)
file_path = get_workspace_path_from_agent(
information_retrieval_agent, OUTPUT_LOCATION
)
content = read_file(file_path, information_retrieval_agent)
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
with open(file_path, "r") as file:
content = file.read()
expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
for revenue in expected_revenues:
assert (

View File

@@ -1,24 +1,25 @@
import contextlib
import pytest
from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
from tests.challenges.utils import get_workspace_path, run_challenge
CYCLE_COUNT = 3
OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
USER_INPUTS = [
"Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt."
]
@challenge()
def test_information_retrieval_challenge_b(
get_nobel_prize_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: MockerFixture,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
"""
Test the challenge_b function in a given agent by mocking user inputs and checking the output file content.
@@ -29,17 +30,18 @@ def test_information_retrieval_challenge_b(
:param level_to_run: The level to run.
"""
with contextlib.suppress(SystemExit):
run_interaction_loop(
monkeypatch,
get_nobel_prize_agent,
CYCLE_COUNT,
challenge_name,
level_to_run,
)
file_path = get_workspace_path_from_agent(get_nobel_prize_agent, OUTPUT_LOCATION)
run_challenge(
challenge_name,
level_to_run,
monkeypatch,
USER_INPUTS[level_to_run - 1],
CYCLE_COUNT,
)
content = read_file(file_path, get_nobel_prize_agent)
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
with open(file_path, "r") as file:
content = file.read()
assert "Andre Geim" in content, "Expected the file to contain Andre Geim"
assert (
"Konstantin Novoselov" in content

View File

@@ -1,23 +1,26 @@
from typing import Any, Dict
import pytest
import yaml
from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
from tests.challenges.utils import get_workspace_path, run_challenge
CYCLE_COUNT = 3
OUTPUT_LOCATION = "kube.yaml"
USER_INPUTS = ["Write a simple kubernetes deployment file and save it as a kube.yaml."]
@challenge()
def test_kubernetes_template_challenge_a(
kubernetes_agent: Agent,
monkeypatch: pytest.MonkeyPatch,
patched_api_requestor: MockerFixture,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
"""
Test the challenge_a function in a given agent by mocking user inputs
@@ -28,16 +31,23 @@ def test_kubernetes_template_challenge_a(
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""
run_interaction_loop(
monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run
run_challenge(
challenge_name,
level_to_run,
monkeypatch,
USER_INPUTS[level_to_run - 1],
CYCLE_COUNT,
)
file_path = get_workspace_path_from_agent(kubernetes_agent, OUTPUT_LOCATION)
content = read_file(file_path, kubernetes_agent)
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
with open(file_path, "r") as file:
content_string = file.read()
for word in ["apiVersion", "kind", "metadata", "spec"]:
assert word in content, f"Expected the file to contain {word}"
assert word in content_string, f"Expected the file to contain {word}"
content = yaml.safe_load(content)
yaml_as_dict: Dict[str, Any] = yaml.safe_load(content_string)
for word in ["Service", "Deployment", "Pod"]:
assert word in content["kind"], f"Expected the file to contain {word}"
assert word in yaml_as_dict.get(
"kind", ""
), f"Expected the file to contain {word}"

View File

@@ -1,49 +1,48 @@
import pytest
from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
from tests.challenges.utils import get_workspace_path, run_challenge
OUTPUT_LOCATION = "output.txt"
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
def test_memory_challenge_a(
memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
"""
The agent reads a file containing a task_id. Then, it reads a series of other files.
After reading 'n' files, the agent must write the task_id into a new file.
Args:
memory_management_agent (Agent)
workspace (Workspace)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""
task_id = "2314"
create_instructions_files(memory_management_agent, level_to_run, task_id)
create_instructions_files(workspace, level_to_run, task_id)
run_interaction_loop(
monkeypatch,
memory_management_agent,
level_to_run + 2,
challenge_name,
level_to_run,
run_challenge(
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
with open(file_path, "r") as file:
content = file.read()
assert task_id in content, f"Expected the file to contain {task_id}"
def create_instructions_files(
memory_management_agent: Agent,
workspace: Workspace,
num_files: int,
task_id: str,
base_filename: str = "instructions_",
@@ -51,7 +50,7 @@ def create_instructions_files(
"""
Creates a series of instructions files for the memory challenge.
Args:
memory_management_agent (Agent)
workspace (Workspace)
num_files (int)
task_id (str)
base_filename (str, optional)
@@ -59,8 +58,9 @@ def create_instructions_files(
for i in range(1, num_files + 1):
content = generate_content(i, task_id, base_filename, num_files)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)
file_path = get_workspace_path(workspace, file_name)
with open(file_path, "w") as file:
file.write(content)
def generate_content(

View File

@@ -1,56 +1,50 @@
import pytest
from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
generate_noise,
get_workspace_path_from_agent,
run_interaction_loop,
)
from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
NOISE = 1000
OUTPUT_LOCATION = "output.txt"
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
def test_memory_challenge_b(
memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
"""
The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
the agent must write all the task_ids into a new file, filtering out the noise.
Args:
memory_management_agent (Agent)
workspace (Workspace)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
"""
task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
create_instructions_files(memory_management_agent, level_to_run, task_ids)
create_instructions_files(workspace, level_to_run, task_ids)
run_interaction_loop(
monkeypatch,
memory_management_agent,
level_to_run + 2,
challenge_name,
level_to_run,
run_challenge(
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
with open(file_path, "r") as file:
content = file.read()
for task_id in task_ids:
assert task_id in content, f"Expected the file to contain {task_id}"
def create_instructions_files(
memory_management_agent: Agent,
workspace: Workspace,
level: int,
task_ids: list,
base_filename: str = "instructions_",
@@ -59,7 +53,7 @@ def create_instructions_files(
Creates a series of instructions files for the memory challenge.
Args:
level:
memory_management_agent (Agent)
workspace (Workspace)
num_files (int)
task_ids (list)
base_filename (str, optional)
@@ -67,9 +61,10 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, task_ids, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
file_path = get_workspace_path(workspace, file_name)
write_to_file(file_path, content, memory_management_agent)
with open(file_path, "w") as file:
file.write(content)
def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str:

View File

@@ -1,26 +1,24 @@
import pytest
from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.commands.file_operations import read_file
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import (
generate_noise,
get_workspace_path_from_agent,
run_interaction_loop,
)
from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
NOISE = 1200
OUTPUT_LOCATION = "output.txt"
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
def test_memory_challenge_c(
memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
"""
Instead of reading task Ids from files as with the previous challenges, the agent now must remember
@@ -28,7 +26,7 @@ def test_memory_challenge_c(
after seeing several of them.
Args:
memory_management_agent (Agent)
workspace (Workspace)
patched_api_requestor (MockerFixture)
monkeypatch (pytest.MonkeyPatch)
level_to_run (int)
@@ -48,26 +46,23 @@ def test_memory_challenge_c(
level_silly_phrases = silly_phrases[:level_to_run]
create_instructions_files(
memory_management_agent,
workspace,
level_to_run,
level_silly_phrases,
)
run_interaction_loop(
monkeypatch,
memory_management_agent,
level_to_run + 2,
challenge_name,
level_to_run,
run_challenge(
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, agent=memory_management_agent)
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
content = read_file(file_path, agent=workspace)
for phrase in level_silly_phrases:
assert phrase in content, f"Expected the file to contain {phrase}"
def create_instructions_files(
memory_management_agent: Agent,
workspace: Workspace,
level: int,
task_ids: list,
base_filename: str = "instructions_",
@@ -76,7 +71,7 @@ def create_instructions_files(
Creates a series of instructions files for the memory challenge.
Args:
level:
memory_management_agent (Agent)
workspace (Workspace)
num_files (int)
task_ids (list)
base_filename (str, optional)
@@ -84,8 +79,9 @@ def create_instructions_files(
for i in range(1, level + 1):
content = generate_content(i, task_ids, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)
file_path = get_workspace_path(workspace, file_name)
with open(file_path, "w") as file:
file.write(content)
def generate_content(

View File

@@ -4,28 +4,30 @@ from typing import Dict
import pytest
from pytest_mock import MockerFixture
from autogpt.agent import Agent
from autogpt.commands.file_operations import read_file, write_to_file
from autogpt.commands.file_operations import read_file
from autogpt.workspace import Workspace
from tests.challenges.challenge_decorator.challenge_decorator import challenge
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
from tests.challenges.utils import get_workspace_path, run_challenge
LEVEL_CURRENTLY_BEATEN = 1
MAX_LEVEL = 5
OUTPUT_LOCATION = "output.txt"
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
@challenge()
def test_memory_challenge_d(
memory_management_agent: Agent,
patched_api_requestor: MockerFixture,
monkeypatch: pytest.MonkeyPatch,
level_to_run: int,
challenge_name: str,
workspace: Workspace,
patched_make_workspace: pytest.fixture,
) -> None:
"""
The agent is given a series of events and must remember the respective beliefs of the characters.
Args:
memory_management_agent (Agent)
workspace (Workspace)
user_selected_level (int)
"""
sally_anne_test_phrases = [
@@ -36,19 +38,14 @@ def test_memory_challenge_d(
"Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).",
]
level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run]
create_instructions_files(
memory_management_agent, level_to_run, level_sally_anne_test_phrases
create_instructions_files(workspace, level_to_run, level_sally_anne_test_phrases)
run_challenge(
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
)
run_interaction_loop(
monkeypatch,
memory_management_agent,
level_to_run + 2,
challenge_name,
level_to_run,
)
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
content = read_file(file_path, memory_management_agent)
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
content = read_file(file_path, workspace)
check_beliefs(content, level_to_run)
@@ -176,7 +173,7 @@ def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]:
def create_instructions_files(
memory_management_agent: Agent,
workspace: Workspace,
level: int,
test_phrases: list,
base_filename: str = "instructions_",
@@ -185,15 +182,16 @@ def create_instructions_files(
Creates a series of instructions files for the memory challenge.
Args:
level:
memory_management_agent (Agent)
workspace (Workspace)
test_phrases (list)
base_filename (str, optional)
"""
for i in range(1, level + 1):
content = generate_content(i, test_phrases, base_filename, level)
file_name = f"{base_filename}{i}.txt"
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
write_to_file(file_path, content, memory_management_agent)
file_path = get_workspace_path(workspace, file_name)
with open(file_path, "w") as file:
file.write(content)
def generate_content(

View File

@@ -6,9 +6,10 @@ from typing import Any, Generator
import pytest
from autogpt.agent import Agent
from autogpt.log_cycle.log_cycle import LogCycleHandler
from autogpt.workspace import Workspace
from benchmarks import run_task
from tests.challenges.schema import Task
def generate_noise(noise_size: int) -> str:
@@ -40,20 +41,6 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None:
monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen))
def run_interaction_loop(
    monkeypatch: pytest.MonkeyPatch,
    agent: Agent,
    cycle_count: int,
    challenge_name: str,
    level_to_run: int,
) -> None:
    """Run *agent*'s interaction loop non-interactively for a challenge.

    User input is replaced with a scripted sequence sized by *cycle_count*,
    the log-cycle agent name is patched from the challenge/level pair, and
    the SystemExit raised when the loop terminates is swallowed.
    """
    # The two patches are independent of each other; both must be in place
    # before the loop starts.
    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
    setup_mock_input(monkeypatch, cycle_count)
    with contextlib.suppress(SystemExit):
        agent.start_interaction_loop()
def setup_mock_log_cycle_agent_name(
monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int
) -> None:
@@ -69,13 +56,23 @@ def get_workspace_path(workspace: Workspace, file_name: str) -> str:
return str(workspace.get_path(file_name))
def get_workspace_path_from_agent(agent: Agent, file_name: str) -> str:
    """Return *file_name* resolved against *agent*'s workspace, as a string path."""
    resolved = agent.workspace.get_path(file_name)
    return str(resolved)
def copy_file_into_workspace(
agent: Agent, directory_path: Path, file_path: str
workspace: Workspace, directory_path: Path, file_path: str
) -> None:
workspace_code_file_path = get_workspace_path_from_agent(agent, file_path)
workspace_code_file_path = get_workspace_path(workspace, file_path)
code_file_path = directory_path / file_path
shutil.copy(code_file_path, workspace_code_file_path)
def run_challenge(
    challenge_name: str,
    level_to_run: int,
    monkeypatch: pytest.MonkeyPatch,
    user_input: str,
    cycle_count: int,
) -> None:
    """Run one challenge level through the benchmark hook with mocked I/O.

    Patches the log-cycle agent name and the user-input prompt, wraps
    *user_input* in a Task, and runs it via ``run_task``, swallowing the
    SystemExit raised when the run finishes.
    """
    # The two monkeypatches are independent; both must be active before the task runs.
    setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
    setup_mock_input(monkeypatch, cycle_count)
    with contextlib.suppress(SystemExit):
        run_task(Task(user_input=user_input))

View File

@@ -2,259 +2,46 @@ import pytest
from autogpt.agent import Agent
from autogpt.config import AIConfig, Config
from autogpt.main import COMMAND_CATEGORIES
from autogpt.memory.vector import NoMemory, get_memory
from autogpt.memory.vector import get_memory
from autogpt.models.command_registry import CommandRegistry
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
from autogpt.workspace import Workspace
@pytest.fixture
def agent_test_config(config: Config):
    """Pytest fixture: the shared Config adjusted for agent challenge tests.

    Disables continuous mode, zeroes the temperature, and switches on plain
    output so challenge runs behave deterministically.
    """
    # The three settings are independent of one another.
    config.set_temperature(0)
    config.set_continuous_mode(False)
    config.plain_output = True
    return config
def memory_json_file(config: Config):
was_memory_backend = config.memory_backend
@pytest.fixture
def memory_json_file(agent_test_config: Config):
was_memory_backend = agent_test_config.memory_backend
agent_test_config.set_memory_backend("json_file")
memory = get_memory(agent_test_config)
config.set_memory_backend("json_file")
memory = get_memory(config)
memory.clear()
yield memory
agent_test_config.set_memory_backend(was_memory_backend)
config.set_memory_backend(was_memory_backend)
@pytest.fixture
def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace):
def dummy_agent(config: Config, memory_json_file, workspace: Workspace):
command_registry = CommandRegistry()
command_registry.import_commands("autogpt.commands.file_operations")
command_registry.import_commands("autogpt.commands.web_selenium")
command_registry.import_commands("autogpt.app")
command_registry.import_commands("autogpt.commands.task_statuses")
ai_config = AIConfig(
ai_name="browse_website-GPT",
ai_role="an AI designed to use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html, answer the question 'What is the price of the book?' and write the price to a file named \"browse_website.txt\", and use the task_complete command to complete the task.",
ai_name="Dummy Agent",
ai_role="Dummy Role",
ai_goals=[
"Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'",
'Write the price of the book to a file named "browse_website.txt".',
"Use the task_complete command to complete the task.",
"Do not use any other commands.",
"Dummy Task",
],
)
ai_config.command_registry = command_registry
system_prompt = ai_config.construct_full_prompt(agent_test_config)
agent = Agent(
ai_name="",
memory=memory_none,
command_registry=command_registry,
ai_config=ai_config,
config=agent_test_config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace.root,
)
return agent
@pytest.fixture
def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
command_registry = get_command_registry(agent_test_config)
ai_config = AIConfig(
ai_name="Follow-Instructions-GPT",
ai_role="an AI designed to read the instructions_1.txt file using the read_file method and follow the instructions in the file.",
ai_goals=[
"Use the command read_file to read the instructions_1.txt file",
"Follow the instructions in the instructions_1.txt file",
],
)
ai_config.command_registry = command_registry
system_prompt = ai_config.construct_full_prompt(agent_test_config)
agent = Agent(
ai_name="Follow-Instructions-GPT",
ai_name="Dummy Agent",
memory=memory_json_file,
command_registry=command_registry,
ai_config=ai_config,
config=agent_test_config,
config=config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
system_prompt="dummy_prompt",
triggering_prompt="dummy triggering prompt",
workspace_directory=workspace.root,
)
return agent
@pytest.fixture
def information_retrieval_agents(
agent_test_config, memory_json_file, workspace: Workspace
):
agents = []
command_registry = get_command_registry(agent_test_config)
ai_goals = [
"Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
"Write to a file called output.txt containing tesla's revenue in 2022.",
"Write to a file called output.txt containing tesla's revenue every year since its creation.",
]
for ai_goal in ai_goals:
ai_config = AIConfig(
ai_name="Information Retrieval Agent",
ai_role="an autonomous agent that specializes in retrieving information.",
ai_goals=[ai_goal],
)
ai_config.command_registry = command_registry
system_prompt = ai_config.construct_full_prompt(agent_test_config)
agent_test_config.set_continuous_mode(False)
agents.append(
Agent(
ai_name="Information Retrieval Agent",
memory=memory_json_file,
command_registry=command_registry,
ai_config=ai_config,
config=agent_test_config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace.root,
)
)
return agents
@pytest.fixture
def kubernetes_agent(
agent_test_config: Config, memory_json_file: NoMemory, workspace: Workspace
) -> Agent:
command_registry = CommandRegistry()
command_registry.import_commands("autogpt.commands.file_operations")
command_registry.import_commands("autogpt.app")
ai_config = AIConfig(
ai_name="Kubernetes",
ai_role="an autonomous agent that specializes in creating Kubernetes deployment templates.",
ai_goals=[
"Write a simple kubernetes deployment file and save it as a kube.yaml.",
# You should make a simple nginx web server that uses docker and exposes the port 80.
],
)
ai_config.command_registry = command_registry
system_prompt = ai_config.construct_full_prompt(agent_test_config)
agent_test_config.set_continuous_mode(False)
agent = Agent(
ai_name="Kubernetes-Demo",
memory=memory_json_file,
command_registry=command_registry,
ai_config=ai_config,
config=agent_test_config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace.root,
)
return agent
@pytest.fixture
def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Workspace):
command_registry = CommandRegistry()
command_registry.import_commands("autogpt.commands.file_operations")
command_registry.import_commands("autogpt.app")
command_registry.import_commands("autogpt.commands.web_selenium")
ai_config = AIConfig(
ai_name="Get-PhysicsNobelPrize",
ai_role="An autonomous agent that specializes in physics history.",
ai_goals=[
"Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt.",
],
)
ai_config.command_registry = command_registry
system_prompt = ai_config.construct_full_prompt(agent_test_config)
agent_test_config.set_continuous_mode(False)
agent = Agent(
ai_name="Get-PhysicsNobelPrize",
memory=memory_json_file,
command_registry=command_registry,
ai_config=ai_config,
config=agent_test_config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace.root,
)
return agent
@pytest.fixture
def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace):
agents = []
goals = [
[
"1- Run test.py using the execute_python_file command.",
"2- Read code.py using the read_file command.",
"3- Modify code.py using the write_to_file command."
"Repeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
],
[
"1- Run test.py.",
"2- Read code.py.",
"3- Modify code.py."
"Repeat step 1, 2 and 3 until test.py runs without errors.",
],
["1- Make test.py run without errors."],
]
for goal in goals:
ai_config = AIConfig(
ai_name="Debug Code Agent",
ai_role="an autonomous agent that specializes in debugging python code",
ai_goals=goal,
)
command_registry = get_command_registry(agent_test_config)
ai_config.command_registry = command_registry
system_prompt = ai_config.construct_full_prompt(agent_test_config)
agent_test_config.set_continuous_mode(False)
agents.append(
Agent(
ai_name="Debug Code Agent",
memory=memory_json_file,
command_registry=command_registry,
ai_config=ai_config,
config=agent_test_config,
next_action_count=0,
system_prompt=system_prompt,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
workspace_directory=workspace.root,
)
)
return agents
def get_command_registry(agent_test_config):
    """Create a CommandRegistry with all command categories enabled by the test config."""
    registry = CommandRegistry()
    disabled = agent_test_config.disabled_command_categories
    for category in COMMAND_CATEGORIES:
        if category not in disabled:
            registry.import_commands(category)
    return registry