mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-22 08:24:26 +01:00
Link all challenges to benchmark python hook (#4786)
This commit is contained in:
@@ -1,9 +1,10 @@
|
|||||||
from autogpt.agent import Agent
|
from autogpt.agent import Agent
|
||||||
from autogpt.config import AIConfig, Config
|
from autogpt.config import AIConfig, Config
|
||||||
|
from autogpt.main import COMMAND_CATEGORIES
|
||||||
from autogpt.memory.vector import get_memory
|
from autogpt.memory.vector import get_memory
|
||||||
|
from autogpt.models.command_registry import CommandRegistry
|
||||||
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
|
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
|
||||||
from autogpt.workspace import Workspace
|
from autogpt.workspace import Workspace
|
||||||
from tests.integration.agent_factory import get_command_registry
|
|
||||||
|
|
||||||
|
|
||||||
def run_task(task) -> None:
|
def run_task(task) -> None:
|
||||||
@@ -39,3 +40,13 @@ def bootstrap_agent(task):
|
|||||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
||||||
workspace_directory=str(workspace_directory_path),
|
workspace_directory=str(workspace_directory_path),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_command_registry(config):
|
||||||
|
command_registry = CommandRegistry()
|
||||||
|
enabled_command_categories = [
|
||||||
|
x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories
|
||||||
|
]
|
||||||
|
for command_category in enabled_command_categories:
|
||||||
|
command_registry.import_commands(command_category)
|
||||||
|
return command_registry
|
||||||
|
|||||||
@@ -1,25 +1,34 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.workspace import Workspace
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import run_interaction_loop
|
from tests.challenges.utils import run_challenge
|
||||||
|
|
||||||
CYCLE_COUNT = 2
|
CYCLE_COUNT = 2
|
||||||
|
USER_INPUTS = [
|
||||||
|
"Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'\nWrite the price of the book to a file named 'browse_website.txt'.'\nUse the task_complete command to complete the task.\nDo not use any other commands."
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_browse_website(
|
def test_browse_website(
|
||||||
browser_agent: Agent,
|
|
||||||
patched_api_requestor: None,
|
patched_api_requestor: None,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
file_path = browser_agent.workspace.get_path("browse_website.txt")
|
run_challenge(
|
||||||
run_interaction_loop(
|
challenge_name,
|
||||||
monkeypatch, browser_agent, CYCLE_COUNT, challenge_name, level_to_run
|
level_to_run,
|
||||||
|
monkeypatch,
|
||||||
|
USER_INPUTS[level_to_run - 1],
|
||||||
|
CYCLE_COUNT,
|
||||||
)
|
)
|
||||||
|
|
||||||
# content = read_file(file_path, config)
|
file_path = workspace.get_path("browse_website.txt")
|
||||||
content = open(file_path, encoding="utf-8").read()
|
|
||||||
|
with open(file_path, "r") as file:
|
||||||
|
content = file.read()
|
||||||
assert "£25.89" in content, f"Expected £25.89, got {content}"
|
assert "£25.89" in content, f"Expected £25.89, got {content}"
|
||||||
|
|||||||
@@ -1,14 +1,8 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from autogpt.workspace import Workspace
|
from autogpt.workspace import Workspace
|
||||||
from benchmarks import run_task
|
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.schema import Task
|
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||||
from tests.challenges.utils import (
|
|
||||||
get_workspace_path,
|
|
||||||
setup_mock_input,
|
|
||||||
setup_mock_log_cycle_agent_name,
|
|
||||||
)
|
|
||||||
|
|
||||||
CYCLE_COUNT_PER_LEVEL = [1, 1]
|
CYCLE_COUNT_PER_LEVEL = [1, 1]
|
||||||
EXPECTED_OUTPUTS_PER_LEVEL = [
|
EXPECTED_OUTPUTS_PER_LEVEL = [
|
||||||
@@ -23,18 +17,20 @@ USER_INPUTS = [
|
|||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_write_file(
|
def test_write_file(
|
||||||
workspace: Workspace,
|
|
||||||
patched_api_requestor: None,
|
patched_api_requestor: None,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
patched_make_workspace: pytest.fixture,
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
user_input = USER_INPUTS[level_to_run - 1]
|
run_challenge(
|
||||||
setup_mock_input(monkeypatch, CYCLE_COUNT_PER_LEVEL[level_to_run - 1])
|
challenge_name,
|
||||||
setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
|
level_to_run,
|
||||||
task = Task(user_input=user_input)
|
monkeypatch,
|
||||||
run_task(task)
|
USER_INPUTS[level_to_run - 1],
|
||||||
|
CYCLE_COUNT_PER_LEVEL[level_to_run - 1],
|
||||||
|
)
|
||||||
|
|
||||||
expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]
|
expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1]
|
||||||
|
|
||||||
|
|||||||
@@ -5,11 +5,12 @@ from pytest_mock import MockerFixture
|
|||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.agent import Agent
|
||||||
from autogpt.commands.execute_code import execute_python_file
|
from autogpt.commands.execute_code import execute_python_file
|
||||||
|
from autogpt.workspace import Workspace
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import (
|
from tests.challenges.utils import (
|
||||||
copy_file_into_workspace,
|
copy_file_into_workspace,
|
||||||
get_workspace_path_from_agent,
|
get_workspace_path,
|
||||||
run_interaction_loop,
|
run_challenge,
|
||||||
)
|
)
|
||||||
|
|
||||||
CYCLE_COUNT = 5
|
CYCLE_COUNT = 5
|
||||||
@@ -17,15 +18,22 @@ EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
|
|||||||
DIRECTORY_PATH = Path(__file__).parent / "data"
|
DIRECTORY_PATH = Path(__file__).parent / "data"
|
||||||
CODE_FILE_PATH = "code.py"
|
CODE_FILE_PATH = "code.py"
|
||||||
TEST_FILE_PATH = "test.py"
|
TEST_FILE_PATH = "test.py"
|
||||||
|
USER_INPUTS = [
|
||||||
|
"1- Run test.py using the execute_python_file command.\n2- Read code.py using the read_file command.\n3- Modify code.py using the write_to_file command.\nRepeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
|
||||||
|
"1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
|
||||||
|
"Make test.py run without errors.",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_debug_code_challenge_a(
|
def test_debug_code_challenge_a(
|
||||||
debug_code_agents: Agent,
|
dummy_agent: Agent,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
patched_api_requestor: MockerFixture,
|
patched_api_requestor: MockerFixture,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Test whether the agent can debug a simple code snippet.
|
Test whether the agent can debug a simple code snippet.
|
||||||
@@ -35,18 +43,21 @@ def test_debug_code_challenge_a(
|
|||||||
:patched_api_requestor: Sends api requests to our API CI pipeline
|
:patched_api_requestor: Sends api requests to our API CI pipeline
|
||||||
:level_to_run: The level to run.
|
:level_to_run: The level to run.
|
||||||
"""
|
"""
|
||||||
debug_code_agent = debug_code_agents[level_to_run - 1]
|
|
||||||
|
|
||||||
copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH)
|
copy_file_into_workspace(workspace, DIRECTORY_PATH, CODE_FILE_PATH)
|
||||||
copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH)
|
copy_file_into_workspace(workspace, DIRECTORY_PATH, TEST_FILE_PATH)
|
||||||
|
|
||||||
run_interaction_loop(
|
run_challenge(
|
||||||
monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run
|
challenge_name,
|
||||||
|
level_to_run,
|
||||||
|
monkeypatch,
|
||||||
|
USER_INPUTS[level_to_run - 1],
|
||||||
|
CYCLE_COUNT,
|
||||||
)
|
)
|
||||||
|
|
||||||
output = execute_python_file(
|
output = execute_python_file(
|
||||||
get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
|
get_workspace_path(workspace, TEST_FILE_PATH),
|
||||||
debug_code_agent,
|
dummy_agent,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert "error" not in output.lower(), f"Errors found in output: {output}!"
|
assert "error" not in output.lower(), f"Errors found in output: {output}!"
|
||||||
|
|||||||
@@ -1,24 +1,30 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from autogpt.commands.file_operations import read_file
|
from autogpt.workspace import Workspace
|
||||||
|
from tests.challenges.basic_abilities.test_browse_website import USER_INPUTS
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||||
|
|
||||||
CYCLE_COUNT = 3
|
CYCLE_COUNT = 3
|
||||||
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
|
EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]]
|
||||||
from autogpt.agent import Agent
|
|
||||||
|
|
||||||
OUTPUT_LOCATION = "output.txt"
|
OUTPUT_LOCATION = "output.txt"
|
||||||
|
USER_INPUTS = [
|
||||||
|
"Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
|
||||||
|
"Write to a file called output.txt containing tesla's revenue in 2022.",
|
||||||
|
"Write to a file called output.txt containing tesla's revenue every year since its creation.",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_information_retrieval_challenge_a(
|
def test_information_retrieval_challenge_a(
|
||||||
information_retrieval_agents: Agent,
|
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
patched_api_requestor: MockerFixture,
|
patched_api_requestor: MockerFixture,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.
|
Test the challenge_a function in a given agent by mocking user inputs and checking the output file content.
|
||||||
@@ -26,19 +32,17 @@ def test_information_retrieval_challenge_a(
|
|||||||
:param get_company_revenue_agent: The agent to test.
|
:param get_company_revenue_agent: The agent to test.
|
||||||
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
|
:param monkeypatch: pytest's monkeypatch utility for modifying builtins.
|
||||||
"""
|
"""
|
||||||
information_retrieval_agent = information_retrieval_agents[level_to_run - 1]
|
run_challenge(
|
||||||
run_interaction_loop(
|
|
||||||
monkeypatch,
|
|
||||||
information_retrieval_agent,
|
|
||||||
CYCLE_COUNT,
|
|
||||||
challenge_name,
|
challenge_name,
|
||||||
level_to_run,
|
level_to_run,
|
||||||
|
monkeypatch,
|
||||||
|
USER_INPUTS[level_to_run - 1],
|
||||||
|
CYCLE_COUNT,
|
||||||
)
|
)
|
||||||
|
|
||||||
file_path = get_workspace_path_from_agent(
|
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||||
information_retrieval_agent, OUTPUT_LOCATION
|
with open(file_path, "r") as file:
|
||||||
)
|
content = file.read()
|
||||||
content = read_file(file_path, information_retrieval_agent)
|
|
||||||
expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
|
expected_revenues = EXPECTED_REVENUES[level_to_run - 1]
|
||||||
for revenue in expected_revenues:
|
for revenue in expected_revenues:
|
||||||
assert (
|
assert (
|
||||||
|
|||||||
@@ -1,24 +1,25 @@
|
|||||||
import contextlib
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.workspace import Workspace
|
||||||
from autogpt.commands.file_operations import read_file
|
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||||
|
|
||||||
CYCLE_COUNT = 3
|
CYCLE_COUNT = 3
|
||||||
OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
|
OUTPUT_LOCATION = "2010_nobel_prize_winners.txt"
|
||||||
|
USER_INPUTS = [
|
||||||
|
"Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt."
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_information_retrieval_challenge_b(
|
def test_information_retrieval_challenge_b(
|
||||||
get_nobel_prize_agent: Agent,
|
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
patched_api_requestor: MockerFixture,
|
patched_api_requestor: MockerFixture,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Test the challenge_b function in a given agent by mocking user inputs and checking the output file content.
|
Test the challenge_b function in a given agent by mocking user inputs and checking the output file content.
|
||||||
@@ -29,17 +30,18 @@ def test_information_retrieval_challenge_b(
|
|||||||
:param level_to_run: The level to run.
|
:param level_to_run: The level to run.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
with contextlib.suppress(SystemExit):
|
run_challenge(
|
||||||
run_interaction_loop(
|
challenge_name,
|
||||||
monkeypatch,
|
level_to_run,
|
||||||
get_nobel_prize_agent,
|
monkeypatch,
|
||||||
CYCLE_COUNT,
|
USER_INPUTS[level_to_run - 1],
|
||||||
challenge_name,
|
CYCLE_COUNT,
|
||||||
level_to_run,
|
)
|
||||||
)
|
|
||||||
file_path = get_workspace_path_from_agent(get_nobel_prize_agent, OUTPUT_LOCATION)
|
|
||||||
|
|
||||||
content = read_file(file_path, get_nobel_prize_agent)
|
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||||
|
|
||||||
|
with open(file_path, "r") as file:
|
||||||
|
content = file.read()
|
||||||
assert "Andre Geim" in content, "Expected the file to contain Andre Geim"
|
assert "Andre Geim" in content, "Expected the file to contain Andre Geim"
|
||||||
assert (
|
assert (
|
||||||
"Konstantin Novoselov" in content
|
"Konstantin Novoselov" in content
|
||||||
|
|||||||
@@ -1,23 +1,26 @@
|
|||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import yaml
|
import yaml
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.workspace import Workspace
|
||||||
from autogpt.commands.file_operations import read_file
|
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||||
|
|
||||||
CYCLE_COUNT = 3
|
CYCLE_COUNT = 3
|
||||||
OUTPUT_LOCATION = "kube.yaml"
|
OUTPUT_LOCATION = "kube.yaml"
|
||||||
|
USER_INPUTS = ["Write a simple kubernetes deployment file and save it as a kube.yaml."]
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_kubernetes_template_challenge_a(
|
def test_kubernetes_template_challenge_a(
|
||||||
kubernetes_agent: Agent,
|
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
patched_api_requestor: MockerFixture,
|
patched_api_requestor: MockerFixture,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Test the challenge_a function in a given agent by mocking user inputs
|
Test the challenge_a function in a given agent by mocking user inputs
|
||||||
@@ -28,16 +31,23 @@ def test_kubernetes_template_challenge_a(
|
|||||||
monkeypatch (pytest.MonkeyPatch)
|
monkeypatch (pytest.MonkeyPatch)
|
||||||
level_to_run (int)
|
level_to_run (int)
|
||||||
"""
|
"""
|
||||||
run_interaction_loop(
|
run_challenge(
|
||||||
monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run
|
challenge_name,
|
||||||
|
level_to_run,
|
||||||
|
monkeypatch,
|
||||||
|
USER_INPUTS[level_to_run - 1],
|
||||||
|
CYCLE_COUNT,
|
||||||
)
|
)
|
||||||
|
|
||||||
file_path = get_workspace_path_from_agent(kubernetes_agent, OUTPUT_LOCATION)
|
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||||
content = read_file(file_path, kubernetes_agent)
|
with open(file_path, "r") as file:
|
||||||
|
content_string = file.read()
|
||||||
|
|
||||||
for word in ["apiVersion", "kind", "metadata", "spec"]:
|
for word in ["apiVersion", "kind", "metadata", "spec"]:
|
||||||
assert word in content, f"Expected the file to contain {word}"
|
assert word in content_string, f"Expected the file to contain {word}"
|
||||||
|
|
||||||
content = yaml.safe_load(content)
|
yaml_as_dict: Dict[str, Any] = yaml.safe_load(content_string)
|
||||||
for word in ["Service", "Deployment", "Pod"]:
|
for word in ["Service", "Deployment", "Pod"]:
|
||||||
assert word in content["kind"], f"Expected the file to contain {word}"
|
assert word in yaml_as_dict.get(
|
||||||
|
"kind", ""
|
||||||
|
), f"Expected the file to contain {word}"
|
||||||
|
|||||||
@@ -1,49 +1,48 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.workspace import Workspace
|
||||||
from autogpt.commands.file_operations import read_file, write_to_file
|
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||||
|
|
||||||
OUTPUT_LOCATION = "output.txt"
|
OUTPUT_LOCATION = "output.txt"
|
||||||
|
|
||||||
|
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_memory_challenge_a(
|
def test_memory_challenge_a(
|
||||||
memory_management_agent: Agent,
|
|
||||||
patched_api_requestor: MockerFixture,
|
patched_api_requestor: MockerFixture,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
The agent reads a file containing a task_id. Then, it reads a series of other files.
|
The agent reads a file containing a task_id. Then, it reads a series of other files.
|
||||||
After reading 'n' files, the agent must write the task_id into a new file.
|
After reading 'n' files, the agent must write the task_id into a new file.
|
||||||
Args:
|
Args:
|
||||||
memory_management_agent (Agent)
|
workspace (Workspace)
|
||||||
patched_api_requestor (MockerFixture)
|
patched_api_requestor (MockerFixture)
|
||||||
monkeypatch (pytest.MonkeyPatch)
|
monkeypatch (pytest.MonkeyPatch)
|
||||||
level_to_run (int)
|
level_to_run (int)
|
||||||
"""
|
"""
|
||||||
task_id = "2314"
|
task_id = "2314"
|
||||||
create_instructions_files(memory_management_agent, level_to_run, task_id)
|
create_instructions_files(workspace, level_to_run, task_id)
|
||||||
|
|
||||||
run_interaction_loop(
|
run_challenge(
|
||||||
monkeypatch,
|
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
|
||||||
memory_management_agent,
|
|
||||||
level_to_run + 2,
|
|
||||||
challenge_name,
|
|
||||||
level_to_run,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
|
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||||
content = read_file(file_path, memory_management_agent)
|
with open(file_path, "r") as file:
|
||||||
|
content = file.read()
|
||||||
assert task_id in content, f"Expected the file to contain {task_id}"
|
assert task_id in content, f"Expected the file to contain {task_id}"
|
||||||
|
|
||||||
|
|
||||||
def create_instructions_files(
|
def create_instructions_files(
|
||||||
memory_management_agent: Agent,
|
workspace: Workspace,
|
||||||
num_files: int,
|
num_files: int,
|
||||||
task_id: str,
|
task_id: str,
|
||||||
base_filename: str = "instructions_",
|
base_filename: str = "instructions_",
|
||||||
@@ -51,7 +50,7 @@ def create_instructions_files(
|
|||||||
"""
|
"""
|
||||||
Creates a series of instructions files for the memory challenge.
|
Creates a series of instructions files for the memory challenge.
|
||||||
Args:
|
Args:
|
||||||
memory_management_agent (Agent)
|
workspace (Workspace)
|
||||||
num_files (int)
|
num_files (int)
|
||||||
task_id (str)
|
task_id (str)
|
||||||
base_filename (str, optional)
|
base_filename (str, optional)
|
||||||
@@ -59,8 +58,9 @@ def create_instructions_files(
|
|||||||
for i in range(1, num_files + 1):
|
for i in range(1, num_files + 1):
|
||||||
content = generate_content(i, task_id, base_filename, num_files)
|
content = generate_content(i, task_id, base_filename, num_files)
|
||||||
file_name = f"{base_filename}{i}.txt"
|
file_name = f"{base_filename}{i}.txt"
|
||||||
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
|
file_path = get_workspace_path(workspace, file_name)
|
||||||
write_to_file(file_path, content, memory_management_agent)
|
with open(file_path, "w") as file:
|
||||||
|
file.write(content)
|
||||||
|
|
||||||
|
|
||||||
def generate_content(
|
def generate_content(
|
||||||
|
|||||||
@@ -1,56 +1,50 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.workspace import Workspace
|
||||||
from autogpt.commands.file_operations import read_file, write_to_file
|
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import (
|
from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
|
||||||
generate_noise,
|
|
||||||
get_workspace_path_from_agent,
|
|
||||||
run_interaction_loop,
|
|
||||||
)
|
|
||||||
|
|
||||||
NOISE = 1000
|
NOISE = 1000
|
||||||
OUTPUT_LOCATION = "output.txt"
|
OUTPUT_LOCATION = "output.txt"
|
||||||
|
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_memory_challenge_b(
|
def test_memory_challenge_b(
|
||||||
memory_management_agent: Agent,
|
|
||||||
patched_api_requestor: MockerFixture,
|
patched_api_requestor: MockerFixture,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
|
The agent reads a series of files, each containing a task_id and noise. After reading 'n' files,
|
||||||
the agent must write all the task_ids into a new file, filtering out the noise.
|
the agent must write all the task_ids into a new file, filtering out the noise.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
memory_management_agent (Agent)
|
workspace (Workspace)
|
||||||
patched_api_requestor (MockerFixture)
|
patched_api_requestor (MockerFixture)
|
||||||
monkeypatch (pytest.MonkeyPatch)
|
monkeypatch (pytest.MonkeyPatch)
|
||||||
level_to_run (int)
|
level_to_run (int)
|
||||||
"""
|
"""
|
||||||
task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
|
task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)]
|
||||||
create_instructions_files(memory_management_agent, level_to_run, task_ids)
|
create_instructions_files(workspace, level_to_run, task_ids)
|
||||||
|
|
||||||
run_interaction_loop(
|
run_challenge(
|
||||||
monkeypatch,
|
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
|
||||||
memory_management_agent,
|
|
||||||
level_to_run + 2,
|
|
||||||
challenge_name,
|
|
||||||
level_to_run,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
|
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||||
content = read_file(file_path, memory_management_agent)
|
with open(file_path, "r") as file:
|
||||||
|
content = file.read()
|
||||||
for task_id in task_ids:
|
for task_id in task_ids:
|
||||||
assert task_id in content, f"Expected the file to contain {task_id}"
|
assert task_id in content, f"Expected the file to contain {task_id}"
|
||||||
|
|
||||||
|
|
||||||
def create_instructions_files(
|
def create_instructions_files(
|
||||||
memory_management_agent: Agent,
|
workspace: Workspace,
|
||||||
level: int,
|
level: int,
|
||||||
task_ids: list,
|
task_ids: list,
|
||||||
base_filename: str = "instructions_",
|
base_filename: str = "instructions_",
|
||||||
@@ -59,7 +53,7 @@ def create_instructions_files(
|
|||||||
Creates a series of instructions files for the memory challenge.
|
Creates a series of instructions files for the memory challenge.
|
||||||
Args:
|
Args:
|
||||||
level:
|
level:
|
||||||
memory_management_agent (Agent)
|
workspace (Workspace)
|
||||||
num_files (int)
|
num_files (int)
|
||||||
task_ids (list)
|
task_ids (list)
|
||||||
base_filename (str, optional)
|
base_filename (str, optional)
|
||||||
@@ -67,9 +61,10 @@ def create_instructions_files(
|
|||||||
for i in range(1, level + 1):
|
for i in range(1, level + 1):
|
||||||
content = generate_content(i, task_ids, base_filename, level)
|
content = generate_content(i, task_ids, base_filename, level)
|
||||||
file_name = f"{base_filename}{i}.txt"
|
file_name = f"{base_filename}{i}.txt"
|
||||||
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
|
file_path = get_workspace_path(workspace, file_name)
|
||||||
|
|
||||||
write_to_file(file_path, content, memory_management_agent)
|
with open(file_path, "w") as file:
|
||||||
|
file.write(content)
|
||||||
|
|
||||||
|
|
||||||
def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str:
|
def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str:
|
||||||
|
|||||||
@@ -1,26 +1,24 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.commands.file_operations import read_file
|
||||||
from autogpt.commands.file_operations import read_file, write_to_file
|
from autogpt.workspace import Workspace
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import (
|
from tests.challenges.utils import generate_noise, get_workspace_path, run_challenge
|
||||||
generate_noise,
|
|
||||||
get_workspace_path_from_agent,
|
|
||||||
run_interaction_loop,
|
|
||||||
)
|
|
||||||
|
|
||||||
NOISE = 1200
|
NOISE = 1200
|
||||||
OUTPUT_LOCATION = "output.txt"
|
OUTPUT_LOCATION = "output.txt"
|
||||||
|
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_memory_challenge_c(
|
def test_memory_challenge_c(
|
||||||
memory_management_agent: Agent,
|
|
||||||
patched_api_requestor: MockerFixture,
|
patched_api_requestor: MockerFixture,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Instead of reading task Ids from files as with the previous challenges, the agent now must remember
|
Instead of reading task Ids from files as with the previous challenges, the agent now must remember
|
||||||
@@ -28,7 +26,7 @@ def test_memory_challenge_c(
|
|||||||
after seeing several of them.
|
after seeing several of them.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
memory_management_agent (Agent)
|
workspace (Workspace)
|
||||||
patched_api_requestor (MockerFixture)
|
patched_api_requestor (MockerFixture)
|
||||||
monkeypatch (pytest.MonkeyPatch)
|
monkeypatch (pytest.MonkeyPatch)
|
||||||
level_to_run (int)
|
level_to_run (int)
|
||||||
@@ -48,26 +46,23 @@ def test_memory_challenge_c(
|
|||||||
|
|
||||||
level_silly_phrases = silly_phrases[:level_to_run]
|
level_silly_phrases = silly_phrases[:level_to_run]
|
||||||
create_instructions_files(
|
create_instructions_files(
|
||||||
memory_management_agent,
|
workspace,
|
||||||
level_to_run,
|
level_to_run,
|
||||||
level_silly_phrases,
|
level_silly_phrases,
|
||||||
)
|
)
|
||||||
|
|
||||||
run_interaction_loop(
|
run_challenge(
|
||||||
monkeypatch,
|
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
|
||||||
memory_management_agent,
|
|
||||||
level_to_run + 2,
|
|
||||||
challenge_name,
|
|
||||||
level_to_run,
|
|
||||||
)
|
)
|
||||||
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
|
|
||||||
content = read_file(file_path, agent=memory_management_agent)
|
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||||
|
content = read_file(file_path, agent=workspace)
|
||||||
for phrase in level_silly_phrases:
|
for phrase in level_silly_phrases:
|
||||||
assert phrase in content, f"Expected the file to contain {phrase}"
|
assert phrase in content, f"Expected the file to contain {phrase}"
|
||||||
|
|
||||||
|
|
||||||
def create_instructions_files(
|
def create_instructions_files(
|
||||||
memory_management_agent: Agent,
|
workspace: Workspace,
|
||||||
level: int,
|
level: int,
|
||||||
task_ids: list,
|
task_ids: list,
|
||||||
base_filename: str = "instructions_",
|
base_filename: str = "instructions_",
|
||||||
@@ -76,7 +71,7 @@ def create_instructions_files(
|
|||||||
Creates a series of instructions files for the memory challenge.
|
Creates a series of instructions files for the memory challenge.
|
||||||
Args:
|
Args:
|
||||||
level:
|
level:
|
||||||
memory_management_agent (Agent)
|
workspace (Workspace)
|
||||||
num_files (int)
|
num_files (int)
|
||||||
task_ids (list)
|
task_ids (list)
|
||||||
base_filename (str, optional)
|
base_filename (str, optional)
|
||||||
@@ -84,8 +79,9 @@ def create_instructions_files(
|
|||||||
for i in range(1, level + 1):
|
for i in range(1, level + 1):
|
||||||
content = generate_content(i, task_ids, base_filename, level)
|
content = generate_content(i, task_ids, base_filename, level)
|
||||||
file_name = f"{base_filename}{i}.txt"
|
file_name = f"{base_filename}{i}.txt"
|
||||||
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
|
file_path = get_workspace_path(workspace, file_name)
|
||||||
write_to_file(file_path, content, memory_management_agent)
|
with open(file_path, "w") as file:
|
||||||
|
file.write(content)
|
||||||
|
|
||||||
|
|
||||||
def generate_content(
|
def generate_content(
|
||||||
|
|||||||
@@ -4,28 +4,30 @@ from typing import Dict
|
|||||||
import pytest
|
import pytest
|
||||||
from pytest_mock import MockerFixture
|
from pytest_mock import MockerFixture
|
||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.commands.file_operations import read_file
|
||||||
from autogpt.commands.file_operations import read_file, write_to_file
|
from autogpt.workspace import Workspace
|
||||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||||
from tests.challenges.utils import get_workspace_path_from_agent, run_interaction_loop
|
from tests.challenges.utils import get_workspace_path, run_challenge
|
||||||
|
|
||||||
LEVEL_CURRENTLY_BEATEN = 1
|
LEVEL_CURRENTLY_BEATEN = 1
|
||||||
MAX_LEVEL = 5
|
MAX_LEVEL = 5
|
||||||
OUTPUT_LOCATION = "output.txt"
|
OUTPUT_LOCATION = "output.txt"
|
||||||
|
USER_INPUT = "Use the command read_file to read the instructions_1.txt file\nFollow the instructions in the instructions_1.txt file"
|
||||||
|
|
||||||
|
|
||||||
@challenge()
|
@challenge()
|
||||||
def test_memory_challenge_d(
|
def test_memory_challenge_d(
|
||||||
memory_management_agent: Agent,
|
|
||||||
patched_api_requestor: MockerFixture,
|
patched_api_requestor: MockerFixture,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
level_to_run: int,
|
level_to_run: int,
|
||||||
challenge_name: str,
|
challenge_name: str,
|
||||||
|
workspace: Workspace,
|
||||||
|
patched_make_workspace: pytest.fixture,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
The agent is given a series of events and must remember the respective beliefs of the characters.
|
The agent is given a series of events and must remember the respective beliefs of the characters.
|
||||||
Args:
|
Args:
|
||||||
memory_management_agent (Agent)
|
workspace (Workspace)
|
||||||
user_selected_level (int)
|
user_selected_level (int)
|
||||||
"""
|
"""
|
||||||
sally_anne_test_phrases = [
|
sally_anne_test_phrases = [
|
||||||
@@ -36,19 +38,14 @@ def test_memory_challenge_d(
|
|||||||
"Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).",
|
"Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).",
|
||||||
]
|
]
|
||||||
level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run]
|
level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run]
|
||||||
create_instructions_files(
|
create_instructions_files(workspace, level_to_run, level_sally_anne_test_phrases)
|
||||||
memory_management_agent, level_to_run, level_sally_anne_test_phrases
|
run_challenge(
|
||||||
|
challenge_name, level_to_run, monkeypatch, USER_INPUT, level_to_run + 2
|
||||||
)
|
)
|
||||||
run_interaction_loop(
|
|
||||||
monkeypatch,
|
|
||||||
memory_management_agent,
|
|
||||||
level_to_run + 2,
|
|
||||||
challenge_name,
|
|
||||||
level_to_run,
|
|
||||||
)
|
|
||||||
file_path = get_workspace_path_from_agent(memory_management_agent, OUTPUT_LOCATION)
|
|
||||||
|
|
||||||
content = read_file(file_path, memory_management_agent)
|
file_path = get_workspace_path(workspace, OUTPUT_LOCATION)
|
||||||
|
|
||||||
|
content = read_file(file_path, workspace)
|
||||||
check_beliefs(content, level_to_run)
|
check_beliefs(content, level_to_run)
|
||||||
|
|
||||||
|
|
||||||
@@ -176,7 +173,7 @@ def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]:
|
|||||||
|
|
||||||
|
|
||||||
def create_instructions_files(
|
def create_instructions_files(
|
||||||
memory_management_agent: Agent,
|
workspace: Workspace,
|
||||||
level: int,
|
level: int,
|
||||||
test_phrases: list,
|
test_phrases: list,
|
||||||
base_filename: str = "instructions_",
|
base_filename: str = "instructions_",
|
||||||
@@ -185,15 +182,16 @@ def create_instructions_files(
|
|||||||
Creates a series of instructions files for the memory challenge.
|
Creates a series of instructions files for the memory challenge.
|
||||||
Args:
|
Args:
|
||||||
level:
|
level:
|
||||||
memory_management_agent (Agent)
|
workspace (Workspace)
|
||||||
test_phrases (list)
|
test_phrases (list)
|
||||||
base_filename (str, optional)
|
base_filename (str, optional)
|
||||||
"""
|
"""
|
||||||
for i in range(1, level + 1):
|
for i in range(1, level + 1):
|
||||||
content = generate_content(i, test_phrases, base_filename, level)
|
content = generate_content(i, test_phrases, base_filename, level)
|
||||||
file_name = f"{base_filename}{i}.txt"
|
file_name = f"{base_filename}{i}.txt"
|
||||||
file_path = get_workspace_path_from_agent(memory_management_agent, file_name)
|
file_path = get_workspace_path(workspace, file_name)
|
||||||
write_to_file(file_path, content, memory_management_agent)
|
with open(file_path, "w") as file:
|
||||||
|
file.write(content)
|
||||||
|
|
||||||
|
|
||||||
def generate_content(
|
def generate_content(
|
||||||
|
|||||||
@@ -6,9 +6,10 @@ from typing import Any, Generator
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from autogpt.agent import Agent
|
|
||||||
from autogpt.log_cycle.log_cycle import LogCycleHandler
|
from autogpt.log_cycle.log_cycle import LogCycleHandler
|
||||||
from autogpt.workspace import Workspace
|
from autogpt.workspace import Workspace
|
||||||
|
from benchmarks import run_task
|
||||||
|
from tests.challenges.schema import Task
|
||||||
|
|
||||||
|
|
||||||
def generate_noise(noise_size: int) -> str:
|
def generate_noise(noise_size: int) -> str:
|
||||||
@@ -40,20 +41,6 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None:
|
|||||||
monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen))
|
monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen))
|
||||||
|
|
||||||
|
|
||||||
def run_interaction_loop(
|
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
|
||||||
agent: Agent,
|
|
||||||
cycle_count: int,
|
|
||||||
challenge_name: str,
|
|
||||||
level_to_run: int,
|
|
||||||
) -> None:
|
|
||||||
setup_mock_input(monkeypatch, cycle_count)
|
|
||||||
|
|
||||||
setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
|
|
||||||
with contextlib.suppress(SystemExit):
|
|
||||||
agent.start_interaction_loop()
|
|
||||||
|
|
||||||
|
|
||||||
def setup_mock_log_cycle_agent_name(
|
def setup_mock_log_cycle_agent_name(
|
||||||
monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int
|
monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int
|
||||||
) -> None:
|
) -> None:
|
||||||
@@ -69,13 +56,23 @@ def get_workspace_path(workspace: Workspace, file_name: str) -> str:
|
|||||||
return str(workspace.get_path(file_name))
|
return str(workspace.get_path(file_name))
|
||||||
|
|
||||||
|
|
||||||
def get_workspace_path_from_agent(agent: Agent, file_name: str) -> str:
|
|
||||||
return str(agent.workspace.get_path(file_name))
|
|
||||||
|
|
||||||
|
|
||||||
def copy_file_into_workspace(
|
def copy_file_into_workspace(
|
||||||
agent: Agent, directory_path: Path, file_path: str
|
workspace: Workspace, directory_path: Path, file_path: str
|
||||||
) -> None:
|
) -> None:
|
||||||
workspace_code_file_path = get_workspace_path_from_agent(agent, file_path)
|
workspace_code_file_path = get_workspace_path(workspace, file_path)
|
||||||
code_file_path = directory_path / file_path
|
code_file_path = directory_path / file_path
|
||||||
shutil.copy(code_file_path, workspace_code_file_path)
|
shutil.copy(code_file_path, workspace_code_file_path)
|
||||||
|
|
||||||
|
|
||||||
|
def run_challenge(
|
||||||
|
challenge_name: str,
|
||||||
|
level_to_run: int,
|
||||||
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
|
user_input: str,
|
||||||
|
cycle_count: int,
|
||||||
|
) -> None:
|
||||||
|
setup_mock_input(monkeypatch, cycle_count)
|
||||||
|
setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
|
||||||
|
task = Task(user_input=user_input)
|
||||||
|
with contextlib.suppress(SystemExit):
|
||||||
|
run_task(task)
|
||||||
|
|||||||
@@ -2,259 +2,46 @@ import pytest
|
|||||||
|
|
||||||
from autogpt.agent import Agent
|
from autogpt.agent import Agent
|
||||||
from autogpt.config import AIConfig, Config
|
from autogpt.config import AIConfig, Config
|
||||||
from autogpt.main import COMMAND_CATEGORIES
|
from autogpt.memory.vector import get_memory
|
||||||
from autogpt.memory.vector import NoMemory, get_memory
|
|
||||||
from autogpt.models.command_registry import CommandRegistry
|
from autogpt.models.command_registry import CommandRegistry
|
||||||
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
|
|
||||||
from autogpt.workspace import Workspace
|
from autogpt.workspace import Workspace
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def agent_test_config(config: Config):
|
def memory_json_file(config: Config):
|
||||||
config.set_continuous_mode(False)
|
was_memory_backend = config.memory_backend
|
||||||
config.set_temperature(0)
|
|
||||||
config.plain_output = True
|
|
||||||
return config
|
|
||||||
|
|
||||||
|
config.set_memory_backend("json_file")
|
||||||
@pytest.fixture
|
memory = get_memory(config)
|
||||||
def memory_json_file(agent_test_config: Config):
|
|
||||||
was_memory_backend = agent_test_config.memory_backend
|
|
||||||
|
|
||||||
agent_test_config.set_memory_backend("json_file")
|
|
||||||
memory = get_memory(agent_test_config)
|
|
||||||
memory.clear()
|
memory.clear()
|
||||||
yield memory
|
yield memory
|
||||||
|
|
||||||
agent_test_config.set_memory_backend(was_memory_backend)
|
config.set_memory_backend(was_memory_backend)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace):
|
def dummy_agent(config: Config, memory_json_file, workspace: Workspace):
|
||||||
command_registry = CommandRegistry()
|
command_registry = CommandRegistry()
|
||||||
command_registry.import_commands("autogpt.commands.file_operations")
|
|
||||||
command_registry.import_commands("autogpt.commands.web_selenium")
|
|
||||||
command_registry.import_commands("autogpt.app")
|
|
||||||
command_registry.import_commands("autogpt.commands.task_statuses")
|
|
||||||
|
|
||||||
ai_config = AIConfig(
|
ai_config = AIConfig(
|
||||||
ai_name="browse_website-GPT",
|
ai_name="Dummy Agent",
|
||||||
ai_role="an AI designed to use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html, answer the question 'What is the price of the book?' and write the price to a file named \"browse_website.txt\", and use the task_complete command to complete the task.",
|
ai_role="Dummy Role",
|
||||||
ai_goals=[
|
ai_goals=[
|
||||||
"Use the browse_website command to visit http://books.toscrape.com/catalogue/meditations_33/index.html and answer the question 'What is the price of the book?'",
|
"Dummy Task",
|
||||||
'Write the price of the book to a file named "browse_website.txt".',
|
|
||||||
"Use the task_complete command to complete the task.",
|
|
||||||
"Do not use any other commands.",
|
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
ai_config.command_registry = command_registry
|
ai_config.command_registry = command_registry
|
||||||
|
|
||||||
system_prompt = ai_config.construct_full_prompt(agent_test_config)
|
|
||||||
|
|
||||||
agent = Agent(
|
agent = Agent(
|
||||||
ai_name="",
|
ai_name="Dummy Agent",
|
||||||
memory=memory_none,
|
|
||||||
command_registry=command_registry,
|
|
||||||
ai_config=ai_config,
|
|
||||||
config=agent_test_config,
|
|
||||||
next_action_count=0,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
|
||||||
workspace_directory=workspace.root,
|
|
||||||
)
|
|
||||||
|
|
||||||
return agent
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace):
|
|
||||||
command_registry = get_command_registry(agent_test_config)
|
|
||||||
|
|
||||||
ai_config = AIConfig(
|
|
||||||
ai_name="Follow-Instructions-GPT",
|
|
||||||
ai_role="an AI designed to read the instructions_1.txt file using the read_file method and follow the instructions in the file.",
|
|
||||||
ai_goals=[
|
|
||||||
"Use the command read_file to read the instructions_1.txt file",
|
|
||||||
"Follow the instructions in the instructions_1.txt file",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
ai_config.command_registry = command_registry
|
|
||||||
|
|
||||||
system_prompt = ai_config.construct_full_prompt(agent_test_config)
|
|
||||||
|
|
||||||
agent = Agent(
|
|
||||||
ai_name="Follow-Instructions-GPT",
|
|
||||||
memory=memory_json_file,
|
memory=memory_json_file,
|
||||||
command_registry=command_registry,
|
command_registry=command_registry,
|
||||||
ai_config=ai_config,
|
ai_config=ai_config,
|
||||||
config=agent_test_config,
|
config=config,
|
||||||
next_action_count=0,
|
next_action_count=0,
|
||||||
system_prompt=system_prompt,
|
system_prompt="dummy_prompt",
|
||||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
triggering_prompt="dummy triggering prompt",
|
||||||
workspace_directory=workspace.root,
|
workspace_directory=workspace.root,
|
||||||
)
|
)
|
||||||
|
|
||||||
return agent
|
return agent
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def information_retrieval_agents(
|
|
||||||
agent_test_config, memory_json_file, workspace: Workspace
|
|
||||||
):
|
|
||||||
agents = []
|
|
||||||
command_registry = get_command_registry(agent_test_config)
|
|
||||||
|
|
||||||
ai_goals = [
|
|
||||||
"Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.",
|
|
||||||
"Write to a file called output.txt containing tesla's revenue in 2022.",
|
|
||||||
"Write to a file called output.txt containing tesla's revenue every year since its creation.",
|
|
||||||
]
|
|
||||||
for ai_goal in ai_goals:
|
|
||||||
ai_config = AIConfig(
|
|
||||||
ai_name="Information Retrieval Agent",
|
|
||||||
ai_role="an autonomous agent that specializes in retrieving information.",
|
|
||||||
ai_goals=[ai_goal],
|
|
||||||
)
|
|
||||||
ai_config.command_registry = command_registry
|
|
||||||
system_prompt = ai_config.construct_full_prompt(agent_test_config)
|
|
||||||
agent_test_config.set_continuous_mode(False)
|
|
||||||
agents.append(
|
|
||||||
Agent(
|
|
||||||
ai_name="Information Retrieval Agent",
|
|
||||||
memory=memory_json_file,
|
|
||||||
command_registry=command_registry,
|
|
||||||
ai_config=ai_config,
|
|
||||||
config=agent_test_config,
|
|
||||||
next_action_count=0,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
|
||||||
workspace_directory=workspace.root,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return agents
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def kubernetes_agent(
|
|
||||||
agent_test_config: Config, memory_json_file: NoMemory, workspace: Workspace
|
|
||||||
) -> Agent:
|
|
||||||
command_registry = CommandRegistry()
|
|
||||||
command_registry.import_commands("autogpt.commands.file_operations")
|
|
||||||
command_registry.import_commands("autogpt.app")
|
|
||||||
|
|
||||||
ai_config = AIConfig(
|
|
||||||
ai_name="Kubernetes",
|
|
||||||
ai_role="an autonomous agent that specializes in creating Kubernetes deployment templates.",
|
|
||||||
ai_goals=[
|
|
||||||
"Write a simple kubernetes deployment file and save it as a kube.yaml.",
|
|
||||||
# You should make a simple nginx web server that uses docker and exposes the port 80.
|
|
||||||
],
|
|
||||||
)
|
|
||||||
ai_config.command_registry = command_registry
|
|
||||||
|
|
||||||
system_prompt = ai_config.construct_full_prompt(agent_test_config)
|
|
||||||
agent_test_config.set_continuous_mode(False)
|
|
||||||
agent = Agent(
|
|
||||||
ai_name="Kubernetes-Demo",
|
|
||||||
memory=memory_json_file,
|
|
||||||
command_registry=command_registry,
|
|
||||||
ai_config=ai_config,
|
|
||||||
config=agent_test_config,
|
|
||||||
next_action_count=0,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
|
||||||
workspace_directory=workspace.root,
|
|
||||||
)
|
|
||||||
|
|
||||||
return agent
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Workspace):
|
|
||||||
command_registry = CommandRegistry()
|
|
||||||
command_registry.import_commands("autogpt.commands.file_operations")
|
|
||||||
command_registry.import_commands("autogpt.app")
|
|
||||||
command_registry.import_commands("autogpt.commands.web_selenium")
|
|
||||||
|
|
||||||
ai_config = AIConfig(
|
|
||||||
ai_name="Get-PhysicsNobelPrize",
|
|
||||||
ai_role="An autonomous agent that specializes in physics history.",
|
|
||||||
ai_goals=[
|
|
||||||
"Write to file the winner's name(s), affiliated university, and discovery of the 2010 nobel prize in physics. Write your final answer to 2010_nobel_prize_winners.txt.",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
ai_config.command_registry = command_registry
|
|
||||||
|
|
||||||
system_prompt = ai_config.construct_full_prompt(agent_test_config)
|
|
||||||
agent_test_config.set_continuous_mode(False)
|
|
||||||
|
|
||||||
agent = Agent(
|
|
||||||
ai_name="Get-PhysicsNobelPrize",
|
|
||||||
memory=memory_json_file,
|
|
||||||
command_registry=command_registry,
|
|
||||||
ai_config=ai_config,
|
|
||||||
config=agent_test_config,
|
|
||||||
next_action_count=0,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
|
||||||
workspace_directory=workspace.root,
|
|
||||||
)
|
|
||||||
|
|
||||||
return agent
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace):
|
|
||||||
agents = []
|
|
||||||
goals = [
|
|
||||||
[
|
|
||||||
"1- Run test.py using the execute_python_file command.",
|
|
||||||
"2- Read code.py using the read_file command.",
|
|
||||||
"3- Modify code.py using the write_to_file command."
|
|
||||||
"Repeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
|
|
||||||
],
|
|
||||||
[
|
|
||||||
"1- Run test.py.",
|
|
||||||
"2- Read code.py.",
|
|
||||||
"3- Modify code.py."
|
|
||||||
"Repeat step 1, 2 and 3 until test.py runs without errors.",
|
|
||||||
],
|
|
||||||
["1- Make test.py run without errors."],
|
|
||||||
]
|
|
||||||
|
|
||||||
for goal in goals:
|
|
||||||
ai_config = AIConfig(
|
|
||||||
ai_name="Debug Code Agent",
|
|
||||||
ai_role="an autonomous agent that specializes in debugging python code",
|
|
||||||
ai_goals=goal,
|
|
||||||
)
|
|
||||||
command_registry = get_command_registry(agent_test_config)
|
|
||||||
ai_config.command_registry = command_registry
|
|
||||||
system_prompt = ai_config.construct_full_prompt(agent_test_config)
|
|
||||||
agent_test_config.set_continuous_mode(False)
|
|
||||||
agents.append(
|
|
||||||
Agent(
|
|
||||||
ai_name="Debug Code Agent",
|
|
||||||
memory=memory_json_file,
|
|
||||||
command_registry=command_registry,
|
|
||||||
ai_config=ai_config,
|
|
||||||
config=agent_test_config,
|
|
||||||
next_action_count=0,
|
|
||||||
system_prompt=system_prompt,
|
|
||||||
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
|
|
||||||
workspace_directory=workspace.root,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return agents
|
|
||||||
|
|
||||||
|
|
||||||
def get_command_registry(agent_test_config):
|
|
||||||
command_registry = CommandRegistry()
|
|
||||||
enabled_command_categories = [
|
|
||||||
x
|
|
||||||
for x in COMMAND_CATEGORIES
|
|
||||||
if x not in agent_test_config.disabled_command_categories
|
|
||||||
]
|
|
||||||
for command_category in enabled_command_categories:
|
|
||||||
command_registry.import_commands(command_category)
|
|
||||||
return command_registry
|
|
||||||
|
|||||||
Reference in New Issue
Block a user