mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-22 16:34:25 +01:00
Link all challenges to benchmark python hook (#4786)
This commit is contained in:
@@ -5,11 +5,12 @@ from pytest_mock import MockerFixture
|
||||
|
||||
from autogpt.agent import Agent
|
||||
from autogpt.commands.execute_code import execute_python_file
|
||||
from autogpt.workspace import Workspace
|
||||
from tests.challenges.challenge_decorator.challenge_decorator import challenge
|
||||
from tests.challenges.utils import (
|
||||
copy_file_into_workspace,
|
||||
get_workspace_path_from_agent,
|
||||
run_interaction_loop,
|
||||
get_workspace_path,
|
||||
run_challenge,
|
||||
)
|
||||
|
||||
CYCLE_COUNT = 5
|
||||
@@ -17,15 +18,22 @@ EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
|
||||
DIRECTORY_PATH = Path(__file__).parent / "data"
|
||||
CODE_FILE_PATH = "code.py"
|
||||
TEST_FILE_PATH = "test.py"
|
||||
USER_INPUTS = [
|
||||
"1- Run test.py using the execute_python_file command.\n2- Read code.py using the read_file command.\n3- Modify code.py using the write_to_file command.\nRepeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
|
||||
"1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
|
||||
"Make test.py run without errors.",
|
||||
]
|
||||
|
||||
|
||||
@challenge()
|
||||
def test_debug_code_challenge_a(
|
||||
debug_code_agents: Agent,
|
||||
dummy_agent: Agent,
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
patched_api_requestor: MockerFixture,
|
||||
level_to_run: int,
|
||||
challenge_name: str,
|
||||
workspace: Workspace,
|
||||
patched_make_workspace: pytest.fixture,
|
||||
) -> None:
|
||||
"""
|
||||
Test whether the agent can debug a simple code snippet.
|
||||
@@ -35,18 +43,21 @@ def test_debug_code_challenge_a(
|
||||
:patched_api_requestor: Sends api requests to our API CI pipeline
|
||||
:level_to_run: The level to run.
|
||||
"""
|
||||
debug_code_agent = debug_code_agents[level_to_run - 1]
|
||||
|
||||
copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH)
|
||||
copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH)
|
||||
copy_file_into_workspace(workspace, DIRECTORY_PATH, CODE_FILE_PATH)
|
||||
copy_file_into_workspace(workspace, DIRECTORY_PATH, TEST_FILE_PATH)
|
||||
|
||||
run_interaction_loop(
|
||||
monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run
|
||||
run_challenge(
|
||||
challenge_name,
|
||||
level_to_run,
|
||||
monkeypatch,
|
||||
USER_INPUTS[level_to_run - 1],
|
||||
CYCLE_COUNT,
|
||||
)
|
||||
|
||||
output = execute_python_file(
|
||||
get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
|
||||
debug_code_agent,
|
||||
get_workspace_path(workspace, TEST_FILE_PATH),
|
||||
dummy_agent,
|
||||
)
|
||||
|
||||
assert "error" not in output.lower(), f"Errors found in output: {output}!"
|
||||
|
||||
Reference in New Issue
Block a user