Link all challenges to benchmark python hook (#4786)

2026-02-06 14:54:40 +01:00 · 2023-06-24 06:20:58 -07:00
parent 307f6e50ad
commit cfdb24efac
13 changed files with 218 additions and 402 deletions
--- a/tests/challenges/debug_code/test_debug_code_challenge_a.py
+++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py
@@ -5,11 +5,12 @@ from pytest_mock import MockerFixture

 from autogpt.agent import Agent
 from autogpt.commands.execute_code import execute_python_file
+from autogpt.workspace import Workspace
 from tests.challenges.challenge_decorator.challenge_decorator import challenge
 from tests.challenges.utils import (
    copy_file_into_workspace,
-    get_workspace_path_from_agent,
-    run_interaction_loop,
+    get_workspace_path,
+    run_challenge,
 )

 CYCLE_COUNT = 5
@@ -17,15 +18,22 @@ EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"]
 DIRECTORY_PATH = Path(__file__).parent / "data"
 CODE_FILE_PATH = "code.py"
 TEST_FILE_PATH = "test.py"
+USER_INPUTS = [
+    "1- Run test.py using the execute_python_file command.\n2- Read code.py using the read_file command.\n3- Modify code.py using the write_to_file command.\nRepeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.",
+    "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
+    "Make test.py run without errors.",
+]


@challenge()
 def test_debug_code_challenge_a(
-    debug_code_agents: Agent,
+    dummy_agent: Agent,
    monkeypatch: pytest.MonkeyPatch,
    patched_api_requestor: MockerFixture,
    level_to_run: int,
    challenge_name: str,
+    workspace: Workspace,
+    patched_make_workspace: pytest.fixture,
 ) -> None:
    """
    Test whether the agent can debug a simple code snippet.
@@ -35,18 +43,21 @@ def test_debug_code_challenge_a(
    :patched_api_requestor: Sends api requests to our API CI pipeline
    :level_to_run: The level to run.
    """
-    debug_code_agent = debug_code_agents[level_to_run - 1]

-    copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH)
-    copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH)
+    copy_file_into_workspace(workspace, DIRECTORY_PATH, CODE_FILE_PATH)
+    copy_file_into_workspace(workspace, DIRECTORY_PATH, TEST_FILE_PATH)

-    run_interaction_loop(
-        monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run
+    run_challenge(
+        challenge_name,
+        level_to_run,
+        monkeypatch,
+        USER_INPUTS[level_to_run - 1],
+        CYCLE_COUNT,
    )

    output = execute_python_file(
-        get_workspace_path_from_agent(debug_code_agent, TEST_FILE_PATH),
-        debug_code_agent,
+        get_workspace_path(workspace, TEST_FILE_PATH),
+        dummy_agent,
    )

    assert "error" not in output.lower(), f"Errors found in output: {output}!"