diff --git a/BULLETIN.md b/BULLETIN.md
index 17c38b8c..70be3c3e 100644
--- a/BULLETIN.md
+++ b/BULLETIN.md
@@ -51,3 +51,9 @@ memory store was also temporarily removed but we aim to merge a new implementati
 before the next release.
 Whether built-in support for the others will be added back in the future is subject to
 discussion, feel free to pitch in: https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280
+
+# Challenge Workflow 🏆
+If you have been working on challenges... Thank You!
+To run the debugger challenge, or any other challenge that uses cassettes and VCR in Docker, you will now need to `pip uninstall vcrpy` and then `pip install -r requirements.txt` again.
+This installs a new version of vcrpy that is compatible with running VCR in Docker.
+This extra step will no longer be needed once the VCRpy maintainer merges our changes upstream.
diff --git a/requirements.txt b/requirements.txt
index 542f9b50..31f7706a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -58,6 +58,6 @@ pytest-benchmark
 pytest-cov
 pytest-integration
 pytest-mock
-vcrpy
+vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master
 pytest-recording
 pytest-xdist
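In practice, the one-time migration described in BULLETIN.md is just the two commands below, taken verbatim from the bulletin; run them in the same environment or container you use for the challenges:

```sh
# Remove the PyPI build of vcrpy, then reinstall from requirements.txt,
# which now pins the Significant-Gravitas fork (see the diff above).
pip uninstall vcrpy
pip install -r requirements.txt
```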
diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py
index 716545c2..30d9cc13 100644
--- a/tests/integration/agent_factory.py
+++ b/tests/integration/agent_factory.py
@@ -246,3 +246,41 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp
     )
 
     return agent
+
+
+@pytest.fixture
+def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace):
+    command_registry = CommandRegistry()
+    command_registry.import_commands("autogpt.commands.file_operations")
+    command_registry.import_commands("autogpt.commands.execute_code")
+    command_registry.import_commands("autogpt.commands.improve_code")
+    command_registry.import_commands("autogpt.app")
+    command_registry.import_commands("autogpt.commands.task_statuses")
+
+    ai_config = AIConfig(
+        ai_name="Debug Code Agent",
+        ai_role="an autonomous agent that specializes in debugging python code",
+        ai_goals=[
+            "1-Run the code in the file named 'code.py' using the execute_code command.",
+            "2-Read code.py to understand why the code is not working as expected.",
+            "3-Modify code.py to fix the error.",
+            "Repeat step 1, 2 and 3 until the code is working as expected. When you're done use the task_complete command.",
+            "Do not use any other commands than execute_python_file and write_file",
+        ],
+    )
+    ai_config.command_registry = command_registry
+
+    system_prompt = ai_config.construct_full_prompt()
+    Config().set_continuous_mode(False)
+    agent = Agent(
+        ai_name="Debug Code Agent",
+        memory=memory_json_file,
+        command_registry=command_registry,
+        config=ai_config,
+        next_action_count=0,
+        system_prompt=system_prompt,
+        triggering_prompt=DEFAULT_TRIGGERING_PROMPT,
+        workspace_directory=workspace.root,
+    )
+
+    return agent
diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json
index d2b0b30a..80e19d7c 100644
--- a/tests/integration/challenges/current_score.json
+++ b/tests/integration/challenges/current_score.json
@@ -19,6 +19,12 @@
       "max_level_beaten": 1
     }
   },
+  "debug_code": {
+    "debug_code_challenge_a": {
+      "max_level": 1,
+      "max_level_beaten": 1
+    }
+  },
   "kubernetes": {
     "kubernetes_template_challenge_a": {
       "max_level": 1,
@@ -39,4 +45,4 @@
       "max_level_beaten": 1
     }
   }
-}
\ No newline at end of file
+}
diff --git a/tests/integration/challenges/debug_code/data/two_sum.py b/tests/integration/challenges/debug_code/data/two_sum.py
new file mode 100644
index 00000000..305cff4e
--- /dev/null
+++ b/tests/integration/challenges/debug_code/data/two_sum.py
@@ -0,0 +1,19 @@
+# mypy: ignore-errors
+from typing import List, Optional
+
+
+def two_sum(nums: List[int], target: int) -> Optional[List[int]]:
+    seen = {}
+    for i, num in enumerate(nums):
+        complement = target - num
+        if complement in seen:
+            return [seen[complement], i]
+        seen[num] = i
+    return None
+
+
+# Example usage:
+nums = [2, 7, 11, 15]
+target = 9
+result = two_sum(nums, target)
+print(result)  # Output: [0, 1]
diff --git a/tests/integration/challenges/debug_code/data/two_sum_tests.py b/tests/integration/challenges/debug_code/data/two_sum_tests.py
new file mode 100644
index 00000000..0eb89bcb
--- /dev/null
+++ b/tests/integration/challenges/debug_code/data/two_sum_tests.py
@@ -0,0 +1,30 @@
+# mypy: ignore-errors
+# this file is appended to the agent's code.py, so the blank lines below keep the concatenation valid Python
+
+
+def test_two_sum(nums, target, expected_result):
+    # two_sum is not defined in this file; it comes from the code these tests are appended to
+    result = two_sum(nums, target)
+    print(result)
+    assert (
+        result == expected_result
+    ), f"AssertionError: Expected the output to be {expected_result}"
+
+
+# test the trivial case with the first two numbers
+nums = [2, 7, 11, 15]
+target = 9
+expected_result = [0, 1]
+test_two_sum(nums, target, expected_result)
+
+# test for ability to use zero and the same number twice
+nums = [2, 7, 0, 15, 12, 0]
+target = 0
+expected_result = [2, 5]
+test_two_sum(nums, target, expected_result)
+
+# test for first and last index usage and negative numbers
+nums = [-6, 7, 11, 4]
+target = -2
+expected_result = [0, 3]
+test_two_sum(nums, target, expected_result)
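To make the relationship between the two data files concrete, here is a minimal sketch (not part of the diff) of the composition the challenge performs: two_sum.py is written into the workspace as code.py, the agent edits it, two_sum_tests.py is appended, and the combined file is executed. The relative `data` path and the `scratch` filename are illustrative only.

```python
import subprocess
import sys
from pathlib import Path

data = Path("tests/integration/challenges/debug_code/data")

# two_sum_tests.py begins with blank lines, so appending it to the
# (agent-fixed) code keeps the result valid Python.
combined = (data / "two_sum.py").read_text() + (data / "two_sum_tests.py").read_text()

scratch = Path("code.py")
scratch.write_text(combined)

# A failing test prints an AssertionError; the challenge later detects this
# by scanning the captured output for the substring "error".
result = subprocess.run([sys.executable, str(scratch)], capture_output=True, text=True)
print(result.stdout or result.stderr)
```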
diff --git a/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py b/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py
new file mode 100644
index 00000000..008e562c
--- /dev/null
+++ b/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import pytest
+from pytest_mock import MockerFixture
+
+from autogpt.agent import Agent
+from autogpt.commands.execute_code import execute_python_file
+from autogpt.commands.file_operations import append_to_file, write_to_file
+from autogpt.config import Config
+from tests.integration.challenges.challenge_decorator.challenge_decorator import (
+    challenge,
+)
+from tests.integration.challenges.utils import run_interaction_loop
+from tests.utils import requires_api_key
+
+CYCLE_COUNT = 5
+
+
+@pytest.mark.vcr
+@requires_api_key("OPENAI_API_KEY")
+@challenge
+def test_debug_code_challenge_a(
+    debug_code_agent: Agent,
+    monkeypatch: pytest.MonkeyPatch,
+    patched_api_requestor: MockerFixture,
+    config: Config,
+    level_to_run: int,
+) -> None:
+    """
+    Test whether the agent can debug a simple code snippet.
+
+    :param debug_code_agent: The agent to test.
+    :param monkeypatch: pytest's monkeypatch utility for modifying builtins.
+    :param patched_api_requestor: Sends API requests through our CI pipeline.
+    :param config: The config object for the agent.
+    :param level_to_run: The level to run.
+    """
+
+    file_path = str(debug_code_agent.workspace.get_path("code.py"))
+
+    code_file_path = Path(__file__).parent / "data" / "two_sum.py"
+    test_file_path = Path(__file__).parent / "data" / "two_sum_tests.py"
+
+    write_to_file(file_path, code_file_path.read_text(), config)
+
+    run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT)
+
+    append_to_file(file_path, test_file_path.read_text(), config)
+
+    output = execute_python_file(file_path, config)
+    assert "error" not in output.lower(), f"Errors found in output: {output}!"
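A plausible local invocation for the new challenge, assuming pytest-recording's standard `--record-mode` option; `OPENAI_API_KEY` must be set because the `requires_api_key` decorator skips the test without it:

```sh
export OPENAI_API_KEY=sk-...  # placeholder key
# --record-mode=none replays the committed cassettes; pytest-recording also
# accepts modes such as "rewrite" when you need to re-record them.
pytest tests/integration/challenges/debug_code/test_debug_code_challenge_a.py --record-mode=none
```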