diff --git a/agbenchmark/__init__.py b/agbenchmark/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks.py b/agbenchmark/benchmarks.py
similarity index 73%
rename from benchmarks.py
rename to agbenchmark/benchmarks.py
index 589b3f75..e8675f92 100644
--- a/benchmarks.py
+++ b/agbenchmark/benchmarks.py
@@ -1,4 +1,6 @@
+import sys
 from pathlib import Path
+from typing import Tuple
 
 from autogpt.agents import Agent
 from autogpt.app.main import run_interaction_loop
@@ -12,14 +14,15 @@ from autogpt.workspace import Workspace
 PROJECT_DIR = Path().resolve()
 
 
-def run_task(task) -> None:
-    agent = bootstrap_agent(task)
+def run_specific_agent(task, continuous_mode=False) -> Tuple[str, int]:
+    agent = bootstrap_agent(task, continuous_mode)
     run_interaction_loop(agent)
 
 
-def bootstrap_agent(task):
+def bootstrap_agent(task, continuous_mode) -> Agent:
     config = ConfigBuilder.build_config_from_env(workdir=PROJECT_DIR)
-    config.continuous_mode = False
+    config.debug_mode = True
+    config.continuous_mode = continuous_mode
     config.temperature = 0
     config.plain_output = True
     command_registry = get_command_registry(config)
@@ -29,7 +32,7 @@ def bootstrap_agent(task):
     ai_config = AIConfig(
         ai_name="Auto-GPT",
         ai_role="a multi-purpose AI assistant.",
-        ai_goals=[task.user_input],
+        ai_goals=[task],
     )
     ai_config.command_registry = command_registry
     return Agent(
@@ -50,3 +53,12 @@ def get_command_registry(config: Config):
     for command_category in enabled_command_categories:
         command_registry.import_commands(command_category)
     return command_registry
+
+
+if __name__ == "__main__":
+    # The first argument is the script name itself, second is the task
+    if len(sys.argv) != 2:
+        print("Usage: python script.py <task>")
+        sys.exit(1)
+    task = sys.argv[1]
+    run_specific_agent(task, continuous_mode=True)
diff --git a/agbenchmark/config.json b/agbenchmark/config.json
new file mode 100644
index 00000000..dd957f9e
--- /dev/null
+++ b/agbenchmark/config.json
@@ -0,0 +1,4 @@
+{
+  "workspace": "autogpt/workspace/auto_gpt_workspace",
+  "entry_path": "agbenchmark.benchmarks"
+}
diff --git a/agbenchmark/regression_tests.json b/agbenchmark/regression_tests.json
new file mode 100644
index 00000000..8d59b1a4
--- /dev/null
+++ b/agbenchmark/regression_tests.json
@@ -0,0 +1,24 @@
+{
+  "TestBasicCodeGeneration": {
+    "difficulty": "basic",
+    "dependencies": [
+      "TestWriteFile"
+    ],
+    "data_path": "agbenchmark/challenges/code/d3"
+  },
+  "TestBasicMemory": {
+    "difficulty": "basic",
+    "data_path": "agbenchmark/challenges/memory/m1"
+  },
+  "TestReadFile": {
+    "difficulty": "basic",
+    "dependencies": [
+      "TestWriteFile"
+    ],
+    "data_path": "agbenchmark/challenges/interface/read_file"
+  },
+  "TestWriteFile": {
+    "dependencies": [],
+    "data_path": "agbenchmark/challenges/interface/write_file"
+  }
+}
diff --git a/requirements.txt b/requirements.txt
index 4af8bccd..c150c262 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
 beautifulsoup4>=4.12.2
 colorama==0.4.6
 distro==1.8.0
-openai==0.27.2
+openai==0.27.8
 playsound==1.2.2
-python-dotenv==1.0.0
+python-dotenv==0.21
 pyyaml==6.0
 PyPDF2
 python-docx
@@ -31,6 +31,7 @@ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_
 prompt_toolkit>=3.0.38
 pydantic
 inflection
+agbenchmark
 
 # web server
 fastapi
diff --git a/tests/challenges/utils.py b/tests/challenges/utils.py
index 9d1b76e7..dd661b6e 100644
--- a/tests/challenges/utils.py
+++ b/tests/challenges/utils.py
@@ -6,9 +6,9 @@ from typing import Any, Generator
 
 import pytest
 
+from agbenchmark.benchmarks import run_specific_agent
 from autogpt.logs import LogCycleHandler
 from autogpt.workspace import Workspace
-from benchmarks import run_task
 from tests.challenges.schema import Task
 
 
@@ -75,4 +75,4 @@ def run_challenge(
     setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
     task = Task(user_input=user_input)
     with contextlib.suppress(SystemExit):
-        run_task(task)
+        run_specific_agent(task.user_input)
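For reference, a minimal usage sketch of the relocated entry point introduced above, mirroring what the new __main__ block does; it assumes an AutoGPT environment with API keys configured, and the task string is only an illustrative placeholder:

# Usage sketch (assumes a configured AutoGPT .env); the task text below is hypothetical.
from agbenchmark.benchmarks import run_specific_agent

# Equivalent to running: python agbenchmark/benchmarks.py "Write 'Hello World' to hello.txt"
run_specific_agent("Write 'Hello World' to hello.txt", continuous_mode=True)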