diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9d58649..2c638c81 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,6 +81,7 @@ jobs: - "Auto-GPT" - "mini-agi" - "beebot" + - "BabyAGI" steps: - name: Checkout repository @@ -132,6 +133,10 @@ jobs: python -m venv venv source venv/bin/activate pip install -r requirements.txt + elif [ "$AGENT_NAME" == "BabyAGI" ]; then + python -m venv venv + source venv/bin/activate + pip install -r requirements.txt elif [ "$AGENT_NAME" == "SuperAGI" ]; then cp config_template.yaml config.yaml sed -i 's/OPENAI_API_KEY:.*/OPENAI_API_KEY: "'"${{ secrets.OPENAI_API_KEY }}"'"/' config.yaml diff --git a/.gitmodules b/.gitmodules index 389314b1..f4ae34af 100644 --- a/.gitmodules +++ b/.gitmodules @@ -18,6 +18,10 @@ path = agent/SuperAGI url = https://github.com/SilenNaihin/SuperAGI.git branch = benchmark-integration +[submodule "agent/BabyAGI"] + path = agent/BabyAGI + url = https://github.com/SilenNaihin/babyagi.git + branch = benchmark-integration [submodule "agent/beebot"] path = agent/beebot url = https://github.com/merwanehamadi/beebot.git diff --git a/agbenchmark/agent_interface.py b/agbenchmark/agent_interface.py index ff5bc890..80537daf 100644 --- a/agbenchmark/agent_interface.py +++ b/agbenchmark/agent_interface.py @@ -27,7 +27,11 @@ def run_agent( else: entry_path = "agbenchmark.benchmarks" - print(f"Running Python function '{entry_path}' with timeout {cutoff}") + timeout = cutoff + if "--nc" in sys.argv: + timeout = 100000 + + print(f"Running Python function '{entry_path}' with timeout {timeout}") command = [sys.executable, "-m", entry_path, str(task)] process = subprocess.Popen( command, @@ -49,11 +53,11 @@ def run_agent( if ( process.poll() is not None or output == "" - or (time.time() - start_time > cutoff) + or (time.time() - start_time > timeout) ): break - if time.time() - start_time > cutoff: + if time.time() - start_time > timeout: print("The Python function has exceeded the time limit and was terminated.") process.kill() else: diff --git a/agbenchmark/challenge.py b/agbenchmark/challenge.py index cdaebed4..e054dddf 100644 --- a/agbenchmark/challenge.py +++ b/agbenchmark/challenge.py @@ -1,6 +1,7 @@ import glob import os import subprocess +import sys from abc import ABC from typing import Any, Dict, List @@ -72,7 +73,7 @@ class Challenge(ABC): for file_path in matching_files: if self.data.ground.type == "execute_python_code": result = subprocess.run( - ["python3", file_path], + [sys.executable, file_path], cwd=os.path.abspath(workspace), capture_output=True, text=True, diff --git a/agbenchmark/conftest.py b/agbenchmark/conftest.py index b544d2c6..cf5ebb82 100644 --- a/agbenchmark/conftest.py +++ b/agbenchmark/conftest.py @@ -81,6 +81,7 @@ def workspace(config: Dict[str, Any]) -> Generator[str, None, None]: def pytest_addoption(parser: Any) -> None: parser.addoption("--mock", action="store_true", default=False) + parser.addoption("--nc", action="store_true", default=False) parser.addoption("--improve", action="store_true", default=False) parser.addoption("--maintain", action="store_true", default=False) parser.addoption("--test", action="store_true", default=None) diff --git a/agbenchmark/reports/internal_info.json b/agbenchmark/reports/internal_info.json index e483d355..60e57d22 100644 --- a/agbenchmark/reports/internal_info.json +++ b/agbenchmark/reports/internal_info.json @@ -1,4 +1,10 @@ { + "BabyAGI": { + "TestWriteFile": [ + false, + false + ] + }, "TestBasicCodeGeneration": [ true ], @@ -142,4 +148,4 @@ false ] } -} \ No newline at end of file +} diff --git a/agbenchmark/start_benchmark.py b/agbenchmark/start_benchmark.py index ea17d152..de264df4 100644 --- a/agbenchmark/start_benchmark.py +++ b/agbenchmark/start_benchmark.py @@ -30,9 +30,13 @@ def cli() -> None: @click.option("--maintain", is_flag=True, help="Runs only regression tests") @click.option("--improve", is_flag=True, help="Run only non-regression tests") @click.option("--mock", is_flag=True, help="Run with mock") -def start(category: str, test: str, maintain: bool, improve: bool, mock: bool) -> int: +@click.option("--nc", is_flag=True, help="Run without cutoff") +def start( + category: str, test: str, maintain: bool, improve: bool, mock: bool, nc: bool +) -> int: """Start the benchmark tests. If a category flag is provided, run the categories with that mark.""" # Check if configuration file exists and is not empty + if maintain and improve: print( "Error: You can't use both --maintain and --improve at the same time. Please choose one." @@ -106,6 +110,9 @@ def start(category: str, test: str, maintain: bool, improve: bool, mock: bool) - if mock: pytest_args.append("--mock") + if nc: + pytest_args.append("--nc") + # when used as a library, the pytest directory to execute is in the CURRENT_DIRECTORY pytest_args.append(str(CURRENT_DIRECTORY)) diff --git a/agbenchmark/utils.py b/agbenchmark/utils.py index a1788fab..f5397ba6 100644 --- a/agbenchmark/utils.py +++ b/agbenchmark/utils.py @@ -16,10 +16,10 @@ from agbenchmark.challenges.define_task_types import DIFFICULTY_MAP, DifficultyL AGENT_NAME = os.getenv("AGENT_NAME") HOME_ENV = os.getenv("HOME_ENV") +report_location = os.getenv("REPORT_LOCATION", None) def calculate_info_test_path(reports_path: Path) -> str: - report_location = os.getenv("REPORT_LOCATION", ".") if report_location: reports_path = Path(os.getcwd()) / report_location @@ -62,12 +62,13 @@ def calculate_info_test_path(reports_path: Path) -> str: max_prefix = max(all_prefix_numbers, default=0) run_name = f"{max_prefix + 1}_{test_arg}.json" else: + print(f"Found {related_file_count} files with '{test_arg}' in the name") # Take the number from before the _ and add the .{number} - prefix_str = Path(related_files[0]).stem.rsplit("_", 1)[0].split(".")[0] + + prefix_str = Path(related_files[0]).stem.rsplit("_")[0].split(".")[0] prefix = math.floor(float(prefix_str)) run_name = f"{prefix}.{related_file_count}_{test_arg}.json" - print("run_namerun_namerun_name", run_name) new_file_path = reports_path / run_name return str(new_file_path) diff --git a/agent/BabyAGI b/agent/BabyAGI new file mode 160000 index 00000000..1da0147c --- /dev/null +++ b/agent/BabyAGI @@ -0,0 +1 @@ +Subproject commit 1da0147ccf6c34e8bc0c522d5eb4e5cab8f59f28