Integrate baby-agi (#168)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com> Co-authored-by: merwanehamadi <merwanehamadi@gmail.com>
2026-02-09 16:24:24 +01:00 · 2023-07-21 19:15:42 +01:00
parent ee13f1ac87
commit 2b3abeff4e
9 changed files with 39 additions and 9 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -81,6 +81,7 @@ jobs:
          - "Auto-GPT"
          - "mini-agi"
          - "beebot"
+          - "BabyAGI"

    steps:
      - name: Checkout repository
@@ -132,6 +133,10 @@ jobs:
            python -m venv venv
            source venv/bin/activate
            pip install -r requirements.txt
+          elif [ "$AGENT_NAME" == "BabyAGI" ]; then
+            python -m venv venv
+            source venv/bin/activate
+            pip install -r requirements.txt
          elif [ "$AGENT_NAME" == "SuperAGI" ]; then
            cp config_template.yaml config.yaml
            sed -i 's/OPENAI_API_KEY:.*/OPENAI_API_KEY: "'"${{ secrets.OPENAI_API_KEY }}"'"/' config.yaml
--- a/.gitmodules
+++ b/.gitmodules
@@ -18,6 +18,10 @@
 	path = agent/SuperAGI
 	url = https://github.com/SilenNaihin/SuperAGI.git
 	branch = benchmark-integration
+[submodule "agent/BabyAGI"]
+	path = agent/BabyAGI
+	url = https://github.com/SilenNaihin/babyagi.git
+	branch = benchmark-integration
 [submodule "agent/beebot"]
 	path = agent/beebot
 	url = https://github.com/merwanehamadi/beebot.git
--- a/agbenchmark/agent_interface.py
+++ b/agbenchmark/agent_interface.py
@@ -27,7 +27,11 @@ def run_agent(
    else:
        entry_path = "agbenchmark.benchmarks"

-        print(f"Running Python function '{entry_path}' with timeout {cutoff}")
+        timeout = cutoff
+        if "--nc" in sys.argv:
+            timeout = 100000
+
+        print(f"Running Python function '{entry_path}' with timeout {timeout}")
        command = [sys.executable, "-m", entry_path, str(task)]
        process = subprocess.Popen(
            command,
@@ -49,11 +53,11 @@ def run_agent(
            if (
                process.poll() is not None
                or output == ""
-                or (time.time() - start_time > cutoff)
+                or (time.time() - start_time > timeout)
            ):
                break

-        if time.time() - start_time > cutoff:
+        if time.time() - start_time > timeout:
            print("The Python function has exceeded the time limit and was terminated.")
            process.kill()
        else:
--- a/agbenchmark/challenge.py
+++ b/agbenchmark/challenge.py
@@ -1,6 +1,7 @@
 import glob
 import os
 import subprocess
+import sys
 from abc import ABC
 from typing import Any, Dict, List

@@ -72,7 +73,7 @@ class Challenge(ABC):
            for file_path in matching_files:
                if self.data.ground.type == "execute_python_code":
                    result = subprocess.run(
-                        ["python3", file_path],
+                        [sys.executable, file_path],
                        cwd=os.path.abspath(workspace),
                        capture_output=True,
                        text=True,
--- a/agbenchmark/conftest.py
+++ b/agbenchmark/conftest.py
@@ -81,6 +81,7 @@ def workspace(config: Dict[str, Any]) -> Generator[str, None, None]:

 def pytest_addoption(parser: Any) -> None:
    parser.addoption("--mock", action="store_true", default=False)
+    parser.addoption("--nc", action="store_true", default=False)
    parser.addoption("--improve", action="store_true", default=False)
    parser.addoption("--maintain", action="store_true", default=False)
    parser.addoption("--test", action="store_true", default=None)
--- a/agbenchmark/reports/internal_info.json
+++ b/agbenchmark/reports/internal_info.json
@@ -1,4 +1,10 @@
 {
+    "BabyAGI": {
+        "TestWriteFile": [
+            false,
+            false
+        ]
+    },
    "TestBasicCodeGeneration": [
        true
    ],
@@ -142,4 +148,4 @@
            false
        ]
    }
-}
+}
--- a/agbenchmark/start_benchmark.py
+++ b/agbenchmark/start_benchmark.py
@@ -30,9 +30,13 @@ def cli() -> None:
@click.option("--maintain", is_flag=True, help="Runs only regression tests")
@click.option("--improve", is_flag=True, help="Run only non-regression tests")
@click.option("--mock", is_flag=True, help="Run with mock")
-def start(category: str, test: str, maintain: bool, improve: bool, mock: bool) -> int:
+@click.option("--nc", is_flag=True, help="Run without cutoff")
+def start(
+    category: str, test: str, maintain: bool, improve: bool, mock: bool, nc: bool
+) -> int:
    """Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
    # Check if configuration file exists and is not empty
+
    if maintain and improve:
        print(
            "Error: You can't use both --maintain and --improve at the same time. Please choose one."
@@ -106,6 +110,9 @@ def start(category: str, test: str, maintain: bool, improve: bool, mock: bool) -
    if mock:
        pytest_args.append("--mock")

+    if nc:
+        pytest_args.append("--nc")
+
    # when used as a library, the pytest directory to execute is in the CURRENT_DIRECTORY
    pytest_args.append(str(CURRENT_DIRECTORY))

--- a/agbenchmark/utils.py
+++ b/agbenchmark/utils.py
@@ -16,10 +16,10 @@ from agbenchmark.challenges.define_task_types import DIFFICULTY_MAP, DifficultyL

 AGENT_NAME = os.getenv("AGENT_NAME")
 HOME_ENV = os.getenv("HOME_ENV")
+report_location = os.getenv("REPORT_LOCATION", None)


 def calculate_info_test_path(reports_path: Path) -> str:
-    report_location = os.getenv("REPORT_LOCATION", ".")
    if report_location:
        reports_path = Path(os.getcwd()) / report_location

@@ -62,12 +62,13 @@ def calculate_info_test_path(reports_path: Path) -> str:
            max_prefix = max(all_prefix_numbers, default=0)
            run_name = f"{max_prefix + 1}_{test_arg}.json"
        else:
+            print(f"Found {related_file_count} files with '{test_arg}' in the name")
            # Take the number from before the _ and add the .{number}
-            prefix_str = Path(related_files[0]).stem.rsplit("_", 1)[0].split(".")[0]
+
+            prefix_str = Path(related_files[0]).stem.rsplit("_")[0].split(".")[0]
            prefix = math.floor(float(prefix_str))
            run_name = f"{prefix}.{related_file_count}_{test_arg}.json"

-    print("run_namerun_namerun_name", run_name)
    new_file_path = reports_path / run_name
    return str(new_file_path)

--- a/agent/BabyAGI
+++ b/agent/BabyAGI