benchmark-fix

2025-12-17 14:04:27 +01:00 · 2023-09-11 21:37:23 -07:00
parent cc24dd50e5
commit c7550ba845
11 changed files with 32 additions and 19 deletions
--- a/benchmark/agbenchmark/init.py
+++ b/benchmark/agbenchmark/init.py
--- a/agbenchmark/config.json
+++ b/agbenchmark/config.json
@@ -0,0 +1 @@
 {"workspace": "workspace"}
--- a/benchmark/backend/main.py
+++ b/benchmark/backend/main.py
@@ -9,10 +9,11 @@ from typing import Any
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from agbenchmark.utils.utils import find_absolute_benchmark_path
 from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
 from agbenchmark.utils.utils import find_absolute_benchmark_path
 app = FastAPI()
 origins = ["http://localhost:3000"]
--- a/benchmark/benchmark/conftest.py
+++ b/benchmark/benchmark/conftest.py
@@ -10,13 +10,7 @@ from typing import Any, Dict, Generator
 import pytest
-from benchmark.reports.reports import (
+from benchmark.utils.data_types import AgentBenchmarkConfig
    finalize_reports,
    generate_combined_suite_report,
    generate_single_call_report,
    session_finish,
 )
 from benchmark.utils.data_types import AgentBenchmarkConfig, SuiteConfig
 GLOBAL_TIMEOUT = (
    1500  # The tests will stop after 25 minutes so we can send the reports.
@@ -297,7 +291,7 @@ def run_agent(request: Any) -> Any:
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
-            cwd=agent_benchmark_config_path.entry_path.parent.parent,
+            # cwd=agent_benchmark_config_path.entry_path.parent.parent, # even if it's necessary to make it work, let's get rid of that, this is too complex
        )
        time.sleep(3)
        yield
--- a/benchmark/benchmark/generate_test.py
+++ b/benchmark/benchmark/generate_test.py
@@ -231,7 +231,8 @@ def generate_tests() -> None:  # sourcery skip: invert-any-all
        )
    )
-    agent_config_path = None
+    agent_benchmark_config_path = Path.cwd() / "agbenchmark" / "config.json"
    if "--agent-config" in sys.argv:
        agent_benchmark_config_path = sys.argv[sys.argv.index("--agent-config") + 1]
    else:
--- a/benchmark/benchmark/start_benchmark.py
+++ b/benchmark/benchmark/start_benchmark.py
@@ -12,8 +12,8 @@ from helicone.lock import HeliconeLockManager
 sys.path.append("/Users/swifty/dev/Auto-GPT/benchmark")
-from agbenchmark.reports.ReportManager import ReportManager
+from benchmark.reports.ReportManager import ReportManager
-from agbenchmark.utils.utils import (  # get_git_commit_sha,
+from benchmark.utils.utils import (  # get_git_commit_sha,
    AGENT_NAME,
    calculate_dynamic_paths,
 )
--- a/benchmark/benchmark/utils/data_types.py
+++ b/benchmark/benchmark/utils/data_types.py
@@ -78,7 +78,6 @@ class AgentBenchmarkConfig(BaseModel):
    """
    This class represents the configuration for the Agent Benchmark.
    It includes the following attributes:
    - entry_path: The path to the file that, when run, starts the agent configured for benchmarking, relative location from the config_file.
    - workspace: The path to the workspace where the benchmark will be run.
    - reports_folder: The path to the folder where the benchmark reports will be stored.
    - api_mode: A boolean indicating whether the benchmark is run in API mode.
@@ -86,7 +85,6 @@ class AgentBenchmarkConfig(BaseModel):
    """
    agent_benchmark_config_path: Path | None = None
    entry_path: Path
    workspace: Path
    reports_folder: Path | None = None
    api_mode: bool = False
@@ -95,10 +93,7 @@ class AgentBenchmarkConfig(BaseModel):
    def get_reports_location(self) -> Path:
        if not self.reports_folder:
            self.reports_folder = (
-                self.agent_benchmark_config_path
+                self.agent_benchmark_config_path / "reports"
                / self.entry_path.parent
                / ".."
                / "reports"
            ).resolve()
        return self.reports_folder
--- a/benchmark/benchmark/utils/utils.py
+++ b/benchmark/benchmark/utils/utils.py
@@ -9,6 +9,8 @@ from typing import Any, List, Optional
 import git
 from dotenv import load_dotenv
 from benchmark.utils.data_types import calculate_info_test_path
 load_dotenv()
 from benchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel
--- a/benchmark/poetry.lock
+++ b/benchmark/poetry.lock
@@ -2367,6 +2367,17 @@ anyio = ">=3.4.0,<5"
 [package.extras]
 full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
 [[package]]
 name = "toml"
 version = "0.10.2"
 description = "Python Library for Tom's Obvious, Minimal Language"
 optional = false
 python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
 files = [
    {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
    {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
 ]
 [[package]]
 name = "tomli"
 version = "2.0.1"
@@ -2669,4 +2680,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "e86dcefdd1198516ad76fafb4877fd46f5b8623a7be52e069c3ac39509ba7c4e"
+content-hash = "cbfb78cc028636025da583204d77e7903b41b08ec620eb755cb56211a837e0c1"
--- a/benchmark/pyproject.toml
+++ b/benchmark/pyproject.toml
@@ -31,6 +31,7 @@ pytest-asyncio = "^0.21.1"
 uvicorn = "^0.23.2"
 fastapi = "^0.103.1"
 python-multipart = "^0.0.6"
 toml = "^0.10.2"
 [tool.poetry.group.dev.dependencies]
--- a/poetry.lock
+++ b/poetry.lock
@@ -0,0 +1,7 @@
 # This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
 package = []
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
 content-hash = "81b2fa642d7f2d1219cf80112ace12d689d053d81be7f7addb98144d56fc0fb2"