From c7550ba8454561b620ef6e5579a0b48c808934b0 Mon Sep 17 00:00:00 2001
From: Merwane Hamadi <merwanehamadi@gmail.com>
Date: Mon, 11 Sep 2023 21:37:23 -0700
Subject: [PATCH] benchmark-fix

---
 {benchmark/agbenchmark => agbenchmark}/__init__.py |  0
 agbenchmark/config.json                            |  1 +
 benchmark/backend/main.py                          |  3 ++-
 benchmark/benchmark/conftest.py                    | 10 ++--------
 benchmark/benchmark/generate_test.py               |  3 ++-
 benchmark/benchmark/start_benchmark.py             |  4 ++--
 benchmark/benchmark/utils/data_types.py            |  7 +------
 benchmark/benchmark/utils/utils.py                 |  2 ++
 benchmark/poetry.lock                              | 13 ++++++++++++-
 benchmark/pyproject.toml                           |  1 +
 poetry.lock                                        |  7 +++++++
 11 files changed, 32 insertions(+), 19 deletions(-)
 rename {benchmark/agbenchmark => agbenchmark}/__init__.py (100%)
 create mode 100644 agbenchmark/config.json
 create mode 100644 poetry.lock

diff --git a/benchmark/agbenchmark/__init__.py b/agbenchmark/__init__.py
similarity index 100%
rename from benchmark/agbenchmark/__init__.py
rename to agbenchmark/__init__.py
diff --git a/agbenchmark/config.json b/agbenchmark/config.json
new file mode 100644
index 00000000..fee0da39
--- /dev/null
+++ b/agbenchmark/config.json
@@ -0,0 +1 @@
+{"workspace": "workspace"}
\ No newline at end of file
diff --git a/benchmark/backend/main.py b/benchmark/backend/main.py
index c0c2bf2d..03880f0e 100644
--- a/benchmark/backend/main.py
+++ b/benchmark/backend/main.py
@@ -9,10 +9,11 @@ from typing import Any
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
-from agbenchmark.utils.utils import find_absolute_benchmark_path
 from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
 
+from agbenchmark.utils.utils import find_absolute_benchmark_path
+
 app = FastAPI()
 
 origins = ["http://localhost:3000"]
diff --git a/benchmark/benchmark/conftest.py b/benchmark/benchmark/conftest.py
index a93867e4..74e76b0c 100644
--- a/benchmark/benchmark/conftest.py
+++ b/benchmark/benchmark/conftest.py
@@ -10,13 +10,7 @@ from typing import Any, Dict, Generator
 
 import pytest
 
-from benchmark.reports.reports import (
-    finalize_reports,
-    generate_combined_suite_report,
-    generate_single_call_report,
-    session_finish,
-)
-from benchmark.utils.data_types import AgentBenchmarkConfig, SuiteConfig
+from benchmark.utils.data_types import AgentBenchmarkConfig
 
 GLOBAL_TIMEOUT = (
     1500  # The tests will stop after 25 minutes so we can send the reports.
@@ -297,7 +291,7 @@ def run_agent(request: Any) -> Any:
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
             universal_newlines=True,
-            cwd=agent_benchmark_config_path.entry_path.parent.parent,
+            # cwd=agent_benchmark_config_path.entry_path.parent.parent, # even if it's necessary to make it work, let's get rid of that, this is too complex
         )
         time.sleep(3)
         yield
diff --git a/benchmark/benchmark/generate_test.py b/benchmark/benchmark/generate_test.py
index fd81058b..15e0d3ed 100644
--- a/benchmark/benchmark/generate_test.py
+++ b/benchmark/benchmark/generate_test.py
@@ -231,7 +231,8 @@ def generate_tests() -> None:  # sourcery skip: invert-any-all
         )
     )
 
-    agent_config_path = None
+    agent_benchmark_config_path = Path.cwd() / "agbenchmark" / "config.json"
+
     if "--agent-config" in sys.argv:
         agent_benchmark_config_path = sys.argv[sys.argv.index("--agent-config") + 1]
     else:
diff --git a/benchmark/benchmark/start_benchmark.py b/benchmark/benchmark/start_benchmark.py
index b7cda4bd..b47488f5 100644
--- a/benchmark/benchmark/start_benchmark.py
+++ b/benchmark/benchmark/start_benchmark.py
@@ -12,8 +12,8 @@ from helicone.lock import HeliconeLockManager
 
 sys.path.append("/Users/swifty/dev/Auto-GPT/benchmark")
 
-from agbenchmark.reports.ReportManager import ReportManager
-from agbenchmark.utils.utils import (  # get_git_commit_sha,
+from benchmark.reports.ReportManager import ReportManager
+from benchmark.utils.utils import (  # get_git_commit_sha,
     AGENT_NAME,
     calculate_dynamic_paths,
 )
diff --git a/benchmark/benchmark/utils/data_types.py b/benchmark/benchmark/utils/data_types.py
index 57a327cf..6950ba31 100644
--- a/benchmark/benchmark/utils/data_types.py
+++ b/benchmark/benchmark/utils/data_types.py
@@ -78,7 +78,6 @@ class AgentBenchmarkConfig(BaseModel):
     """
     This class represents the configuration for the Agent Benchmark.
     It includes the following attributes:
-    - entry_path: The path to the file that, when run, starts the agent configured for benchmarking, realtive location from the config_file.
     - workspace: The path to the workspace where the benchmark will be run.
     - reports_folder: The path to the folder where the benchmark reports will be stored.
     - api_mode: A boolean indicating whether the benchmark is run in API mode.
@@ -86,7 +85,6 @@ class AgentBenchmarkConfig(BaseModel):
     """
 
     agent_benchmark_config_path: Path | None = None
-    entry_path: Path
     workspace: Path
     reports_folder: Path | None = None
     api_mode: bool = False
@@ -95,10 +93,7 @@ class AgentBenchmarkConfig(BaseModel):
     def get_reports_location(self) -> Path:
         if not self.reports_folder:
             self.reports_folder = (
-                self.agent_benchmark_config_path
-                / self.entry_path.parent
-                / ".."
-                / "reports"
+                self.agent_benchmark_config_path / "reports"
             ).resolve()
         return self.reports_folder
 
diff --git a/benchmark/benchmark/utils/utils.py b/benchmark/benchmark/utils/utils.py
index 8f9dc205..e0206b07 100644
--- a/benchmark/benchmark/utils/utils.py
+++ b/benchmark/benchmark/utils/utils.py
@@ -9,6 +9,8 @@ from typing import Any, List, Optional
 import git
 from dotenv import load_dotenv
 
+from benchmark.utils.data_types import calculate_info_test_path
+
 load_dotenv()
 from benchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel
 
diff --git a/benchmark/poetry.lock b/benchmark/poetry.lock
index 7d943095..9ab90929 100644
--- a/benchmark/poetry.lock
+++ b/benchmark/poetry.lock
@@ -2367,6 +2367,17 @@ anyio = ">=3.4.0,<5"
 [package.extras]
 full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
 
+[[package]]
+name = "toml"
+version = "0.10.2"
+description = "Python Library for Tom's Obvious, Minimal Language"
+optional = false
+python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+files = [
+    {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
+    {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
+]
+
 [[package]]
 name = "tomli"
 version = "2.0.1"
@@ -2669,4 +2680,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "e86dcefdd1198516ad76fafb4877fd46f5b8623a7be52e069c3ac39509ba7c4e"
+content-hash = "cbfb78cc028636025da583204d77e7903b41b08ec620eb755cb56211a837e0c1"
diff --git a/benchmark/pyproject.toml b/benchmark/pyproject.toml
index caeee5fc..a6d37fb7 100644
--- a/benchmark/pyproject.toml
+++ b/benchmark/pyproject.toml
@@ -31,6 +31,7 @@ pytest-asyncio = "^0.21.1"
 uvicorn = "^0.23.2"
 fastapi = "^0.103.1"
 python-multipart = "^0.0.6"
+toml = "^0.10.2"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 00000000..8802b86e
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+package = []
+
+[metadata]
+lock-version = "2.0"
+python-versions = "^3.11"
+content-hash = "81b2fa642d7f2d1219cf80112ace12d689d053d81be7f7addb98144d56fc0fb2"