From c7550ba8454561b620ef6e5579a0b48c808934b0 Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Mon, 11 Sep 2023 21:37:23 -0700 Subject: [PATCH] benchmark-fix --- {benchmark/agbenchmark => agbenchmark}/__init__.py | 0 agbenchmark/config.json | 1 + benchmark/backend/main.py | 3 ++- benchmark/benchmark/conftest.py | 10 ++-------- benchmark/benchmark/generate_test.py | 3 ++- benchmark/benchmark/start_benchmark.py | 4 ++-- benchmark/benchmark/utils/data_types.py | 7 +------ benchmark/benchmark/utils/utils.py | 2 ++ benchmark/poetry.lock | 13 ++++++++++++- benchmark/pyproject.toml | 1 + poetry.lock | 7 +++++++ 11 files changed, 32 insertions(+), 19 deletions(-) rename {benchmark/agbenchmark => agbenchmark}/__init__.py (100%) create mode 100644 agbenchmark/config.json create mode 100644 poetry.lock diff --git a/benchmark/agbenchmark/__init__.py b/agbenchmark/__init__.py similarity index 100% rename from benchmark/agbenchmark/__init__.py rename to agbenchmark/__init__.py diff --git a/agbenchmark/config.json b/agbenchmark/config.json new file mode 100644 index 00000000..fee0da39 --- /dev/null +++ b/agbenchmark/config.json @@ -0,0 +1 @@ +{"workspace": "workspace"} \ No newline at end of file diff --git a/benchmark/backend/main.py b/benchmark/backend/main.py index c0c2bf2d..03880f0e 100644 --- a/benchmark/backend/main.py +++ b/benchmark/backend/main.py @@ -9,10 +9,11 @@ from typing import Any sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from agbenchmark.utils.utils import find_absolute_benchmark_path from fastapi import FastAPI, Query from fastapi.middleware.cors import CORSMiddleware +from agbenchmark.utils.utils import find_absolute_benchmark_path + app = FastAPI() origins = ["http://localhost:3000"] diff --git a/benchmark/benchmark/conftest.py b/benchmark/benchmark/conftest.py index a93867e4..74e76b0c 100644 --- a/benchmark/benchmark/conftest.py +++ b/benchmark/benchmark/conftest.py @@ -10,13 +10,7 @@ from typing import Any, Dict, Generator import pytest -from benchmark.reports.reports import ( - finalize_reports, - generate_combined_suite_report, - generate_single_call_report, - session_finish, -) -from benchmark.utils.data_types import AgentBenchmarkConfig, SuiteConfig +from benchmark.utils.data_types import AgentBenchmarkConfig GLOBAL_TIMEOUT = ( 1500 # The tests will stop after 25 minutes so we can send the reports. @@ -297,7 +291,7 @@ def run_agent(request: Any) -> Any: stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, - cwd=agent_benchmark_config_path.entry_path.parent.parent, + # cwd=agent_benchmark_config_path.entry_path.parent.parent, # even if it's necessary to make it work, let's get rid ot that, this is too complex ) time.sleep(3) yield diff --git a/benchmark/benchmark/generate_test.py b/benchmark/benchmark/generate_test.py index fd81058b..15e0d3ed 100644 --- a/benchmark/benchmark/generate_test.py +++ b/benchmark/benchmark/generate_test.py @@ -231,7 +231,8 @@ def generate_tests() -> None: # sourcery skip: invert-any-all ) ) - agent_config_path = None + agent_benchmark_config_path = Path.cwd() / "agbenchmark" / "config.json" + if "--agent-config" in sys.argv: agent_benchmark_config_path = sys.argv[sys.argv.index("--agent-config") + 1] else: diff --git a/benchmark/benchmark/start_benchmark.py b/benchmark/benchmark/start_benchmark.py index b7cda4bd..b47488f5 100644 --- a/benchmark/benchmark/start_benchmark.py +++ b/benchmark/benchmark/start_benchmark.py @@ -12,8 +12,8 @@ from helicone.lock import HeliconeLockManager sys.path.append("/Users/swifty/dev/Auto-GPT/benchmark") -from agbenchmark.reports.ReportManager import ReportManager -from agbenchmark.utils.utils import ( # get_git_commit_sha, +from benchmark.reports.ReportManager import ReportManager +from benchmark.utils.utils import ( # get_git_commit_sha, AGENT_NAME, calculate_dynamic_paths, ) diff --git a/benchmark/benchmark/utils/data_types.py b/benchmark/benchmark/utils/data_types.py index 57a327cf..6950ba31 100644 --- a/benchmark/benchmark/utils/data_types.py +++ b/benchmark/benchmark/utils/data_types.py @@ -78,7 +78,6 @@ class AgentBenchmarkConfig(BaseModel): """ This class represents the configuration for the Agent Benchmark. It includes the following attributes: - - entry_path: The path to the file that, when run, starts the agent configured for benchmarking, realtive location from the config_file. - workspace: The path to the workspace where the benchmark will be run. - reports_folder: The path to the folder where the benchmark reports will be stored. - api_mode: A boolean indicating whether the benchmark is run in API mode. @@ -86,7 +85,6 @@ class AgentBenchmarkConfig(BaseModel): """ agent_benchmark_config_path: Path | None = None - entry_path: Path workspace: Path reports_folder: Path | None = None api_mode: bool = False @@ -95,10 +93,7 @@ class AgentBenchmarkConfig(BaseModel): def get_reports_location(self) -> Path: if not self.reports_folder: self.reports_folder = ( - self.agent_benchmark_config_path - / self.entry_path.parent - / ".." - / "reports" + self.agent_benchmark_config_path / "reports" ).resolve() return self.reports_folder diff --git a/benchmark/benchmark/utils/utils.py b/benchmark/benchmark/utils/utils.py index 8f9dc205..e0206b07 100644 --- a/benchmark/benchmark/utils/utils.py +++ b/benchmark/benchmark/utils/utils.py @@ -9,6 +9,8 @@ from typing import Any, List, Optional import git from dotenv import load_dotenv +from benchmark.utils.data_types import calculate_info_test_path + load_dotenv() from benchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel diff --git a/benchmark/poetry.lock b/benchmark/poetry.lock index 7d943095..9ab90929 100644 --- a/benchmark/poetry.lock +++ b/benchmark/poetry.lock @@ -2367,6 +2367,17 @@ anyio = ">=3.4.0,<5" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -2669,4 +2680,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "e86dcefdd1198516ad76fafb4877fd46f5b8623a7be52e069c3ac39509ba7c4e" +content-hash = "cbfb78cc028636025da583204d77e7903b41b08ec620eb755cb56211a837e0c1" diff --git a/benchmark/pyproject.toml b/benchmark/pyproject.toml index caeee5fc..a6d37fb7 100644 --- a/benchmark/pyproject.toml +++ b/benchmark/pyproject.toml @@ -31,6 +31,7 @@ pytest-asyncio = "^0.21.1" uvicorn = "^0.23.2" fastapi = "^0.103.1" python-multipart = "^0.0.6" +toml = "^0.10.2" [tool.poetry.group.dev.dependencies] diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 00000000..8802b86e --- /dev/null +++ b/poetry.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +package = [] + +[metadata] +lock-version = "2.0" +python-versions = "^3.11" +content-hash = "81b2fa642d7f2d1219cf80112ace12d689d053d81be7f7addb98144d56fc0fb2"