benchmark-fix

This commit is contained in:
Merwane Hamadi
2023-09-11 21:37:23 -07:00
parent cc24dd50e5
commit c7550ba845
11 changed files with 32 additions and 19 deletions

1
agbenchmark/config.json Normal file
View File

@@ -0,0 +1 @@
{"workspace": "workspace"}

View File

@@ -9,10 +9,11 @@ from typing import Any
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from agbenchmark.utils.utils import find_absolute_benchmark_path
from fastapi import FastAPI, Query from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from agbenchmark.utils.utils import find_absolute_benchmark_path
app = FastAPI() app = FastAPI()
origins = ["http://localhost:3000"] origins = ["http://localhost:3000"]

View File

@@ -10,13 +10,7 @@ from typing import Any, Dict, Generator
import pytest import pytest
from benchmark.reports.reports import ( from benchmark.utils.data_types import AgentBenchmarkConfig
finalize_reports,
generate_combined_suite_report,
generate_single_call_report,
session_finish,
)
from benchmark.utils.data_types import AgentBenchmarkConfig, SuiteConfig
GLOBAL_TIMEOUT = ( GLOBAL_TIMEOUT = (
1500 # The tests will stop after 25 minutes so we can send the reports. 1500 # The tests will stop after 25 minutes so we can send the reports.
@@ -297,7 +291,7 @@ def run_agent(request: Any) -> Any:
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, stderr=subprocess.STDOUT,
universal_newlines=True, universal_newlines=True,
cwd=agent_benchmark_config_path.entry_path.parent.parent, # cwd=agent_benchmark_config_path.entry_path.parent.parent, # even if it's necessary to make it work, let's get rid of that, this is too complex
) )
time.sleep(3) time.sleep(3)
yield yield

View File

@@ -231,7 +231,8 @@ def generate_tests() -> None: # sourcery skip: invert-any-all
) )
) )
agent_config_path = None agent_benchmark_config_path = Path.cwd() / "agbenchmark" / "config.json"
if "--agent-config" in sys.argv: if "--agent-config" in sys.argv:
agent_benchmark_config_path = sys.argv[sys.argv.index("--agent-config") + 1] agent_benchmark_config_path = sys.argv[sys.argv.index("--agent-config") + 1]
else: else:

View File

@@ -12,8 +12,8 @@ from helicone.lock import HeliconeLockManager
sys.path.append("/Users/swifty/dev/Auto-GPT/benchmark") sys.path.append("/Users/swifty/dev/Auto-GPT/benchmark")
from agbenchmark.reports.ReportManager import ReportManager from benchmark.reports.ReportManager import ReportManager
from agbenchmark.utils.utils import ( # get_git_commit_sha, from benchmark.utils.utils import ( # get_git_commit_sha,
AGENT_NAME, AGENT_NAME,
calculate_dynamic_paths, calculate_dynamic_paths,
) )

View File

@@ -78,7 +78,6 @@ class AgentBenchmarkConfig(BaseModel):
""" """
This class represents the configuration for the Agent Benchmark. This class represents the configuration for the Agent Benchmark.
It includes the following attributes: It includes the following attributes:
- entry_path: The path to the file that, when run, starts the agent configured for benchmarking, relative location from the config_file.
- workspace: The path to the workspace where the benchmark will be run. - workspace: The path to the workspace where the benchmark will be run.
- reports_folder: The path to the folder where the benchmark reports will be stored. - reports_folder: The path to the folder where the benchmark reports will be stored.
- api_mode: A boolean indicating whether the benchmark is run in API mode. - api_mode: A boolean indicating whether the benchmark is run in API mode.
@@ -86,7 +85,6 @@ class AgentBenchmarkConfig(BaseModel):
""" """
agent_benchmark_config_path: Path | None = None agent_benchmark_config_path: Path | None = None
entry_path: Path
workspace: Path workspace: Path
reports_folder: Path | None = None reports_folder: Path | None = None
api_mode: bool = False api_mode: bool = False
@@ -95,10 +93,7 @@ class AgentBenchmarkConfig(BaseModel):
def get_reports_location(self) -> Path: def get_reports_location(self) -> Path:
if not self.reports_folder: if not self.reports_folder:
self.reports_folder = ( self.reports_folder = (
self.agent_benchmark_config_path self.agent_benchmark_config_path / "reports"
/ self.entry_path.parent
/ ".."
/ "reports"
).resolve() ).resolve()
return self.reports_folder return self.reports_folder

View File

@@ -9,6 +9,8 @@ from typing import Any, List, Optional
import git import git
from dotenv import load_dotenv from dotenv import load_dotenv
from benchmark.utils.data_types import calculate_info_test_path
load_dotenv() load_dotenv()
from benchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel from benchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel

13
benchmark/poetry.lock generated
View File

@@ -2367,6 +2367,17 @@ anyio = ">=3.4.0,<5"
[package.extras] [package.extras]
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
[[package]]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
[[package]] [[package]]
name = "tomli" name = "tomli"
version = "2.0.1" version = "2.0.1"
@@ -2669,4 +2680,4 @@ multidict = ">=4.0"
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.10" python-versions = "^3.10"
content-hash = "e86dcefdd1198516ad76fafb4877fd46f5b8623a7be52e069c3ac39509ba7c4e" content-hash = "cbfb78cc028636025da583204d77e7903b41b08ec620eb755cb56211a837e0c1"

View File

@@ -31,6 +31,7 @@ pytest-asyncio = "^0.21.1"
uvicorn = "^0.23.2" uvicorn = "^0.23.2"
fastapi = "^0.103.1" fastapi = "^0.103.1"
python-multipart = "^0.0.6" python-multipart = "^0.0.6"
toml = "^0.10.2"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]

7
poetry.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
package = []
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "81b2fa642d7f2d1219cf80112ace12d689d053d81be7f7addb98144d56fc0fb2"