mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-17 14:04:27 +01:00
benchmark-fix
This commit is contained in:
1
agbenchmark/config.json
Normal file
1
agbenchmark/config.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"workspace": "workspace"}
|
||||||
@@ -9,10 +9,11 @@ from typing import Any
|
|||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
from agbenchmark.utils.utils import find_absolute_benchmark_path
|
|
||||||
from fastapi import FastAPI, Query
|
from fastapi import FastAPI, Query
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
|
||||||
|
from agbenchmark.utils.utils import find_absolute_benchmark_path
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
origins = ["http://localhost:3000"]
|
origins = ["http://localhost:3000"]
|
||||||
|
|||||||
@@ -10,13 +10,7 @@ from typing import Any, Dict, Generator
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from benchmark.reports.reports import (
|
from benchmark.utils.data_types import AgentBenchmarkConfig
|
||||||
finalize_reports,
|
|
||||||
generate_combined_suite_report,
|
|
||||||
generate_single_call_report,
|
|
||||||
session_finish,
|
|
||||||
)
|
|
||||||
from benchmark.utils.data_types import AgentBenchmarkConfig, SuiteConfig
|
|
||||||
|
|
||||||
GLOBAL_TIMEOUT = (
|
GLOBAL_TIMEOUT = (
|
||||||
1500 # The tests will stop after 25 minutes so we can send the reports.
|
1500 # The tests will stop after 25 minutes so we can send the reports.
|
||||||
@@ -297,7 +291,7 @@ def run_agent(request: Any) -> Any:
|
|||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.STDOUT,
|
stderr=subprocess.STDOUT,
|
||||||
universal_newlines=True,
|
universal_newlines=True,
|
||||||
cwd=agent_benchmark_config_path.entry_path.parent.parent,
|
# cwd=agent_benchmark_config_path.entry_path.parent.parent, # even if it's necessary to make it work, let's get rid of that, this is too complex
|
||||||
)
|
)
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
yield
|
yield
|
||||||
|
|||||||
@@ -231,7 +231,8 @@ def generate_tests() -> None: # sourcery skip: invert-any-all
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
agent_config_path = None
|
agent_benchmark_config_path = Path.cwd() / "agbenchmark" / "config.json"
|
||||||
|
|
||||||
if "--agent-config" in sys.argv:
|
if "--agent-config" in sys.argv:
|
||||||
agent_benchmark_config_path = sys.argv[sys.argv.index("--agent-config") + 1]
|
agent_benchmark_config_path = sys.argv[sys.argv.index("--agent-config") + 1]
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -12,8 +12,8 @@ from helicone.lock import HeliconeLockManager
|
|||||||
|
|
||||||
sys.path.append("/Users/swifty/dev/Auto-GPT/benchmark")
|
sys.path.append("/Users/swifty/dev/Auto-GPT/benchmark")
|
||||||
|
|
||||||
from agbenchmark.reports.ReportManager import ReportManager
|
from benchmark.reports.ReportManager import ReportManager
|
||||||
from agbenchmark.utils.utils import ( # get_git_commit_sha,
|
from benchmark.utils.utils import ( # get_git_commit_sha,
|
||||||
AGENT_NAME,
|
AGENT_NAME,
|
||||||
calculate_dynamic_paths,
|
calculate_dynamic_paths,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -78,7 +78,6 @@ class AgentBenchmarkConfig(BaseModel):
|
|||||||
"""
|
"""
|
||||||
This class represents the configuration for the Agent Benchmark.
|
This class represents the configuration for the Agent Benchmark.
|
||||||
It includes the following attributes:
|
It includes the following attributes:
|
||||||
- entry_path: The path to the file that, when run, starts the agent configured for benchmarking, relative to the location of the config_file.
|
|
||||||
- workspace: The path to the workspace where the benchmark will be run.
|
- workspace: The path to the workspace where the benchmark will be run.
|
||||||
- reports_folder: The path to the folder where the benchmark reports will be stored.
|
- reports_folder: The path to the folder where the benchmark reports will be stored.
|
||||||
- api_mode: A boolean indicating whether the benchmark is run in API mode.
|
- api_mode: A boolean indicating whether the benchmark is run in API mode.
|
||||||
@@ -86,7 +85,6 @@ class AgentBenchmarkConfig(BaseModel):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
agent_benchmark_config_path: Path | None = None
|
agent_benchmark_config_path: Path | None = None
|
||||||
entry_path: Path
|
|
||||||
workspace: Path
|
workspace: Path
|
||||||
reports_folder: Path | None = None
|
reports_folder: Path | None = None
|
||||||
api_mode: bool = False
|
api_mode: bool = False
|
||||||
@@ -95,10 +93,7 @@ class AgentBenchmarkConfig(BaseModel):
|
|||||||
def get_reports_location(self) -> Path:
|
def get_reports_location(self) -> Path:
|
||||||
if not self.reports_folder:
|
if not self.reports_folder:
|
||||||
self.reports_folder = (
|
self.reports_folder = (
|
||||||
self.agent_benchmark_config_path
|
self.agent_benchmark_config_path / "reports"
|
||||||
/ self.entry_path.parent
|
|
||||||
/ ".."
|
|
||||||
/ "reports"
|
|
||||||
).resolve()
|
).resolve()
|
||||||
return self.reports_folder
|
return self.reports_folder
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ from typing import Any, List, Optional
|
|||||||
import git
|
import git
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from benchmark.utils.data_types import calculate_info_test_path
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
from benchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel
|
from benchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel
|
||||||
|
|
||||||
|
|||||||
13
benchmark/poetry.lock
generated
13
benchmark/poetry.lock
generated
@@ -2367,6 +2367,17 @@ anyio = ">=3.4.0,<5"
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
|
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml"
|
||||||
|
version = "0.10.2"
|
||||||
|
description = "Python Library for Tom's Obvious, Minimal Language"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
|
||||||
|
files = [
|
||||||
|
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
|
||||||
|
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tomli"
|
name = "tomli"
|
||||||
version = "2.0.1"
|
version = "2.0.1"
|
||||||
@@ -2669,4 +2680,4 @@ multidict = ">=4.0"
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "e86dcefdd1198516ad76fafb4877fd46f5b8623a7be52e069c3ac39509ba7c4e"
|
content-hash = "cbfb78cc028636025da583204d77e7903b41b08ec620eb755cb56211a837e0c1"
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ pytest-asyncio = "^0.21.1"
|
|||||||
uvicorn = "^0.23.2"
|
uvicorn = "^0.23.2"
|
||||||
fastapi = "^0.103.1"
|
fastapi = "^0.103.1"
|
||||||
python-multipart = "^0.0.6"
|
python-multipart = "^0.0.6"
|
||||||
|
toml = "^0.10.2"
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
|||||||
7
poetry.lock
generated
Normal file
7
poetry.lock
generated
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
|
||||||
|
package = []
|
||||||
|
|
||||||
|
[metadata]
|
||||||
|
lock-version = "2.0"
|
||||||
|
python-versions = "^3.11"
|
||||||
|
content-hash = "81b2fa642d7f2d1219cf80112ace12d689d053d81be7f7addb98144d56fc0fb2"
|
||||||
Reference in New Issue
Block a user