From 9eb01d85a384ead1b4de096a4deb591288a8d5bb Mon Sep 17 00:00:00 2001 From: SwiftyOS Date: Wed, 13 Sep 2023 12:18:04 +0200 Subject: [PATCH] fixed multiple report folder bug --- benchmark/agbenchmark/__init__.py | 47 --------- benchmark/agbenchmark/__main__.py | 52 +++++++++- .../agbenchmark/reports/ReportManager.py | 6 +- benchmark/agbenchmark/reports/reports.py | 2 +- benchmark/agbenchmark/utils/data_types.py | 13 ++- .../utils/get_data_from_helicone.py | 2 +- benchmark/agbenchmark/utils/utils.py | 98 +------------------ 7 files changed, 67 insertions(+), 153 deletions(-) diff --git a/benchmark/agbenchmark/__init__.py b/benchmark/agbenchmark/__init__.py index 9cb4af9f..e69de29b 100644 --- a/benchmark/agbenchmark/__init__.py +++ b/benchmark/agbenchmark/__init__.py @@ -1,47 +0,0 @@ -import json -from datetime import datetime, timezone -from pathlib import Path - -from .reports.ReportManager import ReportManager -from .utils.data_types import AgentBenchmarkConfig - -BENCHMARK_START_TIME = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00") - - -def get_agent_benchmark_config() -> AgentBenchmarkConfig: - agent_benchmark_config_path = str(Path.cwd() / "agbenchmark_config" / "config.json") - try: - with open(agent_benchmark_config_path, "r") as f: - agent_benchmark_config = AgentBenchmarkConfig(**json.load(f)) - agent_benchmark_config.agent_benchmark_config_path = ( - agent_benchmark_config_path - ) - return agent_benchmark_config - except json.JSONDecodeError: - print("Error: benchmark_config.json is not a valid JSON file.") - raise - - -def get_report_managers() -> tuple[ReportManager, ReportManager, ReportManager]: - agent_benchmark_config = get_agent_benchmark_config() - # tests that consistently pass are considered regression tests - REGRESSION_MANAGER = ReportManager( - agent_benchmark_config.get_regression_reports_path(), BENCHMARK_START_TIME - ) - - # print(f"Using {REPORTS_PATH} for reports") - # user facing reporting information - INFO_MANAGER = 
ReportManager( - str(agent_benchmark_config.get_reports_path() / "report.json"), - BENCHMARK_START_TIME, - ) - - # internal db step in replacement track pass/fail rate - INTERNAL_INFO_MANAGER = ReportManager( - agent_benchmark_config.get_success_rate_path(), BENCHMARK_START_TIME - ) - - return REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER - - -(REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER) = get_report_managers() diff --git a/benchmark/agbenchmark/__main__.py b/benchmark/agbenchmark/__main__.py index 3b1f4374..db7350de 100644 --- a/benchmark/agbenchmark/__main__.py +++ b/benchmark/agbenchmark/__main__.py @@ -11,9 +11,59 @@ import pytest import toml from helicone.lock import HeliconeLockManager -from agbenchmark import BENCHMARK_START_TIME from agbenchmark.utils.data_types import AgentBenchmarkConfig +from .reports.ReportManager import ReportManager +from .utils.data_types import AgentBenchmarkConfig + +BENCHMARK_START_TIME_DT = datetime.now(timezone.utc) +BENCHMARK_START_TIME = BENCHMARK_START_TIME_DT.strftime("%Y-%m-%dT%H:%M:%S+00:00") + + +def get_agent_benchmark_config() -> AgentBenchmarkConfig: + agent_benchmark_config_path = str(Path.cwd() / "agbenchmark_config" / "config.json") + try: + with open(agent_benchmark_config_path, "r") as f: + agent_benchmark_config = AgentBenchmarkConfig(**json.load(f)) + agent_benchmark_config.agent_benchmark_config_path = ( + agent_benchmark_config_path + ) + return agent_benchmark_config + except json.JSONDecodeError: + print("Error: benchmark_config.json is not a valid JSON file.") + raise + + +def get_report_managers() -> tuple[ReportManager, ReportManager, ReportManager]: + agent_benchmark_config = get_agent_benchmark_config() + # tests that consistently pass are considered regression tests + REGRESSION_MANAGER = ReportManager( + agent_benchmark_config.get_regression_reports_path(), BENCHMARK_START_TIME_DT + ) + + # print(f"Using {REPORTS_PATH} for reports") + # user facing reporting information + 
INFO_MANAGER = ReportManager( + str( + agent_benchmark_config.get_reports_path( + benchmark_start_time=BENCHMARK_START_TIME_DT + ) + / "report.json" + ), + BENCHMARK_START_TIME_DT, + ) + + # internal db step in replacement track pass/fail rate + INTERNAL_INFO_MANAGER = ReportManager( + agent_benchmark_config.get_success_rate_path(), BENCHMARK_START_TIME_DT + ) + + return REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER + + +(REGRESSION_MANAGER, INFO_MANAGER, INTERNAL_INFO_MANAGER) = get_report_managers() + + if os.environ.get("HELICONE_API_KEY"): HeliconeLockManager.write_custom_property( "benchmark_start_time", BENCHMARK_START_TIME diff --git a/benchmark/agbenchmark/reports/ReportManager.py b/benchmark/agbenchmark/reports/ReportManager.py index 642cfcea..53a560c3 100644 --- a/benchmark/agbenchmark/reports/ReportManager.py +++ b/benchmark/agbenchmark/reports/ReportManager.py @@ -71,7 +71,9 @@ class ReportManager: "completion_time": datetime.now(timezone.utc).strftime( "%Y-%m-%dT%H:%M:%S+00:00" ), - "benchmark_start_time": self.benchmark_start_time, + "benchmark_start_time": self.benchmark_start_time.strftime( + "%Y-%m-%dT%H:%M:%S+00:00" + ), "metrics": { "run_time": str(round(time.time() - self.start_time, 2)) + " seconds", "highest_difficulty": get_highest_success_difficulty(self.tests), @@ -89,7 +91,7 @@ class ReportManager: save_single_radar_chart( agent_categories, - config.get_reports_path() / "radar_chart.png", + config.get_reports_path(self.benchmark_start_time) / "radar_chart.png", ) self.save() diff --git a/benchmark/agbenchmark/reports/reports.py b/benchmark/agbenchmark/reports/reports.py index d607d222..c0a4dbf9 100644 --- a/benchmark/agbenchmark/reports/reports.py +++ b/benchmark/agbenchmark/reports/reports.py @@ -4,7 +4,7 @@ import sys from pathlib import Path from typing import Any, Dict -from agbenchmark import ( +from agbenchmark.__main__ import ( INFO_MANAGER, INTERNAL_INFO_MANAGER, REGRESSION_MANAGER, diff --git 
a/benchmark/agbenchmark/utils/data_types.py b/benchmark/agbenchmark/utils/data_types.py index c01d3c1c..c0d0d4d4 100644 --- a/benchmark/agbenchmark/utils/data_types.py +++ b/benchmark/agbenchmark/utils/data_types.py @@ -1,3 +1,4 @@ +import datetime import json import sys from datetime import datetime, timezone @@ -37,7 +38,9 @@ DIFFICULTY_MAP = { STRING_DIFFICULTY_MAP = {e.value: DIFFICULTY_MAP[e] for e in DifficultyLevel} -def calculate_info_test_path(base_path: Path) -> Path: +def calculate_info_test_path( + base_path: Path, benchmark_start_time: datetime +) -> Path: """ Calculates the path to the directory where the test report will be saved. """ @@ -45,7 +48,7 @@ def calculate_info_test_path(base_path: Path) -> Path: base_path.mkdir(parents=True, exist_ok=True) # Get current UTC date-time stamp - date_stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S") + date_stamp = benchmark_start_time.strftime("%Y%m%dT%H%M%S") # Default run name run_name = "full_run" @@ -102,8 +105,10 @@ class AgentBenchmarkConfig(BaseModel): # ).resolve() return Path.cwd() / "agbenchmark_config" / "reports" - def get_reports_path(self) -> Path: - return calculate_info_test_path(self.get_reports_location()) + def get_reports_path(self, benchmark_start_time: datetime) -> Path: + return calculate_info_test_path( + self.get_reports_location(), benchmark_start_time + ) def get_regression_reports_path(self) -> Path: return self.get_reports_location() / "regression_tests.json" diff --git a/benchmark/agbenchmark/utils/get_data_from_helicone.py b/benchmark/agbenchmark/utils/get_data_from_helicone.py index f99a49c6..1e2f5fcb 100644 --- a/benchmark/agbenchmark/utils/get_data_from_helicone.py +++ b/benchmark/agbenchmark/utils/get_data_from_helicone.py @@ -4,7 +4,7 @@ from typing import Optional import requests -from agbenchmark import BENCHMARK_START_TIME +from agbenchmark.__main__ import BENCHMARK_START_TIME from agbenchmark.agent_interface import HELICONE_GRAPHQL_LOGS diff
--git a/benchmark/agbenchmark/utils/utils.py b/benchmark/agbenchmark/utils/utils.py index bbcfa08c..7b3cbd7f 100644 --- a/benchmark/agbenchmark/utils/utils.py +++ b/benchmark/agbenchmark/utils/utils.py @@ -1,4 +1,5 @@ # radio charts, logs, helper functions for tests, anything else relevant. +import datetime import os import re from pathlib import Path @@ -109,103 +110,6 @@ def get_highest_success_difficulty( return "No successful tests" -def assign_paths(folder_path: Path) -> tuple[str, str, str, str, str]: - CONFIG_PATH = str(folder_path / "config.json") - - reports_location = folder_path / "reports" - - # if the user has a locally defined challenges path that they've added tests to - CHALLENGES_PATH = str(folder_path / "challenges") - if not os.path.exists(CHALLENGES_PATH): - CHALLENGES_PATH = str(Path(__file__).parent.parent / "challenges") - - if not os.path.exists(reports_location): - os.makedirs(reports_location) - - # from the ci - if REPORT_LOCATION: - reports_location = Path.cwd() / REPORT_LOCATION - - REPORTS_PATH = calculate_info_test_path(reports_location) - - REGRESSION_TESTS_PATH = str(reports_location / "regression_tests.json") - - SUCCESS_RATE_PATH = str(reports_location / "success_rate.json") - - return ( - CONFIG_PATH, - REGRESSION_TESTS_PATH, - REPORTS_PATH, - SUCCESS_RATE_PATH, - CHALLENGES_PATH, - ) - - -def calculate_dynamic_paths() -> tuple[Path, str, str, str, str, str]: - # the default home is where you're running from - HOME_DIRECTORY = Path(os.getcwd()) - - if os.path.join("Auto-GPT-Benchmarks", "backend") in str( - HOME_DIRECTORY - ): # accounting for backend calls - HOME_DIRECTORY = HOME_DIRECTORY.parent - - benchmarks_folder_path = HOME_DIRECTORY / "agbenchmark" - - if AGENT_NAME and not os.path.join("Auto-GPT-Benchmarks", "agent") in str( - HOME_DIRECTORY - ): - # if the agent name is defined but the run is not from the agent repo, then home is the agent repo - # used for development of both a benchmark and an agent - HOME_DIRECTORY = 
HOME_DIRECTORY / "agent" / AGENT_NAME - benchmarks_folder_path = HOME_DIRECTORY / "agbenchmark" - - ( - CONFIG_PATH, - REGRESSION_TESTS_PATH, - REPORTS_PATH, - SUCCESS_RATE_PATH, - CHALLENGES_PATH, - ) = assign_paths(benchmarks_folder_path) - else: - # otherwise the default is when home is an agent (running agbenchmark from agent/agent_repo) - # used when its just a pip install - ( - CONFIG_PATH, - REGRESSION_TESTS_PATH, - REPORTS_PATH, - SUCCESS_RATE_PATH, - CHALLENGES_PATH, - ) = assign_paths(benchmarks_folder_path) - - if not benchmarks_folder_path.exists(): - benchmarks_folder_path.mkdir(exist_ok=True) - - if not os.path.exists(benchmarks_folder_path / "reports"): - os.makedirs(benchmarks_folder_path / "reports") - - if not os.path.exists(REGRESSION_TESTS_PATH): - with open(REGRESSION_TESTS_PATH, "w"): - pass - - if not os.path.exists(SUCCESS_RATE_PATH): - with open(SUCCESS_RATE_PATH, "w"): - pass - - if not os.path.exists(Path(REPORTS_PATH) / "report.json"): - with open(Path(REPORTS_PATH) / "report.json", "w"): - pass - - return ( - HOME_DIRECTORY, - CONFIG_PATH, - REGRESSION_TESTS_PATH, - REPORTS_PATH, - SUCCESS_RATE_PATH, - CHALLENGES_PATH, - ) - - # def get_git_commit_sha(directory: Path) -> Optional[str]: # try: # repo = git.Repo(directory)