Benchmark changes

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
Merwane Hamadi
2023-09-12 12:10:03 -07:00
parent 978a980d72
commit 1b14d304d4
281 changed files with 428 additions and 718 deletions

View File

@@ -0,0 +1,252 @@
# radio charts, logs, helper functions for tests, anything else relevant.
import os
import re
from pathlib import Path
from typing import Any, List, Optional
from dotenv import load_dotenv
from agbenchmark.utils.data_types import calculate_info_test_path
load_dotenv()
from agbenchmark.utils.data_types import DIFFICULTY_MAP, DifficultyLevel
AGENT_NAME = os.getenv("AGENT_NAME")
REPORT_LOCATION = os.getenv("REPORT_LOCATION", None)
def replace_backslash(value: Any) -> Any:
if isinstance(value, str):
return re.sub(
r"\\+", "/", value
) # replace one or more backslashes with a forward slash
elif isinstance(value, list):
return [replace_backslash(i) for i in value]
elif isinstance(value, dict):
return {k: replace_backslash(v) for k, v in value.items()}
else:
return value
def calculate_success_percentage(results: list[bool]) -> float:
# Take the last 10 results or all if less than 10
last_results = results[-10:] if len(results) > 10 else results
success_count = last_results.count(True)
total_count = len(last_results)
if total_count == 0:
return 0
success_percentage = (success_count / total_count) * 100 # as a percentage
return round(success_percentage, 2)
def get_test_path(json_file: str | Path) -> str:
if isinstance(json_file, str):
json_file = Path(json_file)
# Find the index of "agbenchmark" in the path parts
try:
agbenchmark_index = json_file.parts.index("benchmark")
except ValueError:
raise ValueError("Invalid challenge location.")
# Create the path from "agbenchmark" onwards
challenge_location = Path(*json_file.parts[agbenchmark_index:])
formatted_location = replace_backslash(str(challenge_location))
if isinstance(formatted_location, str):
return formatted_location
else:
return str(challenge_location)
def get_highest_success_difficulty(
data: dict, just_string: Optional[bool] = None
) -> str:
highest_difficulty = None
highest_difficulty_level = 0
for test_name, test_data in data.items():
try:
if test_data.get("tests", None):
highest_difficulty_str = test_data["metrics"]["highest_difficulty"]
try:
highest_difficulty = DifficultyLevel[highest_difficulty_str]
highest_difficulty_level = DIFFICULTY_MAP[highest_difficulty]
except KeyError:
print(
f"Unexpected difficulty level '{highest_difficulty_str}' in test '{test_name}'"
)
continue
else:
if test_data["metrics"]["success"]:
difficulty_str = test_data["metrics"]["difficulty"]
try:
difficulty_enum = DifficultyLevel[difficulty_str.lower()]
difficulty_level = DIFFICULTY_MAP[difficulty_enum]
if difficulty_level > highest_difficulty_level:
highest_difficulty = difficulty_enum
highest_difficulty_level = difficulty_level
except KeyError:
print(
f"Unexpected difficulty level '{difficulty_str}' in test '{test_name}'"
)
continue
except Exception:
print(f"Make sure you selected the right test, no reports were generated.")
break
if highest_difficulty is not None:
highest_difficulty_str = highest_difficulty.name # convert enum to string
else:
highest_difficulty_str = ""
if highest_difficulty_level and not just_string:
return f"{highest_difficulty_str}: {highest_difficulty_level}"
elif highest_difficulty_str:
return highest_difficulty_str
return "No successful tests"
def assign_paths(folder_path: Path) -> tuple[str, str, str, str, str]:
CONFIG_PATH = str(folder_path / "config.json")
reports_location = folder_path / "reports"
# if the user has a locally defined challenges path that they've added tests to
CHALLENGES_PATH = str(folder_path / "challenges")
if not os.path.exists(CHALLENGES_PATH):
CHALLENGES_PATH = str(Path(__file__).parent.parent / "challenges")
if not os.path.exists(reports_location):
os.makedirs(reports_location)
# from the ci
if REPORT_LOCATION:
reports_location = Path.cwd() / REPORT_LOCATION
REPORTS_PATH = calculate_info_test_path(reports_location)
REGRESSION_TESTS_PATH = str(reports_location / "regression_tests.json")
SUCCESS_RATE_PATH = str(reports_location / "success_rate.json")
return (
CONFIG_PATH,
REGRESSION_TESTS_PATH,
REPORTS_PATH,
SUCCESS_RATE_PATH,
CHALLENGES_PATH,
)
def calculate_dynamic_paths() -> tuple[Path, str, str, str, str, str]:
# the default home is where you're running from
HOME_DIRECTORY = Path(os.getcwd())
if os.path.join("Auto-GPT-Benchmarks", "backend") in str(
HOME_DIRECTORY
): # accounting for backend calls
HOME_DIRECTORY = HOME_DIRECTORY.parent
benchmarks_folder_path = HOME_DIRECTORY / "agbenchmark"
if AGENT_NAME and not os.path.join("Auto-GPT-Benchmarks", "agent") in str(
HOME_DIRECTORY
):
# if the agent name is defined but the run is not from the agent repo, then home is the agent repo
# used for development of both a benchmark and an agent
HOME_DIRECTORY = HOME_DIRECTORY / "agent" / AGENT_NAME
benchmarks_folder_path = HOME_DIRECTORY / "agbenchmark"
(
CONFIG_PATH,
REGRESSION_TESTS_PATH,
REPORTS_PATH,
SUCCESS_RATE_PATH,
CHALLENGES_PATH,
) = assign_paths(benchmarks_folder_path)
else:
# otherwise the default is when home is an agent (running agbenchmark from agent/agent_repo)
# used when its just a pip install
(
CONFIG_PATH,
REGRESSION_TESTS_PATH,
REPORTS_PATH,
SUCCESS_RATE_PATH,
CHALLENGES_PATH,
) = assign_paths(benchmarks_folder_path)
if not benchmarks_folder_path.exists():
benchmarks_folder_path.mkdir(exist_ok=True)
if not os.path.exists(benchmarks_folder_path / "reports"):
os.makedirs(benchmarks_folder_path / "reports")
if not os.path.exists(REGRESSION_TESTS_PATH):
with open(REGRESSION_TESTS_PATH, "w"):
pass
if not os.path.exists(SUCCESS_RATE_PATH):
with open(SUCCESS_RATE_PATH, "w"):
pass
if not os.path.exists(Path(REPORTS_PATH) / "report.json"):
with open(Path(REPORTS_PATH) / "report.json", "w"):
pass
return (
HOME_DIRECTORY,
CONFIG_PATH,
REGRESSION_TESTS_PATH,
REPORTS_PATH,
SUCCESS_RATE_PATH,
CHALLENGES_PATH,
)
# def get_git_commit_sha(directory: Path) -> Optional[str]:
# try:
# repo = git.Repo(directory)
# remote_url = repo.remotes.origin.url
# if remote_url.endswith(".git"):
# remote_url = remote_url[:-4]
# git_commit_sha = f"{remote_url}/tree/{repo.head.commit.hexsha}"
# # print(f"GIT_COMMIT_SHA: {git_commit_sha}")
# return git_commit_sha
# except Exception:
# # print(f"{directory} is not a git repository!")
# return None
def agent_eligibible_for_optional_categories(
optional_challenge_categories: List, agent_categories: List
) -> bool:
for element in optional_challenge_categories:
if element not in agent_categories:
return False
return True
def find_absolute_benchmark_path() -> Path:
# Find the absolute path to the current working directory
current_path = Path.cwd()
# Find the position of "Auto-GPT-Benchmarks" in the path
benchmark_path_index = (
current_path.parts.index("Auto-GPT-Benchmarks")
if "Auto-GPT-Benchmarks" in current_path.parts
else None
)
if benchmark_path_index is not None:
# Construct the absolute path starting from "Auto-GPT-Benchmarks"
benchmark_path = Path(*current_path.parts[: benchmark_path_index + 1])
return benchmark_path
else:
raise ValueError(
"The directory 'Auto-GPT-Benchmarks' is not found in the current path."
)