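"""Shared pytest fixtures and hooks for the agbenchmark test harness.

This conftest resolves the benchmark workspace, wires up the --mock /
--improve / --maintain / --test CLI options, and records per-test metrics
into the regression, info, and internal reports.
"""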
import json
import os
import shutil
import sys
import time
from pathlib import Path  # noqa
from typing import Any, Dict, Generator

import pytest

from agbenchmark.ReportManager import ReportManager
from agbenchmark.start_benchmark import (
    CONFIG_PATH,
    INFO_TESTS_PATH,
    REGRESSION_TESTS_PATH,
    get_regression_data,
)
from agbenchmark.utils import calculate_success_percentage


def resolve_workspace(workspace: str) -> str:
    if workspace.startswith("${") and workspace.endswith("}"):
        # Extract the string inside ${...}
        path_expr = workspace[2:-1]

        # Check if it starts with "os.path.join"
        if path_expr.strip().startswith("os.path.join"):
            # Evaluate the path string.
            # NOTE: eval() executes arbitrary Python; the prefix check above
            # is a guard, not a sandbox, so the config file must be trusted.
            path_value = eval(path_expr)

            # Return the evaluated result in place of the original string
            return path_value
        else:
            raise ValueError("Invalid workspace path expression.")
    else:
        # A plain path is resolved relative to the current working directory
        return os.path.abspath(Path(os.getcwd()) / workspace)
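
# Illustrative calls (hypothetical values, shown as a sketch only):
#   resolve_workspace("${os.path.join('agbenchmark', 'workspace')}")
#       -> result of eval("os.path.join('agbenchmark', 'workspace')")
#   resolve_workspace("workspace")
#       -> os.path.abspath(Path(os.getcwd()) / "workspace")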


@pytest.fixture(scope="module")
def config(request: Any) -> Dict[str, Any]:
    print(f"Config file: {CONFIG_PATH}")
    with open(CONFIG_PATH, "r") as f:
        config = json.load(f)

    if isinstance(config["workspace"], str):
        config["workspace"] = resolve_workspace(config["workspace"])
    else:  # it's an input/output dict
        config["workspace"]["input"] = resolve_workspace(config["workspace"]["input"])
        config["workspace"]["output"] = resolve_workspace(config["workspace"]["output"])

    return config
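
# The two workspace shapes the config fixture accepts (illustrative values;
# the real settings live in the JSON file at CONFIG_PATH):
#   {"workspace": "workspace"}
#   {"workspace": {"input": "workspace/input", "output": "workspace/output"}}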


@pytest.fixture(autouse=True)
def workspace(config: Dict[str, Any]) -> Generator[Any, None, None]:
    output_path = config["workspace"]

    # Check whether this is the input/output paradigm
    if not isinstance(config["workspace"], str):
        output_path = config["workspace"]["output"]
        if not os.path.exists(config["workspace"]["input"]):
            os.makedirs(config["workspace"]["input"], exist_ok=True)

    # Create the output directory if it doesn't exist
    if not os.path.exists(output_path):
        os.makedirs(output_path, exist_ok=True)

    yield config["workspace"]

    # Teardown after the test function completes: empty the output directory
    for filename in os.listdir(output_path):
        file_path = os.path.join(output_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f"Failed to delete {file_path}. Reason: {e}")


def pytest_addoption(parser: Any) -> None:
    parser.addoption("--mock", action="store_true", default=False)
    parser.addoption("--improve", action="store_true", default=False)
    parser.addoption("--maintain", action="store_true", default=False)
    # default=None rather than False, so an unset flag stays None downstream
    parser.addoption("--test", action="store_true", default=None)
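
# Illustrative invocations (flag semantics as used in this file):
#   pytest --maintain   # run only tests already recorded as regression tests
#   pytest --improve    # skip tests already recorded as regression tests
#   pytest --mock       # mock run; results are kept out of the pass/fail history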


@pytest.fixture(autouse=True)
def check_regression(request: Any) -> None:
    test_name = request.node.parent.name
    data = get_regression_data()

    # Get the true location of the test
    challenge_location = getattr(request.node.parent.cls, "CHALLENGE_LOCATION", "")

    skip_string = f"Skipping {test_name} at {challenge_location}"

    # Check if the test name exists in the regression tests
    if request.config.getoption("--improve") and data.get(test_name, None):
        pytest.skip(f"{skip_string} because it's a regression test")
    elif request.config.getoption("--maintain") and not data.get(test_name, None):
        pytest.skip(f"{skip_string} because it's not a regression test")


# This provides the challenge_data for every test
@pytest.fixture(autouse=True)
def challenge_data(request: Any) -> Any:
    return request.param
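
# request.param above is only populated when the fixture is parametrized
# indirectly, i.e. collection does something like (illustrative):
#   pytest.mark.parametrize("challenge_data", challenges, indirect=True)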


@pytest.fixture(autouse=True, scope="session")
def mock(request: Any) -> bool:
    return request.config.getoption("--mock")


@pytest.fixture(autouse=True, scope="function")
def timer(request: Any) -> Generator[None, None, None]:
    start_time = time.time()
    yield
    run_time = time.time() - start_time
    request.node.user_properties.append(("run_time", run_time))


# Tests that consistently pass are considered regression tests
regression_manager = ReportManager(REGRESSION_TESTS_PATH)

# User-facing reporting information
info_manager = ReportManager(INFO_TESTS_PATH)

INTERNAL_LOGS = Path(__file__).resolve().parent  # agbenchmark/conftest.py
# Internal JSON store (a stand-in for a real database) tracking pass/fail history
internal_info = ReportManager(str(INTERNAL_LOGS / "internal_info.json"))
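
# internal_info.json maps each test name to its recent pass/fail history,
# e.g. (illustrative): {"TestWriteFile": [false, true, true, true]}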


def pytest_runtest_makereport(item: Any, call: Any) -> None:
    if call.when == "call":
        challenge_data = item.funcargs.get("challenge_data", None)
        difficulty = (
            challenge_data["info"]["difficulty"] if challenge_data else "unknown"
        )
        dependencies = challenge_data["dependencies"] if challenge_data else []
        # Extract the challenge_location from the class
        challenge_location: str = getattr(item.cls, "CHALLENGE_LOCATION", "")
        test_name = item.nodeid.split("::")[1]
        item.test_name = test_name

        test_details = {
            "difficulty": difficulty,
            "dependencies": dependencies,
            "data_path": challenge_location,
        }

        info_details: Any = {
            "data_path": challenge_location,
            "is_regression": False,
            "metrics": {
                "difficulty": difficulty,
                "success": False,
            },
        }

        mock = "--mock" in sys.argv  # whether this run is mocked

        if call.excinfo is None:
            info_details["metrics"]["success"] = True
        else:
            if not mock:  # don't remove if it's a mock test
                regression_manager.remove_test(test_name)
            info_details["metrics"]["fail_reason"] = str(call.excinfo.value)

        prev_test_results: list[bool] = internal_info.tests.get(test_name, [])
        if not mock:
            # Only record the result if it's an actual (non-mock) run
            prev_test_results.append(info_details["metrics"]["success"])
            internal_info.add_test(test_name, prev_test_results)

            # Success rate over the recorded non-mock history
            info_details["metrics"]["success_%"] = calculate_success_percentage(
                prev_test_results
            )
        else:
            # Mock runs still report the success rate of prior non-mock runs
            info_details["metrics"][
                "non_mock_success_%"
            ] = calculate_success_percentage(prev_test_results)

        if len(prev_test_results) >= 3 and prev_test_results[-3:] == [True, True, True]:
            # If the last 3 runs were successful, promote to a regression test
            info_details["is_regression"] = True
            regression_manager.add_test(test_name, test_details)

        # User-facing reporting
        item.info_details = info_details

    if call.when == "teardown":
        run_time = dict(item.user_properties).get("run_time")

        info_details = getattr(item, "info_details", {})
        test_name = getattr(item, "test_name", "")

        if info_details and test_name:
            if run_time:
                info_details["metrics"]["run_time"] = f"{round(run_time, 3)} seconds"

            info_manager.add_test(test_name, info_details)
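
# Shape of one info_details entry as written to the info report
# (illustrative values):
#   {
#       "data_path": "agbenchmark/challenges/.../data.json",
#       "is_regression": False,
#       "metrics": {"difficulty": "basic", "success": True,
#                   "success_%": 75.0, "run_time": "0.123 seconds"},
#   }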


def pytest_sessionfinish(session: Any) -> None:
    """Called at the end of the session to save regression tests and info"""
    with open(CONFIG_PATH, "r") as f:
        config = json.load(f)

    internal_info.save()
    info_manager.end_info_report(config)
    regression_manager.save()


# Adds the dependency marker and category markers automatically from the challenge JSON
def pytest_collection_modifyitems(items: Any, config: Any) -> None:
    data = get_regression_data()

    for item in items:
        # Instantiate the test class (item.cls) to read its challenge data
        test_class_instance = item.cls()

        # Then access its properties
        name = item.parent.cls.__name__
        dependencies = test_class_instance.data.dependencies

        # In improve mode, filter out dependencies already in the regression data
        if config.getoption("--improve"):
            dependencies = [dep for dep in dependencies if not data.get(dep, None)]
        elif config.getoption("--test"):
            # A single targeted test ignores dependencies entirely
            dependencies = []

        categories = test_class_instance.data.category

        # Add the depends marker dynamically
        item.add_marker(pytest.mark.depends(on=dependencies, name=name))

        # Add category markers dynamically
        for category in categories:
            item.add_marker(getattr(pytest.mark, category))
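
# The markers added above are equivalent to decorating each test like this
# (illustrative names):
#   @pytest.mark.depends(on=["TestWriteFile"], name="TestReadFile")
#   @pytest.mark.retrieval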