mirror of https://github.com/aljazceru/Auto-GPT.git
hotfix reports (#191)
@@ -34,7 +34,7 @@ def create_single_test(
     # if its a parallel run suite we just give it the data
     if suite_config and suite_config.same_task:
         artifacts_location = str(Path(challenge_location).resolve())
-        if "--test" or "--maintain" or "--improve" in sys.argv:
+        if "--test" in sys.argv or "--maintain" in sys.argv or "--improve" in sys.argv:
             artifacts_location = str(Path(challenge_location).resolve().parent.parent)
     else:
         setattr(
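Note: the old condition was always true, because the non-empty string literal "--test" is truthy and the `in sys.argv` check only binds to the last operand. A minimal standalone sketch of the difference (hypothetical argv for illustration only):

import sys

# Hypothetical command line for illustration only.
sys.argv = ["agbenchmark", "start"]

# Buggy form: "--test" is a truthy non-empty string, so the whole
# expression is True regardless of what is actually in sys.argv.
buggy = bool("--test" or "--maintain" or "--improve" in sys.argv)

# Fixed form: each flag is checked for membership individually.
fixed = "--test" in sys.argv or "--maintain" in sys.argv or "--improve" in sys.argv

print(buggy)  # True, even though no flag was passed
print(fixed)  # False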
@@ -99,7 +99,7 @@ def create_challenge(
     grandparent_dir = path.parent.parent

     # if its a single test running we dont care about the suite
-    if "--test" or "--maintain" or "--improve" in sys.argv:
+    if "--test" in sys.argv or "--maintain" in sys.argv or "--improve" in sys.argv:
         create_single_suite_challenge(suite_config, data, path)
         return json_files
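Same truthiness fix as the previous hunk. If this check keeps growing, an equivalent form could collect the flags once; this is only a sketch of an alternative, not what the commit does:

import sys

CLI_FLAGS = ("--test", "--maintain", "--improve")

# Equivalent to the corrected chained 'or' membership checks.
single_run = any(flag in sys.argv for flag in CLI_FLAGS)
print(single_run)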
@@ -191,8 +191,9 @@ def generate_tests() -> None: # sourcery skip: invert-any-all
                 continue

             # --maintain and --improve flag
-            improve_flag = regression_tests.get(data["name"], None)
-            maintain_flag = not improve_flag
+            in_regression = regression_tests.get(data["name"], None)
+            improve_flag = in_regression and "--improve" in commands
+            maintain_flag = not in_regression and "--maintain" in commands
             if "--maintain" in commands and maintain_flag:
                 continue
             elif "--improve" in commands and improve_flag:
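The hunk above reworks the skip logic: under --maintain a test is skipped only if it is not in the regression file, and under --improve only if it is already in the regression file. A small self-contained sketch of that decision table (the regression_tests and commands values are invented; in the real code they come from the regression report and sys.argv):

# Illustrative values only.
regression_tests = {"TestWriteFile": {"difficulty": "basic"}}
commands = ["--maintain"]

for name in ("TestWriteFile", "TestDebugCode"):
    in_regression = regression_tests.get(name, None)
    improve_flag = in_regression and "--improve" in commands
    maintain_flag = not in_regression and "--maintain" in commands

    if "--maintain" in commands and maintain_flag:
        print(f"skip {name}: not in regression, --maintain run")
    elif "--improve" in commands and improve_flag:
        print(f"skip {name}: already in regression, --improve run")
    else:
        print(f"keep {name}")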
@@ -137,19 +137,25 @@ def pytest_runtest_makereport(item: Any, call: Any) -> None:
         return

     challenge_location: str = getattr(item.cls, "CHALLENGE_LOCATION", "")
+    is_suite = None
+    # this is a non same task suite, with the location pointing to a data.json
+    is_suite = SuiteConfig.suite_data_if_suite(
+        Path(__file__).parent.parent / Path(challenge_location)
+    )
+
     try:
         # this is for a same_task suite pointing to the directory where the suite lives
         is_suite = SuiteConfig.deserialize(
             Path(__file__).parent.parent / Path(challenge_location) / "suite.json"
         )
-    except:
+    except Exception as e:
         pass

+    flags = "--test" in sys.argv or "--maintain" in sys.argv or "--improve" in sys.argv
+
     if call.when == "call":
         # if it's a same task suite, we combine the report.
         # but not if it's a single --test
-        if is_suite and is_suite.same_task and "--test" not in sys.argv:
+        if is_suite and is_suite.same_task and not flags:
             generate_combined_suite_report(item, challenge_data, challenge_location)
         else:
             # single non suite test
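The net effect of this hunk is that combined suite reports are only produced on a full run; passing any of --test, --maintain or --improve now disables combining, not just --test. A minimal sketch of that gate (the flag names come from the diff, everything else is a hypothetical helper):

import sys

def should_combine_suite_report(is_suite_same_task: bool) -> bool:
    # Any of these flags means a partial/selective run, so per-challenge
    # reports stay separate instead of being merged into one suite report.
    flags = "--test" in sys.argv or "--maintain" in sys.argv or "--improve" in sys.argv
    return is_suite_same_task and not flags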
@@ -159,7 +165,7 @@ def pytest_runtest_makereport(item: Any, call: Any) -> None:
             finalize_reports(item, challenge_data)

     # for separate task suites (same_task=false), their data is the same as a regular suite, but we combined the report at the end
-    if is_suite and not is_suite.same_task:
+    if is_suite and not is_suite.same_task and not flags:
         suite_reports.setdefault(is_suite.prefix, []).append(challenge_data["name"])
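suite_reports groups challenge names by suite prefix so a combined report can be written once per suite at session end. A tiny standalone illustration of the setdefault-append pattern used here (sample names invented):

suite_reports: dict = {}

# Append each finished challenge under its suite prefix, creating the
# list on first use.
for prefix, challenge in [("TestRevenue", "TestRevenue_1.0"),
                          ("TestRevenue", "TestRevenue_1.1"),
                          ("TestSearch", "TestSearch_1.0")]:
    suite_reports.setdefault(prefix, []).append(challenge)

print(suite_reports)
# {'TestRevenue': ['TestRevenue_1.0', 'TestRevenue_1.1'], 'TestSearch': ['TestSearch_1.0']}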
@@ -45,8 +45,6 @@ def generate_combined_suite_report(
     )
     item.test_name = suite_config.prefix

-    print("Generating combined suite report...", challenge_data, challenge_location)
-
     data_paths = suite_config.get_data_paths(root_path / Path(challenge_location))
     scores = getattr(item, "scores", {})
     mock = "--mock" in sys.argv  # Check if --mock is in sys.argv
@@ -296,7 +294,9 @@ def generate_separate_suite_reports(suite_reports: dict) -> None:
 def session_finish(suite_reports: dict) -> None:
-    generate_separate_suite_reports(suite_reports)
+    flags = "--test" in sys.argv or "--maintain" in sys.argv or "--improve" in sys.argv
+    if not flags:
+        generate_separate_suite_reports(suite_reports)

     with open(CONFIG_PATH, "r") as f:
         config = json.load(f)
@@ -88,8 +88,8 @@ def calculate_info_test_path(reports_path: Path) -> str:
         print(f"Found {related_file_count} files with '{test_arg}' in the name")
         # Take the number from before the _ and add the .{number}

-        prefix = ""
-        math.floor(prefix_number)
+        prefix = 0
+        prefix = math.floor(prefix_number)

     run_name = f"{prefix}.{related_file_count}_{test_arg}.json"
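The old code computed math.floor(prefix_number) but discarded the result, so prefix stayed an empty string and report file names came out malformed. A small reproduction with made-up values:

import math

prefix_number = 1.0
related_file_count = 3
test_arg = "TestWriteFile"

# Before: the result of math.floor() was never assigned, prefix stayed "".
prefix = ""
math.floor(prefix_number)
print(f"{prefix}.{related_file_count}_{test_arg}.json")  # .3_TestWriteFile.json

# After: the floored value is assigned to prefix.
prefix = math.floor(prefix_number)
print(f"{prefix}.{related_file_count}_{test_arg}.json")  # 1.3_TestWriteFile.json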
@@ -148,32 +148,36 @@ def get_highest_success_difficulty(
     highest_difficulty_level = 0

     for test_name, test_data in data.items():
-        if test_data.get("tests", None):
-            highest_difficulty_str = test_data["metrics"]["highest_difficulty"]
-            try:
-                highest_difficulty = DifficultyLevel[highest_difficulty_str]
-                highest_difficulty_level = DIFFICULTY_MAP[highest_difficulty]
-            except KeyError:
-                print(
-                    f"Unexpected difficulty level '{highest_difficulty_str}' in test '{test_name}'"
-                )
-                continue
-        else:
-            if test_data["metrics"]["success"]:
-                difficulty_str = test_data["metrics"]["difficulty"]
-
-                try:
-                    difficulty_enum = DifficultyLevel[difficulty_str.lower()]
-                    difficulty_level = DIFFICULTY_MAP[difficulty_enum]
-
-                    if difficulty_level > highest_difficulty_level:
-                        highest_difficulty = difficulty_enum
-                        highest_difficulty_level = difficulty_level
-                except KeyError:
-                    print(
-                        f"Unexpected difficulty level '{difficulty_str}' in test '{test_name}'"
-                    )
-                    continue
+        try:
+            if test_data.get("tests", None):
+                highest_difficulty_str = test_data["metrics"]["highest_difficulty"]
+                try:
+                    highest_difficulty = DifficultyLevel[highest_difficulty_str]
+                    highest_difficulty_level = DIFFICULTY_MAP[highest_difficulty]
+                except KeyError:
+                    print(
+                        f"Unexpected difficulty level '{highest_difficulty_str}' in test '{test_name}'"
+                    )
+                    continue
+            else:
+                if test_data["metrics"]["success"]:
+                    difficulty_str = test_data["metrics"]["difficulty"]
+
+                    try:
+                        difficulty_enum = DifficultyLevel[difficulty_str.lower()]
+                        difficulty_level = DIFFICULTY_MAP[difficulty_enum]
+
+                        if difficulty_level > highest_difficulty_level:
+                            highest_difficulty = difficulty_enum
+                            highest_difficulty_level = difficulty_level
+                    except KeyError:
+                        print(
+                            f"Unexpected difficulty level '{difficulty_str}' in test '{test_name}'"
+                        )
+                        continue
+        except Exception:
+            print(f"Make sure you selected the right test, no reports were generated.")
+            break

     if highest_difficulty is not None:
         highest_difficulty_str = highest_difficulty.name  # convert enum to string
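DifficultyLevel[...] raises KeyError for names that are not enum members, which is why both lookups are wrapped; the new outer try/except additionally stops aggregation with a hint instead of crashing when a report has an unexpected shape. A minimal sketch of the lookup pattern with invented stand-ins (the real DifficultyLevel and DIFFICULTY_MAP live in agbenchmark and may differ):

from enum import Enum

# Hypothetical stand-ins for agbenchmark's DifficultyLevel / DIFFICULTY_MAP.
class DifficultyLevel(Enum):
    interface = "interface"
    basic = "basic"
    advanced = "advanced"

DIFFICULTY_MAP = {
    DifficultyLevel.interface: 1,
    DifficultyLevel.basic: 2,
    DifficultyLevel.advanced: 3,
}

for name in ("basic", "impossible"):
    try:
        level = DIFFICULTY_MAP[DifficultyLevel[name]]
        print(name, "->", level)
    except KeyError:
        # Unknown difficulty names land here, mirroring the report code.
        print(f"Unexpected difficulty level '{name}'")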
Submodule agent/mini-agi updated: 4a346ab7cb...3e83765fa5