diff --git a/benchmark/agbenchmark/conftest.py b/benchmark/agbenchmark/conftest.py index 21a58466..03520b9b 100644 --- a/benchmark/agbenchmark/conftest.py +++ b/benchmark/agbenchmark/conftest.py @@ -313,7 +313,8 @@ def pytest_runtest_makereport(item: Any, call: Any) -> None: ) if call.when == "call": - generate_single_call_report(item, call, challenge_data) + answers = getattr(item, 'answers', None) + generate_single_call_report(item, call, challenge_data, answers) if call.when == "teardown": finalize_reports(item, challenge_data) diff --git a/benchmark/agbenchmark/reports/reports.py b/benchmark/agbenchmark/reports/reports.py index 60821c1f..13fe4328 100644 --- a/benchmark/agbenchmark/reports/reports.py +++ b/benchmark/agbenchmark/reports/reports.py @@ -53,7 +53,7 @@ def update_regression_tests( def generate_single_call_report( - item: Any, call: Any, challenge_data: dict[str, Any] + item: Any, call: Any, challenge_data: dict[str, Any], answers: dict[str, Any] ) -> None: try: difficulty = challenge_data["info"]["difficulty"] @@ -87,6 +87,9 @@ def generate_single_call_report( }, # "answers": answers, } + if answers: + info_details["answers"] = answers + if "metadata" in challenge_data: info_details["metadata"] = challenge_data["metadata"]