Mirror of https://github.com/aljazceru/Auto-GPT.git (synced 2026-02-01 04:14:24 +01:00)
debug(benchmark): Add more debug code to pinpoint cause of rare crash
Target: https://github.com/Significant-Gravitas/AutoGPT/actions/runs/7941977633/job/21684817491
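The rare crash being chased here appears to be a pydantic ValidationError raised while building a TestResult whose success and fail_reason fields disagree (the `success ^ fail_reason` constraint logged in the first hunk below). As a rough illustration only, a pydantic v1-style validator with the same constraint reproduces that kind of failure; MiniTestResult is a hypothetical stand-in, not agbenchmark's actual model.

# Stand-in sketch, assuming pydantic v1-style validators (as the `values` dict in
# the logged message suggests); MiniTestResult is hypothetical, not agbenchmark code.
from pydantic import BaseModel, ValidationError, validator


class MiniTestResult(BaseModel):
    success: bool | None = None
    fail_reason: str | None = None

    @validator("fail_reason")
    def success_xor_fail_reason(cls, v, values):
        # Mirror of the constraint: a fail_reason only makes sense for a failed run.
        if bool(v) == bool(values.get("success")):
            raise ValueError("success and fail_reason must disagree (success ^ fail_reason)")
        return v


try:
    # success=True combined with a fail_reason violates the constraint ...
    MiniTestResult(success=True, fail_reason="timed out")
except ValidationError as e:
    # ... and pydantic surfaces it as the ValidationError this commit instruments.
    print(e)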
@@ -33,7 +33,7 @@ class TestResult(BaseModel):
             logger.error(
                 "Error validating `success ^ fail_reason` on TestResult: "
                 f"success = {repr(values['success'])}; "
-                f"fail_reason = {repr(v)} ({v})"
+                f"fail_reason = {repr(v)}"
             )
         if v:
             success = values["success"]
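The one-line change in this hunk switches between two renderings of the same log field. For a plain string value, repr() already shows the value (with quotes), so the only difference is whether the unquoted value is repeated; with an illustrative value:

# Quick comparison of the two log formats touched above (illustrative value).
v = "Timed out"
print(f"fail_reason = {repr(v)} ({v})")  # fail_reason = 'Timed out' (Timed out)
print(f"fail_reason = {repr(v)}")        # fail_reason = 'Timed out'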
@@ -4,6 +4,7 @@ import os
 from pathlib import Path

 import pytest
+from pydantic import ValidationError

 from agbenchmark.challenges import ChallengeInfo
 from agbenchmark.config import AgentBenchmarkConfig
@@ -86,21 +87,28 @@ def add_test_result_to_report(
     else:
         test_report.metrics.attempted = True

-    test_report.results.append(
-        TestResult(
-            success=call.excinfo is None,
-            run_time=f"{str(round(call.duration, 3))} seconds",
-            fail_reason=None if call.excinfo is None else str(call.excinfo.value),
-            reached_cutoff=user_properties.get("timed_out", False),
-            n_steps=user_properties.get("n_steps"),
-            cost=user_properties.get("agent_task_cost"),
+    try:
+        test_report.results.append(
+            TestResult(
+                success=call.excinfo is None,
+                run_time=f"{str(round(call.duration, 3))} seconds",
+                fail_reason=None if call.excinfo is None else str(call.excinfo.value),
+                reached_cutoff=user_properties.get("timed_out", False),
+                n_steps=user_properties.get("n_steps"),
+                cost=user_properties.get("agent_task_cost"),
+            )
         )
-    )
-    test_report.metrics.success_percentage = (
-        sum(r.success or False for r in test_report.results)
-        / len(test_report.results)
-        * 100
-    )
+        test_report.metrics.success_percentage = (
+            sum(r.success or False for r in test_report.results)
+            / len(test_report.results)
+            * 100
+        )
+    except ValidationError:
+        logger.error(
+            "Validation failed on TestResult; "
+            f"call.excinfo = {repr(call.excinfo)} ({call.excinfo})"
+        )
+        raise

     prev_test_results: list[bool | None] = get_and_update_success_history(
         test_name, test_report.results[-1].success
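The pattern added in the last hunk is log-and-reraise: catch the ValidationError, record the exact input that produced it (call.excinfo, logged via both repr() and str(), which may render pytest's ExceptionInfo differently), then re-raise so the run still fails while the CI log now pinpoints the offending value. A minimal sketch of the same pattern, reusing the hypothetical MiniTestResult model from the sketch near the top; record_result and its excinfo argument are likewise illustrative, not agbenchmark API.

import logging

from pydantic import ValidationError

logger = logging.getLogger(__name__)


def record_result(excinfo) -> "MiniTestResult":
    """Build a result the way add_test_result_to_report does, but with debug logging."""
    try:
        return MiniTestResult(  # MiniTestResult: see the earlier sketch
            success=excinfo is None,
            fail_reason=None if excinfo is None else str(excinfo.value),
        )
    except ValidationError:
        # Log the exact input that broke validation, then let the error propagate
        # so the failure is still visible where it originally crashed.
        logger.error(f"Validation failed on MiniTestResult; excinfo = {repr(excinfo)} ({excinfo})")
        raise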