Files
Auto-GPT/agbenchmark/reports/1.json
2023-07-12 01:37:59 -04:00

148 lines
5.1 KiB
JSON

{
"command": "agbenchmark start --mock",
"completion_time": "2023-07-11-21:09",
"metrics": {
"run_time": "0.96 seconds",
"highest_difficulty": "advanced: 5"
},
"tests": {
"TestWriteFile": {
"data_path": "agbenchmark/challenges/interface/write_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 0,
"run_time": "0.008 seconds"
}
},
"TestReadFile": {
"data_path": "agbenchmark/challenges/interface/read_file",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 0,
"run_time": "0.005 seconds"
}
},
"TestSearch": {
"data_path": "agbenchmark/challenges/interface/search",
"is_regression": false,
"metrics": {
"difficulty": "interface",
"success": true,
"success_%": 0,
"run_time": "0.006 seconds"
}
},
"TestDebugSimpleTypoWithGuidance": {
"data_path": "agbenchmark/challenges/code/d1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": false,
"fail_reason": "assert 1 in [0.0]",
"success_%": 0,
"run_time": "0.489 seconds"
}
},
"TestBasicMemory": {
"data_path": "agbenchmark/challenges/memory/m1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": true,
"success_%": 0,
"run_time": "0.02 seconds"
}
},
"TestBasicRetrieval": {
"data_path": "agbenchmark/challenges/retrieval/r1",
"is_regression": false,
"metrics": {
"difficulty": "basic",
"success": true,
"success_%": 0,
"run_time": "0.01 seconds"
}
},
"TestDebugSimpleTypoWithoutGuidance": {
"data_path": "agbenchmark/challenges/code/d2",
"is_regression": false,
"metrics": {
"difficulty": "novice",
"success": false,
"fail_reason": "agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithoutGuidance::test_method[challenge_data0] depends on agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithGuidance::test_method[challenge_data0]",
"success_%": 0,
"run_time": "0.001 seconds"
}
},
"TestCreateSimpleWebServer": {
"data_path": "agbenchmark/challenges/code/d3",
"is_regression": false,
"metrics": {
"difficulty": "advanced",
"success": false,
"fail_reason": "agbenchmark/challenges/test_all.py::TestCreateSimpleWebServer::test_method[challenge_data0] depends on agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithGuidance::test_method[challenge_data0]",
"success_%": 0,
"run_time": "0.001 seconds"
}
},
"TestRememberMultipleIds": {
"data_path": "agbenchmark/challenges/memory/m2",
"is_regression": false,
"metrics": {
"difficulty": "novice",
"success": true,
"success_%": 0,
"run_time": "0.018 seconds"
}
},
"TestRetrieval2": {
"data_path": "agbenchmark/challenges/retrieval/r2",
"is_regression": false,
"metrics": {
"difficulty": "novice",
"success": true,
"success_%": 0,
"run_time": "0.009 seconds"
}
},
"TestRememberMultipleIdsWithNoise": {
"data_path": "agbenchmark/challenges/memory/m3",
"is_regression": false,
"metrics": {
"difficulty": "intermediate",
"success": true,
"success_%": 0,
"run_time": "0.022 seconds"
}
},
"TestRetrieval3": {
"data_path": "agbenchmark/challenges/retrieval/r3",
"is_regression": false,
"metrics": {
"difficulty": "intermediate",
"success": true,
"success_%": 0,
"run_time": "0.01 seconds"
}
},
"TestRememberMultiplePhrasesWithNoise": {
"data_path": "agbenchmark/challenges/memory/m4",
"is_regression": false,
"metrics": {
"difficulty": "advanced",
"success": true,
"success_%": 0,
"run_time": "0.021 seconds"
}
}
},
"config": {
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
"entry_path": "agbenchmark.benchmarks",
"cutoff": 60
}
}