mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-01-05 23:34:29 +01:00
148 lines
5.1 KiB
JSON
148 lines
5.1 KiB
JSON
{
|
|
"command": "agbenchmark start --mock",
|
|
"completion_time": "2023-07-11-21:09",
|
|
"metrics": {
|
|
"run_time": "0.96 seconds",
|
|
"highest_difficulty": "advanced: 5"
|
|
},
|
|
"tests": {
|
|
"TestWriteFile": {
|
|
"data_path": "agbenchmark/challenges/interface/write_file",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "interface",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.008 seconds"
|
|
}
|
|
},
|
|
"TestReadFile": {
|
|
"data_path": "agbenchmark/challenges/interface/read_file",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "interface",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.005 seconds"
|
|
}
|
|
},
|
|
"TestSearch": {
|
|
"data_path": "agbenchmark/challenges/interface/search",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "interface",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.006 seconds"
|
|
}
|
|
},
|
|
"TestDebugSimpleTypoWithGuidance": {
|
|
"data_path": "agbenchmark/challenges/code/d1",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "basic",
|
|
"success": false,
|
|
"fail_reason": "assert 1 in [0.0]",
|
|
"success_%": 0,
|
|
"run_time": "0.489 seconds"
|
|
}
|
|
},
|
|
"TestBasicMemory": {
|
|
"data_path": "agbenchmark/challenges/memory/m1",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "basic",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.02 seconds"
|
|
}
|
|
},
|
|
"TestBasicRetrieval": {
|
|
"data_path": "agbenchmark/challenges/retrieval/r1",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "basic",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.01 seconds"
|
|
}
|
|
},
|
|
"TestDebugSimpleTypoWithoutGuidance": {
|
|
"data_path": "agbenchmark/challenges/code/d2",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "novice",
|
|
"success": false,
|
|
"fail_reason": "agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithoutGuidance::test_method[challenge_data0] depends on agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithGuidance::test_method[challenge_data0]",
|
|
"success_%": 0,
|
|
"run_time": "0.001 seconds"
|
|
}
|
|
},
|
|
"TestCreateSimpleWebServer": {
|
|
"data_path": "agbenchmark/challenges/code/d3",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "advanced",
|
|
"success": false,
|
|
"fail_reason": "agbenchmark/challenges/test_all.py::TestCreateSimpleWebServer::test_method[challenge_data0] depends on agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithGuidance::test_method[challenge_data0]",
|
|
"success_%": 0,
|
|
"run_time": "0.001 seconds"
|
|
}
|
|
},
|
|
"TestRememberMultipleIds": {
|
|
"data_path": "agbenchmark/challenges/memory/m2",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "novice",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.018 seconds"
|
|
}
|
|
},
|
|
"TestRetrieval2": {
|
|
"data_path": "agbenchmark/challenges/retrieval/r2",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "novice",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.009 seconds"
|
|
}
|
|
},
|
|
"TestRememberMultipleIdsWithNoise": {
|
|
"data_path": "agbenchmark/challenges/memory/m3",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "intermediate",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.022 seconds"
|
|
}
|
|
},
|
|
"TestRetrieval3": {
|
|
"data_path": "agbenchmark/challenges/retrieval/r3",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "intermediate",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.01 seconds"
|
|
}
|
|
},
|
|
"TestRememberMultiplePhrasesWithNoise": {
|
|
"data_path": "agbenchmark/challenges/memory/m4",
|
|
"is_regression": false,
|
|
"metrics": {
|
|
"difficulty": "advanced",
|
|
"success": true,
|
|
"success_%": 0,
|
|
"run_time": "0.021 seconds"
|
|
}
|
|
}
|
|
},
|
|
"config": {
|
|
"workspace": "${os.path.join(Path.home(), 'miniagi')}",
|
|
"entry_path": "agbenchmark.benchmarks",
|
|
"cutoff": 60
|
|
}
|
|
} |