mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-02-18 12:44:29 +01:00
smol-developer-20230718001759
This commit is contained in:
176
reports/smol-developer/file1_07-18-00-17.json
Normal file
176
reports/smol-developer/file1_07-18-00-17.json
Normal file
@@ -0,0 +1,176 @@
|
||||
{
|
||||
"command": "agbenchmark start",
|
||||
"completion_time": "2023-07-18-00:17",
|
||||
"metrics": {
|
||||
"run_time": "41.3 seconds",
|
||||
"highest_difficulty": "novice: 3"
|
||||
},
|
||||
"tests": {
|
||||
"TestWriteFile": {
|
||||
"data_path": "agbenchmark/challenges/interface/write_file",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "interface",
|
||||
"success": true,
|
||||
"success_%": 100.0,
|
||||
"run_time": "5.554 seconds"
|
||||
}
|
||||
},
|
||||
"TestBasicCodeGeneration": {
|
||||
"data_path": "agbenchmark/challenges/code/d4",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "novice",
|
||||
"success": true,
|
||||
"success_%": 100.0,
|
||||
"run_time": "8.223 seconds"
|
||||
}
|
||||
},
|
||||
"TestSearch": {
|
||||
"data_path": "agbenchmark/challenges/interface/search",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "interface",
|
||||
"success": false,
|
||||
"fail_reason": "assert 1 in [0.0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "16.099 seconds"
|
||||
}
|
||||
},
|
||||
"TestReadFile": {
|
||||
"data_path": "agbenchmark/challenges/interface/read_file",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "interface",
|
||||
"success": false,
|
||||
"fail_reason": "assert 1 in [0.0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "9.624 seconds"
|
||||
}
|
||||
},
|
||||
"TestThreeSum": {
|
||||
"data_path": "agbenchmark/challenges/code/d5",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "intermediate",
|
||||
"success": false,
|
||||
"fail_reason": "assert 1 in [0.0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "1.625 seconds"
|
||||
}
|
||||
},
|
||||
"TestBasicRetrieval": {
|
||||
"data_path": "agbenchmark/challenges/retrieval/r1",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "basic",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestBasicRetrieval::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestSearch::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
},
|
||||
"TestDebugSimpleTypoWithGuidance": {
|
||||
"data_path": "agbenchmark/challenges/code/d1",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "basic",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithGuidance::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestReadFile::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
},
|
||||
"TestBasicMemory": {
|
||||
"data_path": "agbenchmark/challenges/memory/m1",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "basic",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestBasicMemory::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestReadFile::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
},
|
||||
"TestRetrieval2": {
|
||||
"data_path": "agbenchmark/challenges/retrieval/r2",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "novice",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestRetrieval2::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestBasicRetrieval::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
},
|
||||
"TestCreateSimpleWebServer": {
|
||||
"data_path": "agbenchmark/challenges/code/d3",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "advanced",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestCreateSimpleWebServer::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithGuidance::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.002 seconds"
|
||||
}
|
||||
},
|
||||
"TestDebugSimpleTypoWithoutGuidance": {
|
||||
"data_path": "agbenchmark/challenges/code/d2",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "novice",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithoutGuidance::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestDebugSimpleTypoWithGuidance::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
},
|
||||
"TestRememberMultipleIds": {
|
||||
"data_path": "agbenchmark/challenges/memory/m2",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "novice",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestRememberMultipleIds::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestBasicMemory::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
},
|
||||
"TestRetrieval3": {
|
||||
"data_path": "agbenchmark/challenges/retrieval/r3",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "intermediate",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestRetrieval3::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestRetrieval2::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
},
|
||||
"TestRememberMultipleIdsWithNoise": {
|
||||
"data_path": "agbenchmark/challenges/memory/m3",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "intermediate",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestRememberMultipleIdsWithNoise::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestRememberMultipleIds::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
},
|
||||
"TestRememberMultiplePhrasesWithNoise": {
|
||||
"data_path": "agbenchmark/challenges/memory/m4",
|
||||
"is_regression": false,
|
||||
"metrics": {
|
||||
"difficulty": "advanced",
|
||||
"success": false,
|
||||
"fail_reason": "agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestRememberMultiplePhrasesWithNoise::test_method[challenge_data0] depends on agent/smol-developer/venv/lib/python3.10/site-packages/agbenchmark/challenges/test_all.py::TestRememberMultipleIdsWithNoise::test_method[challenge_data0]",
|
||||
"success_%": 0.0,
|
||||
"run_time": "0.001 seconds"
|
||||
}
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"workspace": "generated"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user