diff --git a/reports/mini-agi/folder14_08-01-08-14/report.json b/reports/mini-agi/folder14_08-01-08-14/report.json new file mode 100644 index 00000000..4f81cd51 --- /dev/null +++ b/reports/mini-agi/folder14_08-01-08-14/report.json @@ -0,0 +1,13 @@ +{ + "command": "agbenchmark start", + "completion_time": "2023-08-01-08:14", + "benchmark_start_time": "2023-08-01-08:14", + "metrics": { + "run_time": "20.63 seconds", + "highest_difficulty": "No successful tests" + }, + "tests": {}, + "config": { + "workspace": "${os.path.join(Path.home(), 'miniagi')}" + } +} \ No newline at end of file diff --git a/reports/mini-agi/regression_tests.json b/reports/mini-agi/regression_tests.json index 9e26dfee..df8f99c3 100644 --- a/reports/mini-agi/regression_tests.json +++ b/reports/mini-agi/regression_tests.json @@ -1 +1,6 @@ -{} \ No newline at end of file +{ + "TestWriteFile": { + "difficulty": "interface", + "data_path": "agbenchmark/challenges/interface/write_file/data.json" + } +} \ No newline at end of file diff --git a/reports/mini-agi/success_rate.json b/reports/mini-agi/success_rate.json index 55b4f2ae..67d730ed 100644 --- a/reports/mini-agi/success_rate.json +++ b/reports/mini-agi/success_rate.json @@ -1,64 +1,35 @@ { - "TestPlanCreation": [ - true, - true - ], - "TestWriteFile": [ - true, - true - ], - "TestGoalDivergence": [ + "TestAdaptLink": [ false ], - "TestSearch": [ + "TestAdaptSimpleTypoWithGuidance": [ true ], - "TestReadFile": [ - true - ], - "TestBasicRetrieval": [ - true + "TestAdaptTeslaRevenue": [ + false ], "TestBasicContentGen": [ true ], - "TestReturnCode_Simple": [ - false - ], - "TestDebugSimpleTypoWithGuidance": [ - true - ], "TestBasicMemory": [ false ], - "TestAdaptLink": [ - false - ], - "TestRevenueRetrieval_1.2": [ - false - ], - "TestRevenueRetrieval_1.1": [ - false - ], - "TestRevenueRetrieval_1.0": [ + "TestBasicRetrieval": [ true ], - "TestReturnCode_Write": [ - false + "TestDebugMultipleTypo": [ + true + ], + "TestDebugSimpleTypoWithGuidance": [ + true ], "TestDebugSimpleTypoWithoutGuidance": [ true ], - "TestAdaptSimpleTypoWithGuidance": [ - true - ], - "TestInstructionFollowing": [ + "TestFunctionCodeGeneration": [ false ], - "TestGoalLoss_Simple": [ - false - ], - "TestGoalLoss_advanced": [ + "TestGoalDivergence": [ false ], "TestGoalLoss_Hard": [ @@ -67,34 +38,64 @@ "TestGoalLoss_Medium": [ false ], + "TestGoalLoss_Simple": [ + false + ], + "TestGoalLoss_advanced": [ + false + ], + "TestInstructionFollowing": [ + false + ], + "TestPlanCreation": [ + true, + true + ], + "TestReadFile": [ + true + ], "TestRememberMultipleIds": [ false ], - "TestFunctionCodeGeneration": [ + "TestRememberMultiplePhrasesWithNoise": [ false ], - "TestReturnCode_Modify": [ - false - ], - "TestDebugMultipleTypo": [ - true - ], "TestRememberMultipleWithNoise": [ false ], "TestRetrieval3": [ false ], - "TestAdaptTeslaRevenue": [ + "TestReturnCode_Modify": [ false ], - "TestThreeSum": [ + "TestReturnCode_Simple": [ false ], "TestReturnCode_Tests": [ false ], - "TestRememberMultiplePhrasesWithNoise": [ + "TestReturnCode_Write": [ false + ], + "TestRevenueRetrieval_1.0": [ + true + ], + "TestRevenueRetrieval_1.1": [ + false + ], + "TestRevenueRetrieval_1.2": [ + false + ], + "TestSearch": [ + true + ], + "TestThreeSum": [ + false + ], + "TestWriteFile": [ + true, + true, + true ] } \ No newline at end of file