From f0615f77ec7bd6adc93d595e485239d53dda920b Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Tue, 1 Aug 2023 08:13:24 +0000 Subject: [PATCH] smol-developer-20230801081324 --- .../folder13_08-01-08-13/report.json | 14 +++ reports/smol-developer/regression_tests.json | 7 +- reports/smol-developer/success_rate.json | 119 +++++++++--------- 3 files changed, 80 insertions(+), 60 deletions(-) create mode 100644 reports/smol-developer/folder13_08-01-08-13/report.json diff --git a/reports/smol-developer/folder13_08-01-08-13/report.json b/reports/smol-developer/folder13_08-01-08-13/report.json new file mode 100644 index 00000000..00092e57 --- /dev/null +++ b/reports/smol-developer/folder13_08-01-08-13/report.json @@ -0,0 +1,14 @@ +{ + "command": "agbenchmark start", + "completion_time": "2023-08-01-08:13", + "benchmark_start_time": "2023-08-01-08:13", + "metrics": { + "run_time": "6.61 seconds", + "highest_difficulty": "No successful tests" + }, + "tests": {}, + "config": { + "workspace": "generated", + "entry_path": "agbenchmark.benchmarks" + } +} \ No newline at end of file diff --git a/reports/smol-developer/regression_tests.json b/reports/smol-developer/regression_tests.json index 9e26dfee..df8f99c3 100644 --- a/reports/smol-developer/regression_tests.json +++ b/reports/smol-developer/regression_tests.json @@ -1 +1,6 @@ -{} \ No newline at end of file +{ + "TestWriteFile": { + "difficulty": "interface", + "data_path": "agbenchmark/challenges/interface/write_file/data.json" + } +} \ No newline at end of file diff --git a/reports/smol-developer/success_rate.json b/reports/smol-developer/success_rate.json index be53ac9c..3bf1fcc6 100644 --- a/reports/smol-developer/success_rate.json +++ b/reports/smol-developer/success_rate.json @@ -1,63 +1,35 @@ { - "TestWriteFile": [ - true, - true - ], - "TestPlanCreation": [ - true - ], - "TestGoalDivergence": [ - false - ], - "TestSearch": [ - false - ], - "TestReadFile": [ - false - ], - "TestBasicRetrieval": [ - false - ], - "TestBasicContentGen": [ - false - ], - "TestReturnCode_Simple": [ - false - ], - "TestDebugSimpleTypoWithGuidance": [ - false - ], - "TestBasicMemory": [ - false - ], "TestAdaptLink": [ false ], - "TestRevenueRetrieval_1.2": [ - false - ], - "TestRevenueRetrieval_1.1": [ - false - ], - "TestRevenueRetrieval_1.0": [ - false - ], - "TestReturnCode_Write": [ - false - ], - "TestDebugSimpleTypoWithoutGuidance": [ - false - ], "TestAdaptSimpleTypoWithGuidance": [ false ], - "TestInstructionFollowing": [ + "TestAdaptTeslaRevenue": [ false ], - "TestGoalLoss_Simple": [ + "TestBasicContentGen": [ false ], - "TestGoalLoss_advanced": [ + "TestBasicMemory": [ + false + ], + "TestBasicRetrieval": [ + false + ], + "TestDebugMultipleTypo": [ + false + ], + "TestDebugSimpleTypoWithGuidance": [ + false + ], + "TestDebugSimpleTypoWithoutGuidance": [ + false + ], + "TestFunctionCodeGeneration": [ + false + ], + "TestGoalDivergence": [ false ], "TestGoalLoss_Hard": [ @@ -66,16 +38,25 @@ "TestGoalLoss_Medium": [ false ], + "TestGoalLoss_Simple": [ + false + ], + "TestGoalLoss_advanced": [ + false + ], + "TestInstructionFollowing": [ + false + ], + "TestPlanCreation": [ + true + ], + "TestReadFile": [ + false + ], "TestRememberMultipleIds": [ false ], - "TestFunctionCodeGeneration": [ - false - ], - "TestReturnCode_Modify": [ - false - ], - "TestDebugMultipleTypo": [ + "TestRememberMultiplePhrasesWithNoise": [ false ], "TestRememberMultipleWithNoise": [ @@ -84,16 +65,36 @@ "TestRetrieval3": [ false ], - "TestAdaptTeslaRevenue": [ + "TestReturnCode_Modify": [ false ], - "TestThreeSum": [ + "TestReturnCode_Simple": [ false ], "TestReturnCode_Tests": [ false ], - "TestRememberMultiplePhrasesWithNoise": [ + "TestReturnCode_Write": [ false + ], + "TestRevenueRetrieval_1.0": [ + false + ], + "TestRevenueRetrieval_1.1": [ + false + ], + "TestRevenueRetrieval_1.2": [ + false + ], + "TestSearch": [ + false + ], + "TestThreeSum": [ + false + ], + "TestWriteFile": [ + true, + true, + true ] } \ No newline at end of file