diff --git a/benchmark/agbenchmark/agent_api_interface.py b/benchmark/agbenchmark/agent_api_interface.py index ce6ab649..f35ef81a 100644 --- a/benchmark/agbenchmark/agent_api_interface.py +++ b/benchmark/agbenchmark/agent_api_interface.py @@ -50,10 +50,6 @@ async def run_api_agent( raise TimeoutError("Time limit exceeded") if not step or step.is_last: steps_remaining = False - if os.getenv("IS_MOCK"): - time.sleep( - 1 - ) # will help with the integration og the "polling updates" features since mock agent is too fast. # if we're calling a mock agent, we "cheat" and give the correct artifacts to pass the tests if os.getenv("IS_MOCK"): await upload_artifacts( diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv new file mode 100644 index 00000000..a52510f1 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv @@ -0,0 +1,5 @@ +id,name,timestamp +3,Alice,2023-09-25 14:10:00 +1,Bob,2023-09-24 12:05:00 +2,Charlie,2023-09-24 12:10:00 +4,David,2023-09-26 16:20:00 diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv new file mode 100644 index 00000000..6cac7733 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv @@ -0,0 +1,5 @@ +id,name,timestamp +1,Bob,2023-09-24 12:05:00 +2,Charlie,2023-09-24 12:10:00 +3,Alice,2023-09-25 14:10:00 +4,David,2023-09-26 16:20:00 diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json new file mode 100644 index 00000000..8515af89 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json @@ -0,0 +1,31 @@ +{ + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": 
[ + "TestReadFile" + ], + "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15", + "ground": { + "answer": "The csv sorted by date", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "SortCsv", + "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved." +} diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv new file mode 100644 index 00000000..ae4ca502 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv @@ -0,0 +1,12 @@ +Item +Banana +Leaf +Sky +Sunflower +Grass +Jeans +Lemon +Tree +Ocean +Daisy +Fern diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv new file mode 100644 index 00000000..7b19bce9 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv @@ -0,0 +1,12 @@ +Item, Color +Banana, Yellow +Leaf, Green +Sky, Blue +Sunflower, Yellow +Grass, Green +Jeans, Blue +Lemon, Yellow +Tree, Green +Ocean, Blue +Daisy, Yellow +Fern, Green diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json new file mode 100644 index 00000000..a024a6eb --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json @@ -0,0 +1,31 @@ +{ + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ 
+ "TestSortCsv" + ], + "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e", + "ground": { + "answer": "The csv labelled", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green" + ] + }, + "info": { + "description": "Tests if the agent can label data in a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "LabelData", + "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv" +} diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv new file mode 100644 index 00000000..fe552d67 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv @@ -0,0 +1,4 @@ +ID,Name,Age +101,John,28 +102,Alice,34 +103,Bob,45 diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv new file mode 100644 index 00000000..685e24f4 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv @@ -0,0 +1,4 @@ +ID,Occupation,Salary +101,Engineer,80000 +102,Doctor,120000 +103,Lawyer,95000 diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv new file mode 100644 index 00000000..8afe84bf --- /dev/null +++ 
b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv @@ -0,0 +1,4 @@ +Age,ID,Name,Occupation,Salary +28,101,John,Engineer,80000 +34,102,Alice,Doctor,120000 +45,103,Bob,Lawyer,95000 diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json new file mode 100644 index 00000000..0739fe03 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json @@ -0,0 +1,31 @@ +{ + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestLabelData" + ], + "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375", + "ground": { + "answer": "The csv data is combined", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000" + ] + }, + "info": { + "description": "Tests if the agent can combine data from a csv", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "CombineCsv", + "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. 
Write the output in output.csv" +} diff --git a/benchmark/frontend/public/graph.json b/benchmark/frontend/public/graph.json index 8116dacf..f5401f48 100644 --- a/benchmark/frontend/public/graph.json +++ b/benchmark/frontend/public/graph.json @@ -12,6 +12,12 @@ "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]", "to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]" }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]" + }, { "arrows": "to", "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", @@ -83,6 +89,18 @@ "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]", "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]", "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "to": 
"agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]" } ], "nodes": [ @@ -574,6 +592,117 @@ "label": "RevenueRetrieval2", "shape": "dot" }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestSortCsv" + ], + "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e", + "ground": { + "answer": "The csv labelled", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestLabelData", + "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "label": "LabelData", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestReadFile" + ], + "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15", + "ground": { + "answer": "The csv sorted by date", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestSortCsv", + "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. 
The order of the columns should be preserved." + }, + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "label": "SortCsv", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestLabelData" + ], + "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375", + "ground": { + "answer": "The csv data is combined", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000" + ] + }, + "info": { + "description": "Tests if the agent can combine data from a csv", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "TestCombineCsv", + "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "label": "CombineCsv", + "shape": "dot" + }, { "color": "grey", "data": { diff --git a/frontend/assets/data_tree_structure.json b/frontend/assets/data_tree_structure.json index 1f1a2ad1..48432d2d 100644 --- a/frontend/assets/data_tree_structure.json +++ b/frontend/assets/data_tree_structure.json @@ -1,4 +1,214 @@ { - "edges": [], - "nodes": [] + "edges": [ + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "id": 
"agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]" + } + ], + "nodes": [ + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestSortCsv" + ], + "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e", + "ground": { + "answer": "The csv labelled", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestLabelData", + "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. 
Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "label": "LabelData", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestReadFile" + ], + "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15", + "ground": { + "answer": "The csv sorted by date", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestSortCsv", + "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved." + }, + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "label": "SortCsv", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "interface" + ], + "cutoff": 60, + "dependencies": [ + "TestWriteFile" + ], + "eval_id": "261ccfaa-02a2-4c1a-8a56-c76c66f7dba1", + "ground": { + "answer": "The content of output.txt should be 'Hello World!'", + "eval": { + "type": "file" + }, + "files": [ + "output.txt" + ], + "should_contain": [ + "Hello World!" 
+ ] + }, + "info": { + "description": "tests the ability for an agent to read a file.", + "difficulty": "interface", + "side_effects": [ + "" + ] + }, + "name": "TestReadFile", + "task": "Read the file called file_to_read.txt and write its content to a file called output.txt" + }, + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "label": "ReadFile", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "interface" + ], + "cutoff": 60, + "dependencies": [], + "eval_id": "81b64bf9-2b6a-4ac8-bcd2-8bfe36244ac0", + "ground": { + "answer": "The word 'Washington', printed to a .txt file named anything", + "eval": { + "type": "file" + }, + "files": [ + ".txt" + ], + "should_contain": [ + "Washington" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests the agents ability to write to a file", + "difficulty": "interface", + "side_effects": [ + "" + ] + }, + "name": "TestWriteFile", + "task": "Write the word 'Washington' to a .txt file" + }, + "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]", + "label": "WriteFile", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestLabelData" + ], + "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375", + "ground": { + "answer": "The csv data is combined", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000" + ] + }, + "info": { + "description": "Tests if the agent can combine data from a csv", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "TestCombineCsv", + "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. 
Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "label": "CombineCsv", + "shape": "dot" + } + ] } diff --git a/frontend/assets/tree_structure.json b/frontend/assets/tree_structure.json index 8116dacf..f5401f48 100644 --- a/frontend/assets/tree_structure.json +++ b/frontend/assets/tree_structure.json @@ -12,6 +12,12 @@ "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]", "to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]" }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]" + }, { "arrows": "to", "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", @@ -83,6 +89,18 @@ "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]", "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]", "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "id": 
"agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]" } ], "nodes": [ @@ -574,6 +592,117 @@ "label": "RevenueRetrieval2", "shape": "dot" }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestSortCsv" + ], + "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e", + "ground": { + "answer": "The csv labelled", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestLabelData", + "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. 
Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "label": "LabelData", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestReadFile" + ], + "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15", + "ground": { + "answer": "The csv sorted by date", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestSortCsv", + "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved." + }, + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "label": "SortCsv", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestLabelData" + ], + "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375", + "ground": { + "answer": "The csv data is combined", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000" + ] + }, + "info": { + "description": "Tests if the agent can combine data from a csv", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "TestCombineCsv", + "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. 
Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "label": "CombineCsv", + "shape": "dot" + }, { "color": "grey", "data": { diff --git a/frontend/pubspec.lock b/frontend/pubspec.lock index 33d60d70..abadca4a 100644 --- a/frontend/pubspec.lock +++ b/frontend/pubspec.lock @@ -45,10 +45,10 @@ packages: dependency: "direct main" description: name: collection - sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687 + sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c" url: "https://pub.dev" source: hosted - version: "1.17.2" + version: "1.17.1" crypto: dependency: transitive description: @@ -292,18 +292,18 @@ packages: dependency: transitive description: name: matcher - sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e" + sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb" url: "https://pub.dev" source: hosted - version: "0.12.16" + version: "0.12.15" material_color_utilities: dependency: transitive description: name: material_color_utilities - sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41" + sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724 url: "https://pub.dev" source: hosted - version: "0.5.0" + version: "0.2.0" meta: dependency: transitive description: @@ -449,10 +449,10 @@ packages: dependency: transitive description: name: source_span - sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c" + sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250 url: "https://pub.dev" source: hosted - version: "1.10.0" + version: "1.9.1" sprintf: dependency: transitive description: @@ -497,10 +497,10 @@ packages: dependency: transitive description: name: test_api - sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8" + sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb url: 
"https://pub.dev" source: hosted - version: "0.6.0" + version: "0.5.1" typed_data: dependency: transitive description: @@ -589,14 +589,6 @@ packages: url: "https://pub.dev" source: hosted version: "2.1.4" - web: - dependency: transitive - description: - name: web - sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10 - url: "https://pub.dev" - source: hosted - version: "0.1.4-beta" win32: dependency: transitive description: @@ -614,5 +606,5 @@ packages: source: hosted version: "1.0.3" sdks: - dart: ">=3.1.0-185.0.dev <4.0.0" + dart: ">=3.0.0 <4.0.0" flutter: ">=3.10.0"