diff --git a/benchmark/agbenchmark/agent_api_interface.py b/benchmark/agbenchmark/agent_api_interface.py
index ce6ab649..f35ef81a 100644
--- a/benchmark/agbenchmark/agent_api_interface.py
+++ b/benchmark/agbenchmark/agent_api_interface.py
@@ -50,10 +50,6 @@ async def run_api_agent(
                 raise TimeoutError("Time limit exceeded")
             if not step or step.is_last:
                 steps_remaining = False
-            if os.getenv("IS_MOCK"):
-                time.sleep(
-                    1
-                )  # will help with the integration og the "polling updates" features since mock agent is too fast.
         # if we're calling a mock agent, we "cheat" and give the correct artifacts to pass the tests
         if os.getenv("IS_MOCK"):
             await upload_artifacts(
diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv
new file mode 100644
index 00000000..a52510f1
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv
@@ -0,0 +1,5 @@
+id,name,timestamp
+3,Alice,2023-09-25 14:10:00
+1,Bob,2023-09-24 12:05:00
+2,Charlie,2023-09-24 12:10:00
+4,David,2023-09-26 16:20:00
diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv
new file mode 100644
index 00000000..6cac7733
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv
@@ -0,0 +1,5 @@
+id,name,timestamp
+1,Bob,2023-09-24 12:05:00
+2,Charlie,2023-09-24 12:10:00
+3,Alice,2023-09-25 14:10:00
+4,David,2023-09-26 16:20:00
diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json
new file mode 100644
index 00000000..8515af89
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json
@@ -0,0 +1,31 @@
+{
+    "category": [
+        "data"
+    ],
+    "cutoff": 60,
+    "dependencies": [
+        "TestReadFile"
+    ],
+    "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
+    "ground": {
+        "answer": "The csv sorted by date",
+        "eval": {
+            "type": "file"
+        },
+        "files": [
+            "output.csv"
+        ],
+        "should_contain": [
+            "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
+        ]
+    },
+    "info": {
+        "description": "Tests if the agent can sort a csv",
+        "difficulty": "basic",
+        "side_effects": [
+            ""
+        ]
+    },
+    "name": "SortCsv",
+    "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
+}
diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv
new file mode 100644
index 00000000..ae4ca502
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv
@@ -0,0 +1,12 @@
+Item
+Banana
+Leaf
+Sky
+Sunflower
+Grass
+Jeans
+Lemon
+Tree
+Ocean
+Daisy
+Fern
diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv
new file mode 100644
index 00000000..7b19bce9
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv
@@ -0,0 +1,12 @@
+Item, Color
+Banana, Yellow
+Leaf, Green
+Sky, Blue
+Sunflower, Yellow
+Grass, Green
+Jeans, Blue
+Lemon, Yellow
+Tree, Green
+Ocean, Blue
+Daisy, Yellow
+Fern, Green
diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json
new file mode 100644
index 00000000..a024a6eb
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json
@@ -0,0 +1,31 @@
+{
+    "category": [
+        "data"
+    ],
+    "cutoff": 60,
+    "dependencies": [
+        "TestSortCsv"
+    ],
+    "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
+    "ground": {
+        "answer": "The csv labelled",
+        "eval": {
+            "type": "file"
+        },
+        "files": [
+            "output.csv"
+        ],
+        "should_contain": [
+            "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
+        ]
+    },
+    "info": {
+        "description": "Tests if the agent can label data in a csv",
+        "difficulty": "basic",
+        "side_effects": [
+            ""
+        ]
+    },
+    "name": "LabelData",
+    "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
+}
diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv
new file mode 100644
index 00000000..fe552d67
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv
@@ -0,0 +1,4 @@
+ID,Name,Age
+101,John,28
+102,Alice,34
+103,Bob,45
diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv
new file mode 100644
index 00000000..685e24f4
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv
@@ -0,0 +1,4 @@
+ID,Occupation,Salary
+101,Engineer,80000
+102,Doctor,120000
+103,Lawyer,95000
diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv
new file mode 100644
index 00000000..8afe84bf
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv
@@ -0,0 +1,4 @@
+Age,ID,Name,Occupation,Salary
+28,101,John,Engineer,80000
+34,102,Alice,Doctor,120000
+45,103,Bob,Lawyer,95000
diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json
new file mode 100644
index 00000000..0739fe03
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json
@@ -0,0 +1,31 @@
+{
+    "category": [
+        "data"
+    ],
+    "cutoff": 60,
+    "dependencies": [
+        "TestLabelData"
+    ],
+    "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
+    "ground": {
+        "answer": "The csv data is combined",
+        "eval": {
+            "type": "file"
+        },
+        "files": [
+            "output.csv"
+        ],
+        "should_contain": [
+            "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
+        ]
+    },
+    "info": {
+        "description": "Tests if the agent can combine data from a csv",
+        "difficulty": "intermediate",
+        "side_effects": [
+            ""
+        ]
+    },
+    "name": "CombineCsv",
+    "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
+}
diff --git a/benchmark/frontend/public/graph.json b/benchmark/frontend/public/graph.json
index 8116dacf..f5401f48 100644
--- a/benchmark/frontend/public/graph.json
+++ b/benchmark/frontend/public/graph.json
@@ -12,6 +12,12 @@
             "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]",
             "to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]"
         },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
+        },
         {
             "arrows": "to",
             "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
@@ -83,6 +89,18 @@
             "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
             "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
             "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
         }
     ],
     "nodes": [
@@ -574,6 +592,117 @@
             "label": "RevenueRetrieval2",
             "shape": "dot"
         },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestSortCsv"
+                ],
+                "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
+                "ground": {
+                    "answer": "The csv labelled",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can label data in a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestLabelData",
+                "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "label": "LabelData",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestReadFile"
+                ],
+                "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
+                "ground": {
+                    "answer": "The csv sorted by date",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can sort a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestSortCsv",
+                "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
+            },
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "label": "SortCsv",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestLabelData"
+                ],
+                "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
+                "ground": {
+                    "answer": "The csv data is combined",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can combine data from a csv",
+                    "difficulty": "intermediate",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestCombineCsv",
+                "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "label": "CombineCsv",
+            "shape": "dot"
+        },
         {
             "color": "grey",
             "data": {
diff --git a/frontend/assets/data_tree_structure.json b/frontend/assets/data_tree_structure.json
index 1f1a2ad1..48432d2d 100644
--- a/frontend/assets/data_tree_structure.json
+++ b/frontend/assets/data_tree_structure.json
@@ -1,4 +1,214 @@
 {
-    "edges": [],
-    "nodes": []
+    "edges": [
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
+        }
+    ],
+    "nodes": [
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestSortCsv"
+                ],
+                "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
+                "ground": {
+                    "answer": "The csv labelled",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can label data in a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestLabelData",
+                "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "label": "LabelData",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestReadFile"
+                ],
+                "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
+                "ground": {
+                    "answer": "The csv sorted by date",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can sort a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestSortCsv",
+                "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
+            },
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "label": "SortCsv",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "interface"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestWriteFile"
+                ],
+                "eval_id": "261ccfaa-02a2-4c1a-8a56-c76c66f7dba1",
+                "ground": {
+                    "answer": "The content of output.txt should be 'Hello World!'",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.txt"
+                    ],
+                    "should_contain": [
+                        "Hello World!"
+                    ]
+                },
+                "info": {
+                    "description": "tests the ability for an agent to read a file.",
+                    "difficulty": "interface",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestReadFile",
+                "task": "Read the file called file_to_read.txt and write its content to a file called output.txt"
+            },
+            "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "label": "ReadFile",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "interface"
+                ],
+                "cutoff": 60,
+                "dependencies": [],
+                "eval_id": "81b64bf9-2b6a-4ac8-bcd2-8bfe36244ac0",
+                "ground": {
+                    "answer": "The word 'Washington', printed to a .txt file named anything",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        ".txt"
+                    ],
+                    "should_contain": [
+                        "Washington"
+                    ],
+                    "should_not_contain": []
+                },
+                "info": {
+                    "description": "Tests the agents ability to write to a file",
+                    "difficulty": "interface",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestWriteFile",
+                "task": "Write the word 'Washington' to a .txt file"
+            },
+            "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
+            "label": "WriteFile",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestLabelData"
+                ],
+                "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
+                "ground": {
+                    "answer": "The csv data is combined",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can combine data from a csv",
+                    "difficulty": "intermediate",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestCombineCsv",
+                "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "label": "CombineCsv",
+            "shape": "dot"
+        }
+    ]
 }
diff --git a/frontend/assets/tree_structure.json b/frontend/assets/tree_structure.json
index 8116dacf..f5401f48 100644
--- a/frontend/assets/tree_structure.json
+++ b/frontend/assets/tree_structure.json
@@ -12,6 +12,12 @@
             "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]",
             "to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]"
         },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
+        },
         {
             "arrows": "to",
             "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
@@ -83,6 +89,18 @@
             "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
             "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
             "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
         }
     ],
     "nodes": [
@@ -574,6 +592,117 @@
             "label": "RevenueRetrieval2",
             "shape": "dot"
         },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestSortCsv"
+                ],
+                "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
+                "ground": {
+                    "answer": "The csv labelled",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can label data in a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestLabelData",
+                "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "label": "LabelData",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestReadFile"
+                ],
+                "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
+                "ground": {
+                    "answer": "The csv sorted by date",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can sort a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestSortCsv",
+                "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
+            },
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "label": "SortCsv",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestLabelData"
+                ],
+                "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
+                "ground": {
+                    "answer": "The csv data is combined",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can combine data from a csv",
+                    "difficulty": "intermediate",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestCombineCsv",
+                "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "label": "CombineCsv",
+            "shape": "dot"
+        },
         {
             "color": "grey",
             "data": {
diff --git a/frontend/pubspec.lock b/frontend/pubspec.lock
index 33d60d70..abadca4a 100644
--- a/frontend/pubspec.lock
+++ b/frontend/pubspec.lock
@@ -45,10 +45,10 @@ packages:
     dependency: "direct main"
     description:
       name: collection
-      sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687
+      sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c"
       url: "https://pub.dev"
     source: hosted
-    version: "1.17.2"
+    version: "1.17.1"
   crypto:
     dependency: transitive
     description:
@@ -292,18 +292,18 @@ packages:
     dependency: transitive
     description:
       name: matcher
-      sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e"
+      sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb"
       url: "https://pub.dev"
     source: hosted
-    version: "0.12.16"
+    version: "0.12.15"
   material_color_utilities:
     dependency: transitive
     description:
       name: material_color_utilities
-      sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41"
+      sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724
       url: "https://pub.dev"
     source: hosted
-    version: "0.5.0"
+    version: "0.2.0"
   meta:
     dependency: transitive
     description:
@@ -449,10 +449,10 @@ packages:
     dependency: transitive
     description:
       name: source_span
-      sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c"
+      sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250
       url: "https://pub.dev"
     source: hosted
-    version: "1.10.0"
+    version: "1.9.1"
   sprintf:
     dependency: transitive
     description:
@@ -497,10 +497,10 @@ packages:
     dependency: transitive
     description:
       name: test_api
-      sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8"
+      sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb
       url: "https://pub.dev"
     source: hosted
-    version: "0.6.0"
+    version: "0.5.1"
   typed_data:
     dependency: transitive
     description:
@@ -589,14 +589,6 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "2.1.4"
-  web:
-    dependency: transitive
-    description:
-      name: web
-      sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10
-      url: "https://pub.dev"
-    source: hosted
-    version: "0.1.4-beta"
   win32:
     dependency: transitive
     description:
@@ -614,5 +606,5 @@ packages:
     source: hosted
     version: "1.0.3"
 sdks:
-  dart: ">=3.1.0-185.0.dev <4.0.0"
+  dart: ">=3.0.0 <4.0.0"
   flutter: ">=3.10.0"