diff --git a/agbenchmark/challenges/adaptability/a1_test.py b/agbenchmark/challenges/adaptability/a1_test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agbenchmark/challenges/interface/browse_test.py b/agbenchmark/challenges/interface/browse_test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agbenchmark/challenges/interface/search/artifacts_out/random_file.txt b/agbenchmark/challenges/interface/search/artifacts_out/random_file.txt new file mode 100644 index 00000000..03566759 --- /dev/null +++ b/agbenchmark/challenges/interface/search/artifacts_out/random_file.txt @@ -0,0 +1,2 @@ +This is a Heading +This is a paragraph. \ No newline at end of file diff --git a/agbenchmark/challenges/interface/search/data.json b/agbenchmark/challenges/interface/search/data.json new file mode 100644 index 00000000..17ee1ac1 --- /dev/null +++ b/agbenchmark/challenges/interface/search/data.json @@ -0,0 +1,18 @@ +{ + "name": "TestSearch", + "category": ["interface"], + "task": "Open 'https://silennaihin.com/random/plain.html' and paste the text on the page in a .txt file", + "dependencies": [], + "ground": { + "answer": "This is a Heading\nThis is a paragraph.", + "should_contain": ["Heading", "paragraph"], + "should_not_contain": ["The", "the"], + "files": [".txt"], + "type": "file" + }, + "info": { + "difficulty": "basic", + "description": "Tests if an llm can search", + "side_effects": [""] + } +} diff --git a/agbenchmark/challenges/retrieval/r1/data.json b/agbenchmark/challenges/retrieval/r1/data.json index 7812c21d..4f3833df 100644 --- a/agbenchmark/challenges/retrieval/r1/data.json +++ b/agbenchmark/challenges/retrieval/r1/data.json @@ -2,7 +2,7 @@ "name": "TestBasicRetrieval", "category": ["retrieval"], "task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.", - "dependencies": ["TestWriteFile"], + "dependencies": ["TestWriteFile", "TestSearch"], "ground": { "answer": "£25.89", "should_contain": ["25.89"], diff --git a/agbenchmark/challenges/web_navigation/wn1_test.py b/agbenchmark/challenges/web_navigation/wn1_test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agbenchmark/challenges/writing/w1_test.py b/agbenchmark/challenges/writing/w1_test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/regression_tests.json b/regression_tests.json index 61320791..10a6e11b 100644 --- a/regression_tests.json +++ b/regression_tests.json @@ -60,5 +60,10 @@ "TestRememberMultipleIdsWithNoise" ], "test": "agbenchmark\\challenges\\memory\\m4" + }, + "TestSearch": { + "difficulty": "basic", + "dependencies": [], + "test": "agbenchmark\\challenges\\interface\\search" } } \ No newline at end of file