From b8830f86256ce54c990fc4bd4a0fe2ac7389cdbd Mon Sep 17 00:00:00 2001 From: Silen Naihin Date: Sun, 9 Jul 2023 21:33:08 -0400 Subject: [PATCH] Adding search interface challenge and cleaning repo (#80) --- agbenchmark/challenges/adaptability/a1_test.py | 0 .../challenges/interface/browse_test.py | 0 .../search/artifacts_out/random_file.txt | 2 ++ .../challenges/interface/search/data.json | 18 ++++++++++++++++++ agbenchmark/challenges/retrieval/r1/data.json | 2 +- .../challenges/web_navigation/wn1_test.py | 0 agbenchmark/challenges/writing/w1_test.py | 0 regression_tests.json | 5 +++++ 8 files changed, 26 insertions(+), 1 deletion(-) delete mode 100644 agbenchmark/challenges/adaptability/a1_test.py delete mode 100644 agbenchmark/challenges/interface/browse_test.py create mode 100644 agbenchmark/challenges/interface/search/artifacts_out/random_file.txt create mode 100644 agbenchmark/challenges/interface/search/data.json delete mode 100644 agbenchmark/challenges/web_navigation/wn1_test.py delete mode 100644 agbenchmark/challenges/writing/w1_test.py diff --git a/agbenchmark/challenges/adaptability/a1_test.py b/agbenchmark/challenges/adaptability/a1_test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agbenchmark/challenges/interface/browse_test.py b/agbenchmark/challenges/interface/browse_test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agbenchmark/challenges/interface/search/artifacts_out/random_file.txt b/agbenchmark/challenges/interface/search/artifacts_out/random_file.txt new file mode 100644 index 00000000..03566759 --- /dev/null +++ b/agbenchmark/challenges/interface/search/artifacts_out/random_file.txt @@ -0,0 +1,2 @@ +This is a Heading +This is a paragraph. \ No newline at end of file diff --git a/agbenchmark/challenges/interface/search/data.json b/agbenchmark/challenges/interface/search/data.json new file mode 100644 index 00000000..17ee1ac1 --- /dev/null +++ b/agbenchmark/challenges/interface/search/data.json @@ -0,0 +1,18 @@ +{ + "name": "TestSearch", + "category": ["interface"], + "task": "Open 'https://silennaihin.com/random/plain.html' and paste the text on the page in a .txt file", + "dependencies": [], + "ground": { + "answer": "This is a Heading\nThis is a paragraph.", + "should_contain": ["Heading", "paragraph"], + "should_not_contain": ["The", "the"], + "files": [".txt"], + "type": "file" + }, + "info": { + "difficulty": "basic", + "description": "Tests if an llm can search", + "side_effects": [""] + } +} diff --git a/agbenchmark/challenges/retrieval/r1/data.json b/agbenchmark/challenges/retrieval/r1/data.json index 7812c21d..4f3833df 100644 --- a/agbenchmark/challenges/retrieval/r1/data.json +++ b/agbenchmark/challenges/retrieval/r1/data.json @@ -2,7 +2,7 @@ "name": "TestBasicRetrieval", "category": ["retrieval"], "task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.", - "dependencies": ["TestWriteFile"], + "dependencies": ["TestWriteFile", "TestSearch"], "ground": { "answer": "£25.89", "should_contain": ["25.89"], diff --git a/agbenchmark/challenges/web_navigation/wn1_test.py b/agbenchmark/challenges/web_navigation/wn1_test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/agbenchmark/challenges/writing/w1_test.py b/agbenchmark/challenges/writing/w1_test.py deleted file mode 100644 index e69de29b..00000000 diff --git a/regression_tests.json b/regression_tests.json index 61320791..10a6e11b 100644 --- a/regression_tests.json +++ b/regression_tests.json @@ -60,5 +60,10 @@ "TestRememberMultipleIdsWithNoise" ], "test": "agbenchmark\\challenges\\memory\\m4" + }, + "TestSearch": { + "difficulty": "basic", + "dependencies": [], + "test": "agbenchmark\\challenges\\interface\\search" } } \ No newline at end of file