Add information retrieval 3 (#54)

Co-authored-by: Silen Naihin <silen.naihin@gmail.com>
2026-02-20 05:34:25 +01:00 · 2023-07-04 17:41:13 -07:00
parent cef259c945
commit e3c453f10e
6 changed files with 132 additions and 0 deletions
--- a/agbenchmark/challenges/retrieval/r2/r2_data.json
+++ b/agbenchmark/challenges/retrieval/r2/r2_data.json
@@ -0,0 +1,21 @@
+{
+  "name": "basic_information_retrieval",
+  "category": ["retrieval"],
+  "task": "Write Tesla's revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million).",
+  "dependencies": [],
+  "ground": {
+    "answer": "81,462",
+    "should_contain": ["81,462"],
+    "should_not_contain": [],
+    "files": [".txt"]
+  },
+  "mock": {
+    "mock_func": "basic_retrieval_2_mock",
+    "mock_task": "Write Tesla's revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)."
+  },
+  "info": {
+    "difficulty": "basic",
+    "description": "Tests ability to retrieve information.",
+    "side_effects": ["tests if there is in fact an LLM attached"]
+  }
+}
--- a/agbenchmark/challenges/retrieval/r2/r2_test.py
+++ b/agbenchmark/challenges/retrieval/r2/r2_test.py
@@ -0,0 +1,29 @@
+import os
+from pathlib import Path
+from typing import Any, Dict
+
+import pytest
+
+from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge
+
+
+class TestRetrieval2(RetrievalChallenge):
+    """The second information-retrieval challenge (data in r2_data.json)"""
+
+    def get_file_path(self) -> str:  # all tests must implement this method
+        return os.path.join(os.path.dirname(__file__), "r2_data.json")
+
+    @pytest.mark.depends(on=["basic_write_file"])
+    def test_method(self, config: Dict[str, Any]) -> None:
+        self.setup_challenge(config)
+
+        workspace = Path(os.getcwd()) / config["workspace"]
+        files_contents = self.open_files(workspace, self.data.ground.files)
+
+        scores = []
+        for file_content in files_contents:
+            score = self.scoring(file_content, self.data.ground)
+            print("Your score is:", score)
+            scores.append(score)
+
+        assert 1 in scores  # passes when at least one opened file scores 1
--- a/agbenchmark/challenges/retrieval/r3/r3_data.json
+++ b/agbenchmark/challenges/retrieval/r3/r3_data.json
@@ -0,0 +1,21 @@
+{
+  "name": "basic_information_retrieval",
+  "category": ["retrieval"],
+  "task": "Write Tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million).",
+  "dependencies": [],
+  "ground": {
+    "answer": "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions",
+    "should_contain": ["15", "112", "117", "204", "413", "2,014", "3,198", "4,046", "7,000", "11,759", "21,461", "24,578", "31,536", "53,823", "81,462"],
+    "should_not_contain": [],
+    "files": [".txt"]
+  },
+  "mock": {
+    "mock_func": "basic_retrieval_3_mock",
+    "mock_task": "Write Tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)."
+  },
+  "info": {
+    "difficulty": "basic",
+    "description": "Tests ability to retrieve information.",
+    "side_effects": ["tests if there is in fact an LLM attached"]
+  }
+}
--- a/agbenchmark/challenges/retrieval/r3/r3_test.py
+++ b/agbenchmark/challenges/retrieval/r3/r3_test.py
@@ -0,0 +1,29 @@
+import os
+from pathlib import Path
+from typing import Any, Dict
+
+import pytest
+
+from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge
+
+
+class TestRetrieval3(RetrievalChallenge):
+    """The third information-retrieval challenge (data in r3_data.json)"""
+
+    def get_file_path(self) -> str:  # all tests must implement this method
+        return os.path.join(os.path.dirname(__file__), "r3_data.json")
+
+    @pytest.mark.depends(on=["basic_write_file"])
+    def test_method(self, config: Dict[str, Any]) -> None:
+        self.setup_challenge(config)
+
+        workspace = Path(os.getcwd()) / config["workspace"]
+        files_contents = self.open_files(workspace, self.data.ground.files)
+
+        scores = []
+        for file_content in files_contents:
+            score = self.scoring(file_content, self.data.ground)
+            print("Your score is:", score)
+            scores.append(score)
+
+        assert 1 in scores  # passes when at least one opened file scores 1
--- a/agbenchmark/mocks/tests/basic_mocks.py
+++ b/agbenchmark/mocks/tests/basic_mocks.py
@@ -33,3 +33,25 @@ def basic_retrieval_mock(task: str, workspace: str) -> None:
        "file_to_check.txt",
        "25.89",
    )
+
+
+def basic_retrieval_2_mock(task: str, workspace: str) -> None:
+    """
+    This mock writes "81,462" to a file (creates one if it doesn't exist)
+    """
+    Challenge.write_to_file(
+        workspace,
+        "file_to_check.txt",
+        "81,462",
+    )
+
+
+def basic_retrieval_3_mock(task: str, workspace: str) -> None:
+    """
+    Writes 15 "<amount> Millions" lines to a file (creates one if it doesn't exist)
+    """
+    Challenge.write_to_file(
+        workspace,
+        "file_to_check.txt",
+        "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions",
+    )
--- a/regression_tests.json
+++ b/regression_tests.json
@@ -15,5 +15,15 @@
            "basic_write_file"
        ],
        "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
+    },
+    "TestRetrieval2": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/retrieval/r2/r2_test.py"
+    },
+    "TestRetrieval3": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/retrieval/r3/r3_test.py"
    }
 }