diff --git a/agbenchmark/challenges/retrieval/r2/r2_data.json b/agbenchmark/challenges/retrieval/r2/r2_data.json
new file mode 100644
index 00000000..925e6db8
--- /dev/null
+++ b/agbenchmark/challenges/retrieval/r2/r2_data.json
@@ -0,0 +1,21 @@
+{
+  "name": "basic_information_retrieval",
+  "category": ["retrieval"],
+  "task": "Write tesla's revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million).",
+  "dependencies": [],
+  "ground": {
+    "answer": "81,462",
+    "should_contain": ["81,462"],
+    "should_not_contain": [],
+    "files": [".txt"]
+  },
+  "mock": {
+    "mock_func": "basic_retrieval_2_mock",
+    "mock_task": "Write tesla's revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)."
+  },
+  "info": {
+    "difficulty": "basic",
+    "description": "Tests ability to retrieve information.",
+    "side_effects": ["tests if there is in fact an LLM attached"]
+  }
+}
diff --git a/agbenchmark/challenges/retrieval/r2/r2_test.py b/agbenchmark/challenges/retrieval/r2/r2_test.py
new file mode 100644
index 00000000..bdc73886
--- /dev/null
+++ b/agbenchmark/challenges/retrieval/r2/r2_test.py
@@ -0,0 +1,29 @@
+import os
+from pathlib import Path
+from typing import Any, Dict
+
+import pytest
+
+from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge
+
+
+class TestRetrieval2(RetrievalChallenge):
+    """The second information-retrieval challenge (Tesla's 2022 revenue)"""
+
+    def get_file_path(self) -> str:  # all tests must implement this method
+        return os.path.join(os.path.dirname(__file__), "r2_data.json")
+
+    @pytest.mark.depends(on=["basic_write_file"])
+    def test_method(self, config: Dict[str, Any]) -> None:
+        self.setup_challenge(config)
+
+        workspace = Path(os.getcwd()) / config["workspace"]
+        files_contents = self.open_files(workspace, self.data.ground.files)
+
+        scores = []
+        for file_content in files_contents:
+            score = self.scoring(file_content, self.data.ground)
+            print("Your score is:", score)
+            scores.append(score)
+
+        assert 1 in scores
diff --git a/agbenchmark/challenges/retrieval/r3/r3_data.json b/agbenchmark/challenges/retrieval/r3/r3_data.json
new file mode 100644
index 00000000..183529c4
--- /dev/null
+++ b/agbenchmark/challenges/retrieval/r3/r3_data.json
@@ -0,0 +1,21 @@
+{
+  "name": "basic_information_retrieval",
+  "category": ["retrieval"],
+  "task": "Write tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million).",
+  "dependencies": [],
+  "ground": {
+    "answer": "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions",
+    "should_contain": ["15", "112", "117", "204", "413", "2,014", "3,198", "4,046", "7,000", "11,759", "21,461", "24,578", "31,536", "53,823", "81,462"],
+    "should_not_contain": [],
+    "files": [".txt"]
+  },
+  "mock": {
+    "mock_func": "basic_retrieval_3_mock",
+    "mock_task": "Write tesla's revenue every year since its creation into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31,578 million)."
+  },
+  "info": {
+    "difficulty": "basic",
+    "description": "Tests ability to retrieve information.",
+    "side_effects": ["tests if there is in fact an LLM attached"]
+  }
+}
diff --git a/agbenchmark/challenges/retrieval/r3/r3_test.py b/agbenchmark/challenges/retrieval/r3/r3_test.py
new file mode 100644
index 00000000..36382b69
--- /dev/null
+++ b/agbenchmark/challenges/retrieval/r3/r3_test.py
@@ -0,0 +1,29 @@
+import os
+from pathlib import Path
+from typing import Any, Dict
+
+import pytest
+
+from agbenchmark.challenges.retrieval.retrieval import RetrievalChallenge
+
+
+class TestRetrieval3(RetrievalChallenge):
+    """The third information-retrieval challenge (Tesla's yearly revenue)"""
+
+    def get_file_path(self) -> str:  # all tests must implement this method
+        return os.path.join(os.path.dirname(__file__), "r3_data.json")
+
+    @pytest.mark.depends(on=["basic_write_file"])
+    def test_method(self, config: Dict[str, Any]) -> None:
+        self.setup_challenge(config)
+
+        workspace = Path(os.getcwd()) / config["workspace"]
+        files_contents = self.open_files(workspace, self.data.ground.files)
+
+        scores = []
+        for file_content in files_contents:
+            score = self.scoring(file_content, self.data.ground)
+            print("Your score is:", score)
+            scores.append(score)
+
+        assert 1 in scores
diff --git a/agbenchmark/mocks/tests/basic_mocks.py b/agbenchmark/mocks/tests/basic_mocks.py
index 07d8a6de..882e3c82 100644
--- a/agbenchmark/mocks/tests/basic_mocks.py
+++ b/agbenchmark/mocks/tests/basic_mocks.py
@@ -33,3 +33,25 @@ def basic_retrieval_mock(task: str, workspace: str) -> None:
         "file_to_check.txt",
         "25.89",
     )
+
+
+def basic_retrieval_2_mock(task: str, workspace: str) -> None:
+    """
+    This mock writes to a file (creates one if it doesn't exist)
+    """
+    Challenge.write_to_file(
+        workspace,
+        "file_to_check.txt",
+        "81,462",
+    )
+
+
+def basic_retrieval_3_mock(task: str, workspace: str) -> None:
+    """
+    This mock writes to a file (creates one if it doesn't exist)
+    """
+    Challenge.write_to_file(
+        workspace,
+        "file_to_check.txt",
+        "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions",
+    )
diff --git a/regression_tests.json b/regression_tests.json
index 9b998d11..853c38dc 100644
--- a/regression_tests.json
+++ b/regression_tests.json
@@ -15,5 +15,15 @@
             "basic_write_file"
         ],
         "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
+    },
+    "TestRetrieval2": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/retrieval/r2/r2_test.py"
+    },
+    "TestRetrieval3": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/retrieval/r3/r3_test.py"
     }
 }
\ No newline at end of file