diff --git a/agbenchmark/challenges/memory/m2/artifacts/instructions_1.txt b/agbenchmark/challenges/memory/m2/artifacts/instructions_1.txt
new file mode 100644
index 00000000..d304ce63
--- /dev/null
+++ b/agbenchmark/challenges/memory/m2/artifacts/instructions_1.txt
@@ -0,0 +1 @@
+The id to remember is 3145. Read the file instructions_2.txt.
diff --git a/agbenchmark/challenges/memory/m2/artifacts/instructions_2.txt b/agbenchmark/challenges/memory/m2/artifacts/instructions_2.txt
new file mode 100644
index 00000000..cfff66ba
--- /dev/null
+++ b/agbenchmark/challenges/memory/m2/artifacts/instructions_2.txt
@@ -0,0 +1 @@
+The id to remember is 3791. Read the file instructions_3.txt.
diff --git a/agbenchmark/challenges/memory/m2/artifacts/instructions_3.txt b/agbenchmark/challenges/memory/m2/artifacts/instructions_3.txt
new file mode 100644
index 00000000..ad16b6fd
--- /dev/null
+++ b/agbenchmark/challenges/memory/m2/artifacts/instructions_3.txt
@@ -0,0 +1 @@
+The id to remember is 9317. Read the file instructions_4.txt.
diff --git a/agbenchmark/challenges/memory/m2/artifacts/instructions_4.txt b/agbenchmark/challenges/memory/m2/artifacts/instructions_4.txt
new file mode 100644
index 00000000..2394d64b
--- /dev/null
+++ b/agbenchmark/challenges/memory/m2/artifacts/instructions_4.txt
@@ -0,0 +1 @@
+The id to remember is 9471. Read the file instructions_5.txt.
diff --git a/agbenchmark/challenges/memory/m2/artifacts/instructions_5.txt b/agbenchmark/challenges/memory/m2/artifacts/instructions_5.txt
new file mode 100644
index 00000000..92a6fba9
--- /dev/null
+++ b/agbenchmark/challenges/memory/m2/artifacts/instructions_5.txt
@@ -0,0 +1 @@
+Write the ids previously mentioned to a file called file_to_check.txt.
diff --git a/agbenchmark/challenges/memory/m2/remember_multiple_ids_data.json b/agbenchmark/challenges/memory/m2/remember_multiple_ids_data.json
new file mode 100644
index 00000000..374df616
--- /dev/null
+++ b/agbenchmark/challenges/memory/m2/remember_multiple_ids_data.json
@@ -0,0 +1,21 @@
+{
+  "name": "remember_multiple_ids",
+  "category": ["memory"],
+  "task": "Follow the instructions in the instructions_1.txt file",
+  "dependencies": [],
+  "ground": {
+    "answer": "3145\n3791\n9317\n9471",
+    "should_contain": ["3145", "3791", "9317", "9471"],
+    "should_not_contain": [],
+    "files": ["file_to_check.txt"]
+  },
+  "mock": {
+    "mock_func": "remember_multiple_ids_mock",
+    "mock_task": "Follow the instructions in the instructions_1.txt file"
+  },
+  "info": {
+    "difficulty": "basic",
+    "description": "Tests ability for the agent to remember information between each action. Multiple ids are presented in different files and the last file instructs the agent to write these ids into another file.",
+    "side_effects": ["tests if there is in fact an LLM attached"]
+  }
+}
diff --git a/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py b/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py
new file mode 100644
index 00000000..d5f0cf1a
--- /dev/null
+++ b/agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py
@@ -0,0 +1,37 @@
+import os
+from typing import Any, Dict
+
+import pytest
+
+from agbenchmark.challenges.memory.memory import MemoryChallenge
+
+
+class TestRememberMultipleIds(MemoryChallenge):
+    """Memory challenge m2: remember several ids spread across files."""
+
+    def get_file_path(self) -> str:  # all tests must implement this method
+        """Return the path of this challenge's JSON definition file."""
+        return os.path.join(
+            os.path.dirname(__file__), "remember_multiple_ids_data.json"
+        )
+
+    # pytest-depends (provider of the ``depends`` marker) reads prerequisites
+    # from ``on=``; other keywords are ignored, so ``depends=[...]`` registered
+    # nothing. NOTE(review): confirm the basic memory test is named
+    # "test_basic_memory", otherwise this dependency cannot be resolved.
+    @pytest.mark.depends(
+        name="test_remember_multiple_ids", on=["test_basic_memory"]
+    )
+    def test_method(self, config: Dict[str, Any]) -> None:
+        """Run the challenge and pass when at least one checked file scores 1."""
+        self.setup_challenge(config)
+
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)
+
+        scores = []
+        for file_content in files_contents:
+            score = self.scoring(file_content, self.data.ground)
+            print("Your score is:", score)
+            scores.append(score)
+
+        assert 1 in scores
diff --git a/agbenchmark/mocks/tests/basic_mocks.py b/agbenchmark/mocks/tests/basic_mocks.py
index 3b9170f4..1ffb3de3 100644
--- a/agbenchmark/mocks/tests/basic_mocks.py
+++ b/agbenchmark/mocks/tests/basic_mocks.py
@@ -66,3 +66,15 @@ def basic_memory_mock(task: str, workspace: str) -> None:
         "file_to_check.txt",
         "2314",
     )
+
+
+def remember_multiple_ids_mock(task: str, workspace: str) -> None:
+    """
+    This mock writes the four expected ids, one per line, to file_to_check.txt
+    in the workspace (creates the file if it doesn't exist)
+    """
+    Challenge.write_to_file(
+        workspace,
+        "file_to_check.txt",
+        "3145\n3791\n9317\n9471",
+    )
diff --git a/regression_tests.json b/regression_tests.json
index cfa4bda3..9742aa47 100644
--- a/regression_tests.json
+++ b/regression_tests.json
@@ -30,5 +30,10 @@
         "difficulty": "basic",
         "dependencies": [],
         "test": "agbenchmark/challenges/retrieval/r3/r3_test.py"
+    },
+    "TestRememberMultipleIds": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/memory/m2/remember_multiple_ids_test.py"
     }
 }
\ No newline at end of file