diff --git a/agbenchmark/challenges/memory/m1/artifacts_out/random_file.txt b/agbenchmark/challenges/memory/m1/artifacts_out/random_file.txt new file mode 100644 index 00000000..86be9d15 --- /dev/null +++ b/agbenchmark/challenges/memory/m1/artifacts_out/random_file.txt @@ -0,0 +1 @@ +2314 diff --git a/agbenchmark/challenges/memory/m2/artifacts_out/random_file.txt b/agbenchmark/challenges/memory/m2/artifacts_out/random_file.txt new file mode 100644 index 00000000..7d48aaf1 --- /dev/null +++ b/agbenchmark/challenges/memory/m2/artifacts_out/random_file.txt @@ -0,0 +1,4 @@ +3145 +3791 +9317 +9471 diff --git a/agbenchmark/challenges/memory/m3/artifacts_out/random_file.txt b/agbenchmark/challenges/memory/m3/artifacts_out/random_file.txt new file mode 100644 index 00000000..7d48aaf1 --- /dev/null +++ b/agbenchmark/challenges/memory/m3/artifacts_out/random_file.txt @@ -0,0 +1,4 @@ +3145 +3791 +9317 +9471 diff --git a/agbenchmark/challenges/memory/m4/artifacts_out/random_file.txt b/agbenchmark/challenges/memory/m4/artifacts_out/random_file.txt new file mode 100644 index 00000000..9b8405bf --- /dev/null +++ b/agbenchmark/challenges/memory/m4/artifacts_out/random_file.txt @@ -0,0 +1,4 @@ +The purple elephant danced on a rainbow while eating a taco +The sneaky toaster stole my socks and ran away to Hawaii +My pet rock sings better than Beyoncé on Tuesdays +The giant hamster rode a unicycle through the crowded mall diff --git a/agbenchmark/challenges/retrieval/r1/artifacts_out/random_file.txt b/agbenchmark/challenges/retrieval/r1/artifacts_out/random_file.txt new file mode 100644 index 00000000..f558a0f9 --- /dev/null +++ b/agbenchmark/challenges/retrieval/r1/artifacts_out/random_file.txt @@ -0,0 +1 @@ +25.89 diff --git a/agbenchmark/challenges/retrieval/r2/artifacts_out/random_file.txt b/agbenchmark/challenges/retrieval/r2/artifacts_out/random_file.txt new file mode 100644 index 00000000..8a0eae04 --- /dev/null +++ b/agbenchmark/challenges/retrieval/r2/artifacts_out/random_file.txt @@ -0,0 +1 @@ +81,462 Millions diff --git a/agbenchmark/challenges/retrieval/r3/artifacts_out/random_file.txt b/agbenchmark/challenges/retrieval/r3/artifacts_out/random_file.txt new file mode 100644 index 00000000..d8d5bd16 --- /dev/null +++ b/agbenchmark/challenges/retrieval/r3/artifacts_out/random_file.txt @@ -0,0 +1,15 @@ +15 Millions +112 Millions +117 Millions +204 Millions +413 Millions +2,014 Millions +3,198 Millions +4,046 Millions +7,000 Millions +11,759 Millions +21,461 Millions +24,578 Millions +31,536 Millions +53,823 Millions +81,462 Millions diff --git a/agbenchmark/mocks/mock_manager.py b/agbenchmark/mocks/mock_manager.py index 57c03405..3a227e49 100644 --- a/agbenchmark/mocks/mock_manager.py +++ b/agbenchmark/mocks/mock_manager.py @@ -1,14 +1,13 @@ from typing import Any, Dict, Optional import agbenchmark.mocks.tests.basic_mocks as basic_mocks -import agbenchmark.mocks.tests.retrieval_mocks as retrieval_mocks class MockManager: def __init__(self, task: Optional[str], config: Dict[str, Any]) -> None: self.task = task self.workspace = config["workspace"] - self.modules = [basic_mocks, retrieval_mocks] + self.modules = [basic_mocks] def delegate(self, mock_function_name: Any, *args: Any, **kwargs: Any) -> None: if hasattr(self, mock_function_name): diff --git a/agbenchmark/mocks/tests/basic_mocks.py b/agbenchmark/mocks/tests/basic_mocks.py index 32149eb8..e4a1dedc 100644 --- a/agbenchmark/mocks/tests/basic_mocks.py +++ b/agbenchmark/mocks/tests/basic_mocks.py @@ -1,78 +1,12 @@ from agbenchmark.challenge import Challenge -def basic_write_file_mock(task: str, workspace: str) -> None: +def example_mock(task: str, workspace: str) -> None: """ This mock writes to a file (creates one if it doesn't exist) """ Challenge.write_to_file( workspace, "file_to_check.txt", - "Washington DC is the capital of the United States of America", - ) - - -def basic_retrieval_mock(task: str, workspace: str) -> None: - """ - This mock writes to a file (creates one if it doesn't exist) - """ - Challenge.write_to_file( - workspace, - "file_to_check.txt", - "25.89", - ) - - -def basic_retrieval_2_mock(task: str, workspace: str) -> None: - """ - This mock writes to a file (creates one if it doesn't exist) - """ - Challenge.write_to_file( - workspace, - "file_to_check.txt", - "81,462", - ) - - -def basic_retrieval_3_mock(task: str, workspace: str) -> None: - """ - This mock writes to a file (creates one if it doesn't exist) - """ - Challenge.write_to_file( - workspace, - "file_to_check.txt", - "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions", - ) - - -def basic_memory_mock(task: str, workspace: str) -> None: - """ - This mock writes to a file (creates one if it doesn't exist) - """ - Challenge.write_to_file( - workspace, - "file_to_check.txt", - "2314", - ) - - -def remember_multiple_ids_mock(task: str, workspace: str) -> None: - """ - This mock writes to a file (creates one if it doesn't exist) - """ - Challenge.write_to_file( - workspace, - "file_to_check.txt", - "3145\n3791\n9317\n9471", - ) - - -def remember_multiple_phrases_with_noise_mock(task: str, workspace: str) -> None: - """ - This mock writes to a file (creates one if it doesn't exist) - """ - Challenge.write_to_file( - workspace, - "file_to_check.txt", - "The purple elephant danced on a rainbow while eating a taco\nThe sneaky toaster stole my socks and ran away to Hawaii\nMy pet rock sings better than Beyoncé on Tuesdays\nThe giant hamster rode a unicycle through the crowded mall", + "This is an example showing how you can use mocks but here you can use artifacts_out folder instead of a mock.", ) diff --git a/agbenchmark/mocks/tests/retrieval_mocks.py b/agbenchmark/mocks/tests/retrieval_mocks.py deleted file mode 100644 index 9a8a57db..00000000 --- a/agbenchmark/mocks/tests/retrieval_mocks.py +++ /dev/null @@ -1,5 +0,0 @@ -# TODO: Make it so that you can specify for tests to only run if their prerequisites are met. -# Prerequisites here would be writing to a file (basic_abilities test). -# Should also check if prerequisites exists in regression file -def retrieval_1_mock(task: str, workspace: str) -> None: - pass diff --git a/agbenchmark/tests/basic_abilities/write_file/artifacts_out/random_file.txt b/agbenchmark/tests/basic_abilities/write_file/artifacts_out/random_file.txt new file mode 100644 index 00000000..1f275fb9 --- /dev/null +++ b/agbenchmark/tests/basic_abilities/write_file/artifacts_out/random_file.txt @@ -0,0 +1 @@ +Washington diff --git a/regression_tests.json b/regression_tests.json index 59a9694b..9714d42a 100644 --- a/regression_tests.json +++ b/regression_tests.json @@ -4,6 +4,11 @@ "dependencies": [], "test": "agbenchmark/challenges/code/d1/debug_simple_typo_with_guidance_test.py" }, + "TestDebugSimpleTypoWithoutGuidance": { + "difficulty": "basic", + "dependencies": [], + "test": "agbenchmark/challenges/code/d2/d2_test.py" + }, "TestBasicMemory": { "difficulty": "basic", "dependencies": [], @@ -19,11 +24,6 @@ "dependencies": [], "test": "agbenchmark/challenges/memory/m3/remember_multiple_ids_with_noise_test.py" }, - "TestRememberMultiplePhrasesWithNoise": { - "difficulty": "medium", - "dependencies": [], - "test": "agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py" - }, "TestRetrieval": { "difficulty": "basic", "dependencies": [], @@ -39,11 +39,6 @@ "dependencies": [], "test": "agbenchmark/challenges/retrieval/r2/r2_test.py" }, - "TestRetrieval3": { - "difficulty": "basic", - "dependencies": [], - "test": "agbenchmark/challenges/retrieval/r3/r3_test.py" - }, "TestReadFile": { "difficulty": "basic", "dependencies": [ @@ -51,9 +46,14 @@ ], "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py" }, - "TestDebugSimpleTypoWithoutGuidance": { + "TestRetrieval3": { "difficulty": "basic", "dependencies": [], - "test": "agbenchmark/challenges/code/d2/d2_test.py" + "test": "agbenchmark/challenges/retrieval/r3/r3_test.py" + }, + "TestRememberMultiplePhrasesWithNoise": { + "difficulty": "medium", + "dependencies": [], + "test": "agbenchmark/challenges/memory/m4/remember_multiple_phrases_with_noise_test.py" } } \ No newline at end of file