Add basic memory challenge (#57)

merwanehamadi authored 2023-07-05 20:32:28 -07:00, committed by GitHub
parent bfc7dfdb29
commit 74fc969dd6
24 changed files with 121 additions and 38 deletions

.gitignore

@@ -1,3 +1,5 @@
+agbenchmark/mocks/workspace/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

agbenchmark/agent_interface.py

@@ -22,7 +22,7 @@ def run_agent(
         print("No mock provided")
     elif MOCK_FLAG == "True":
         mock_manager = MockManager(
-            task
+            task, config
         )  # workspace doesn't need to be passed in, stays the same
         print("Server unavailable, using mock", mock_func)
         mock_manager.delegate(mock_func)

agbenchmark/challenge.py

@@ -1,5 +1,7 @@
import glob
import inspect
import os
import shutil
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
@@ -45,6 +47,8 @@ class Challenge(ABC):
def setup_challenge(self, config: Dict[str, Any]) -> None:
from agbenchmark.agent_interface import run_agent
self.copy_artifacts_into_workspace(config["workspace"])
run_agent(self.task, self.mock, config)
@property
@@ -124,3 +128,19 @@ class Challenge(ABC):
)
return 1.0
def copy_artifacts_into_workspace(self, workspace: str) -> None:
curr_frame = inspect.currentframe()
outer_frame = inspect.getouterframes(curr_frame)[2]
caller_file_path = outer_frame.filename
caller_dir_path = os.path.dirname(os.path.abspath(caller_file_path))
source_dir = os.path.join(caller_dir_path, "artifacts")
# Check if source_dir exists, if not then return immediately.
if not os.path.exists(source_dir):
return
for file_name in os.listdir(source_dir):
full_file_name = os.path.join(source_dir, file_name)
if os.path.isfile(full_file_name):
shutil.copy(full_file_name, workspace)
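
The frame arithmetic is the subtle part of this helper: index 2 in `getouterframes` works because the call chain at runtime is `test_method` -> `setup_challenge` -> `copy_artifacts_into_workspace`, so two frames up is the test file sitting next to the `artifacts/` directory. A minimal standalone sketch of the same lookup (function name is illustrative, not from the commit):

```python
import inspect
import os
from typing import Optional


def find_caller_artifacts_dir() -> Optional[str]:
    """Locate an 'artifacts' directory beside the caller's caller.

    Frame 0 is this helper, frame 1 its caller, frame 2 the caller's
    caller -- mirroring test_method -> setup_challenge -> helper.
    Assumes at least two calling frames exist on the stack.
    """
    curr_frame = inspect.currentframe()
    outer_frames = inspect.getouterframes(curr_frame)
    caller_file = outer_frames[2].filename
    caller_dir = os.path.dirname(os.path.abspath(caller_file))
    source_dir = os.path.join(caller_dir, "artifacts")
    return source_dir if os.path.exists(source_dir) else None
```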

agbenchmark/challenges/memory/m1/artifacts/instructions_1.txt

@@ -0,0 +1,2 @@
+The id to remember is 2314
+Read the file instructions_2.txt

agbenchmark/challenges/memory/m1/artifacts/instructions_2.txt

@@ -0,0 +1 @@
+Read the file instructions_3.txt

agbenchmark/challenges/memory/m1/artifacts/instructions_3.txt

@@ -0,0 +1 @@
+Read the file instructions_4.txt

agbenchmark/challenges/memory/m1/artifacts/instructions_4.txt

@@ -0,0 +1 @@
+Read the file instructions_5.txt

agbenchmark/challenges/memory/m1/artifacts/instructions_5.txt

@@ -0,0 +1 @@
+Write the id previously mentioned to a .txt file.

agbenchmark/challenges/memory/m1/m1_data.json

@@ -0,0 +1,21 @@
{
"name": "basic_memory",
"category": ["memory"],
"task": "Follow the instructions in the instructions_1.txt file",
"dependencies": [],
"ground": {
"answer": "2314",
"should_contain": ["2314"],
"should_not_contain": [],
"files": ["file_to_check.txt"]
},
"mock": {
"mock_func": "basic_memory_mock",
"mock_task": "Follow the instructions in the instructions_1.txt file"
},
"info": {
"difficulty": "basic",
"description": "Tests ability for the agent to remember information between each action. An id is presented initially and the agent has to remember it after reading 4 other files",
"side_effects": ["tests if there is in fact an LLM attached"]
}
}
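
The `ground` block is what the harness scores against: each file listed in `files` is read from the workspace and checked for the `should_contain` / `should_not_contain` strings. The real check is `Challenge.scoring` (not shown in this diff); a minimal sketch of the kind of comparison the schema implies:

```python
import os
from typing import List


def check_ground(workspace: str, file_name: str,
                 should_contain: List[str],
                 should_not_contain: List[str]) -> float:
    # Illustrative stand-in for Challenge.scoring, which this diff omits.
    with open(os.path.join(workspace, file_name)) as f:
        content = f.read()
    if any(s not in content for s in should_contain):
        return 0.0
    if any(s in content for s in should_not_contain):
        return 0.0
    return 1.0


# e.g. check_ground("workspace", "file_to_check.txt", ["2314"], []) -> 1.0
```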

agbenchmark/challenges/memory/m1/m1_test.py

@@ -0,0 +1,27 @@
import os
from typing import Any, Dict
import pytest
from agbenchmark.challenges.memory.memory import MemoryChallenge
class TestBasicMemory(MemoryChallenge):
"""The first memory challenge"""
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "m1_data.json")
@pytest.mark.depends(name="test_basic_memory")
def test_method(self, config: Dict[str, Any]) -> None:
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:
score = self.scoring(file_content, self.data.ground)
print("Your score is:", score)
scores.append(score)
assert 1 in scores
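
The `@pytest.mark.depends(name="test_basic_memory")` marker comes from the pytest-depends plugin: it publishes the test under a stable name so other challenges can gate on its outcome. A hypothetical downstream challenge (class and names invented for illustration, not part of this commit) would declare that dependency like so:

```python
import pytest

from agbenchmark.challenges.memory.memory import MemoryChallenge


class TestFollowUpMemory(MemoryChallenge):  # hypothetical, not in this commit
    # pytest-depends skips this test automatically when test_basic_memory
    # failed or did not run, instead of reporting a spurious failure.
    @pytest.mark.depends(on=["test_basic_memory"], name="test_follow_up_memory")
    def test_method(self, config):
        ...
```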

agbenchmark/challenges/memory/memory.py

@@ -0,0 +1,8 @@
+import pytest
+
+from agbenchmark.challenge import Challenge
+
+
+@pytest.mark.memory
+class MemoryChallenge(Challenge):
+    """Challenge for memory"""

agbenchmark/challenges/retrieval/r1/r1_test.py

@@ -1,5 +1,4 @@
import os
from pathlib import Path
from typing import Any, Dict
import pytest
@@ -13,12 +12,11 @@ class TestRetrieval(RetrievalChallenge):
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "r1_data.json")
@pytest.mark.depends(on=["basic_write_file"], name="test_retrieval")
@pytest.mark.depends(name="test_retrieval")
def test_method(self, config: Dict[str, Any]) -> None:
self.setup_challenge(config)
workspace = Path(os.getcwd()) / config["workspace"]
files_contents = self.open_files(workspace, self.data.ground.files)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:

agbenchmark/challenges/retrieval/r2/r2_test.py

@@ -1,5 +1,4 @@
import os
from pathlib import Path
from typing import Any, Dict
import pytest
@@ -17,8 +16,7 @@ class TestRetrieval2(RetrievalChallenge):
def test_method(self, config: Dict[str, Any]) -> None:
self.setup_challenge(config)
workspace = Path(os.getcwd()) / config["workspace"]
files_contents = self.open_files(workspace, self.data.ground.files)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:

agbenchmark/challenges/retrieval/r3/r3_test.py

@@ -1,5 +1,4 @@
import os
from pathlib import Path
from typing import Any, Dict
import pytest
@@ -17,8 +16,7 @@ class TestRetrieval3(RetrievalChallenge):
def test_method(self, config: Dict[str, Any]) -> None:
self.setup_challenge(config)
workspace = Path(os.getcwd()) / config["workspace"]
files_contents = self.open_files(workspace, self.data.ground.files)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:

agbenchmark/conftest.py

@@ -31,14 +31,13 @@ def config(request: Any) -> None:
     with open(CONFIG_PATH, "r") as f:
         config = json.load(f)

-    if request.config.getoption("--mock"):
-        config["workspace"] = "agbenchmark/mocks/workspace"
-    elif config.get("workspace", "").startswith("${") and config.get(
+    if config.get("workspace", "").startswith("${") and config.get(
         "workspace", ""
     ).endswith("}"):
         path = get_dynamic_workspace(config)
         config["workspace"] = path
     else:
         config["workspace"] = Path(os.getcwd()) / config["workspace"]

     return config
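
With the `--mock` branch gone, every run resolves the workspace through the same two cases: a `${...}` value is handed to `get_dynamic_workspace` (defined elsewhere in this file and not shown; presumably it expands the named environment variable), and anything else is anchored to the current directory. A sketch of that resolution under that assumption:

```python
import os
from pathlib import Path
from typing import Any, Dict


def resolve_workspace(config: Dict[str, Any]) -> Path:
    """Mirror the fixture's branching; the ${VAR} expansion is an assumption."""
    workspace = config.get("workspace", "")
    if workspace.startswith("${") and workspace.endswith("}"):
        # e.g. "${AGENT_WORKSPACE}" -> contents of that environment variable
        return Path(os.environ[workspace[2:-1]])
    return Path(os.getcwd()) / workspace
```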

agbenchmark/mocks/mock_manager.py

@@ -1,13 +1,13 @@
from typing import Any
from typing import Any, Dict
import agbenchmark.mocks.tests.basic_mocks as basic_mocks
import agbenchmark.mocks.tests.retrieval_mocks as retrieval_mocks
class MockManager:
def __init__(self, task: str):
def __init__(self, task: str, config: Dict[str, Any]) -> None:
self.task = task
self.workspace = "agbenchmark/mocks/workspace"
self.workspace = config["workspace"]
self.modules = [basic_mocks, retrieval_mocks]
def delegate(self, mock_function_name: Any, *args: Any, **kwargs: Any) -> None:
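
`delegate` itself is unchanged by this commit, but for context: it resolves `mock_function_name` against the registered mock modules and invokes it with the task and the (now configurable) workspace. A sketch of that dispatch pattern, not the verbatim implementation:

```python
from types import ModuleType
from typing import List


def delegate_by_name(modules: List[ModuleType], mock_function_name: str,
                     task: str, workspace: str) -> None:
    # Search each registered module for the named mock; call the first match.
    for module in modules:
        func = getattr(module, mock_function_name, None)
        if callable(func):
            func(task, workspace)
            return
    raise ValueError(f"no mock named {mock_function_name!r} in {modules}")
```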

agbenchmark/mocks/tests/basic_mocks.py

@@ -55,3 +55,14 @@ def basic_retrieval_3_mock(task: str, workspace: str) -> None:
         "file_to_check.txt",
         "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions",
     )
+
+
+def basic_memory_mock(task: str, workspace: str) -> None:
+    """
+    This mock writes to a file (creates one if it doesn't exist)
+    """
+    Challenge.write_to_file(
+        workspace,
+        "file_to_check.txt",
+        "2314",
+    )
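
End to end, a mocked run of the new challenge reduces to roughly the following (the import path and config literal are assumptions for illustration):

```python
from agbenchmark.mocks.mock_manager import MockManager  # import path assumed

config = {"workspace": "agbenchmark/mocks/workspace"}  # illustrative config
task = "Follow the instructions in the instructions_1.txt file"

mock_manager = MockManager(task, config)
# Dispatches to basic_memory_mock, which writes "2314" into
# <workspace>/file_to_check.txt -- exactly what the ground check expects.
mock_manager.delegate("basic_memory_mock")
```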

agbenchmark/start_benchmark.py

@@ -56,8 +56,6 @@ def start(category: str, reg: bool, mock: bool) -> int:
         config = json.load(f)

     set_key(".env", "MOCK_TEST", "True" if mock else "False")

-    if mock:
-        config["workspace"] = "agbenchmark/mocks/workspace"
     # create workspace directory if it doesn't exist
     workspace_path = os.path.abspath(config["workspace"])

agbenchmark/tests/basic_abilities/read_file/artifacts/file_to_check.txt

@@ -0,0 +1 @@
+Hello World!

agbenchmark/tests/basic_abilities/read_file/r_file_data.json

@@ -4,8 +4,8 @@
     "task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
     "dependencies": ["basic_write_file"],
     "ground": {
-        "answer": "random string: this is how we're doing",
-        "should_contain": ["random string: this is how we're doing"],
+        "answer": "random string: Hello World!",
+        "should_contain": ["random string: Hello World!"],
         "files": ["file_to_check.txt"]
     },
     "mock": {

agbenchmark/tests/basic_abilities/read_file/read_file_test.py

@@ -3,19 +3,12 @@ from typing import Any, Dict

 import pytest

-from agbenchmark.challenge import Challenge
 from agbenchmark.tests.basic_abilities.basic_challenge import BasicChallenge


 class TestReadFile(BasicChallenge):
     """Testing if LLM can read a file"""

-    @pytest.fixture(scope="module", autouse=True)
-    def setup_module(self, workspace: str) -> None:
-        Challenge.write_to_file(
-            workspace, self.data.ground.files[0], "this is how we're doing"
-        )
-
     def get_file_path(self) -> str:  # all tests must implement this method
         return os.path.join(os.path.dirname(__file__), "r_file_data.json")

agbenchmark/tests/basic_abilities/write_file/write_file_test.py

@@ -1,5 +1,4 @@
import os
from pathlib import Path
from typing import Any, Dict
import pytest
@@ -17,8 +16,7 @@ class TestWriteFile(BasicChallenge):
def test_method(self, config: Dict[str, Any]) -> None:
self.setup_challenge(config)
workspace = Path(os.getcwd()) / config["workspace"]
files_contents = self.open_files(workspace, self.data.ground.files)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:

regression_tests.json

@@ -1,4 +1,9 @@
 {
+    "TestBasicMemory": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/memory/m1/m1_test.py"
+    },
     "TestRetrieval": {
         "difficulty": "basic",
         "dependencies": [],
@@ -9,6 +14,11 @@
         "dependencies": [],
         "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
     },
+    "TestRetrieval2": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/retrieval/r2/r2_test.py"
+    },
     "TestReadFile": {
         "difficulty": "basic",
         "dependencies": [
@@ -16,14 +26,9 @@
         ],
         "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
     },
-    "TestRetrieval2": {
-        "difficulty": "basic",
-        "dependencies": [],
-        "test": "agbenchmark/challenges/retrieval/r2/r2_test.py"
-    },
     "TestRetrieval3": {
         "difficulty": "basic",
         "dependencies": [],
         "test": "agbenchmark/challenges/retrieval/r3/r3_test.py"
     }
 }