mirror of https://github.com/aljazceru/Auto-GPT.git (synced 2025-12-25 01:44:28 +01:00)
Add basic memory challenge (#57)
.gitignore (vendored, 2 lines changed)

@@ -1,3 +1,5 @@
+agbenchmark/mocks/workspace/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
agbenchmark/agent_interface.py

@@ -22,7 +22,7 @@ def run_agent(
         print("No mock provided")
     elif MOCK_FLAG == "True":
         mock_manager = MockManager(
-            task
+            task, config
         )  # workspace doesn't need to be passed in, stays the same
         print("Server unavailable, using mock", mock_func)
         mock_manager.delegate(mock_func)
agbenchmark/challenge.py

@@ -1,5 +1,7 @@
 import glob
+import inspect
 import os
+import shutil
 from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional

@@ -45,6 +47,8 @@ class Challenge(ABC):
     def setup_challenge(self, config: Dict[str, Any]) -> None:
         from agbenchmark.agent_interface import run_agent

+        self.copy_artifacts_into_workspace(config["workspace"])
+
         run_agent(self.task, self.mock, config)

     @property

@@ -124,3 +128,19 @@ class Challenge(ABC):
         )

         return 1.0
+
+    def copy_artifacts_into_workspace(self, workspace: str) -> None:
+        curr_frame = inspect.currentframe()
+        outer_frame = inspect.getouterframes(curr_frame)[2]
+        caller_file_path = outer_frame.filename
+        caller_dir_path = os.path.dirname(os.path.abspath(caller_file_path))
+        source_dir = os.path.join(caller_dir_path, "artifacts")
+
+        # Check if source_dir exists, if not then return immediately.
+        if not os.path.exists(source_dir):
+            return
+
+        for file_name in os.listdir(source_dir):
+            full_file_name = os.path.join(source_dir, file_name)
+            if os.path.isfile(full_file_name):
+                shutil.copy(full_file_name, workspace)
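For orientation: the new copy_artifacts_into_workspace helper walks the call stack to find the test file that triggered setup_challenge, then copies any files sitting next to it in an artifacts/ folder into the agent's workspace. The [2] index means "two frames above this one": frame 0 is the helper, frame 1 is setup_challenge, frame 2 is the test method that lives beside the artifacts. A minimal, self-contained sketch of that frame-walking behaviour (hypothetical function names, not part of the commit):

import inspect


def helper() -> str:
    # getouterframes(...)[0] is helper itself, [1] is its caller,
    # [2] is the caller's caller -- the frame the benchmark cares about.
    frames = inspect.getouterframes(inspect.currentframe())
    return frames[2].function


def middle() -> str:
    return helper()


def outer() -> str:
    return middle()


print(outer())  # prints "outer"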
agbenchmark/challenges/memory/m1/artifacts/instructions_1.txt (new file)

@@ -0,0 +1,2 @@
+The id to remember is 2314
+Read the file instructions_2.txt

agbenchmark/challenges/memory/m1/artifacts/instructions_2.txt (new file)

@@ -0,0 +1 @@
+Read the file instructions_3.txt

agbenchmark/challenges/memory/m1/artifacts/instructions_3.txt (new file)

@@ -0,0 +1 @@
+Read the file instructions_4.txt

agbenchmark/challenges/memory/m1/artifacts/instructions_4.txt (new file)

@@ -0,0 +1 @@
+Read the file instructions_5.txt

agbenchmark/challenges/memory/m1/artifacts/instructions_5.txt (new file)

@@ -0,0 +1 @@
+Write the id previously mentioned to a .txt file.
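The five instruction files form a pointer chain: the id appears only in the first file, and each subsequent read forces another agent action before the final write, so the id has to survive four intervening steps. A throwaway generator for such a chain (hypothetical helper, not part of the commit) could look like:

import os


def write_instruction_chain(directory: str, memory_id: str, length: int = 5) -> None:
    # Recreates an instructions_1.txt .. instructions_N.txt chain like the one above.
    os.makedirs(directory, exist_ok=True)
    for i in range(1, length + 1):
        if i == 1:
            body = f"The id to remember is {memory_id}\nRead the file instructions_2.txt"
        elif i < length:
            body = f"Read the file instructions_{i + 1}.txt"
        else:
            body = "Write the id previously mentioned to a .txt file."
        with open(os.path.join(directory, f"instructions_{i}.txt"), "w") as f:
            f.write(body)


write_instruction_chain("artifacts", "2314")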
agbenchmark/challenges/memory/m1/m1_data.json (new file, 21 lines)

@@ -0,0 +1,21 @@
+{
+  "name": "basic_memory",
+  "category": ["memory"],
+  "task": "Follow the instructions in the instructions_1.txt file",
+  "dependencies": [],
+  "ground": {
+    "answer": "2314",
+    "should_contain": ["2314"],
+    "should_not_contain": [],
+    "files": ["file_to_check.txt"]
+  },
+  "mock": {
+    "mock_func": "basic_memory_mock",
+    "mock_task": "Follow the instructions in the instructions_1.txt file"
+  },
+  "info": {
+    "difficulty": "basic",
+    "description": "Tests ability for the agent to remember information between each action. An id is presented initially and the agent has to remember it after reading 4 other files",
+    "side_effects": ["tests if there is in fact an LLM attached"]
+  }
+}
agbenchmark/challenges/memory/m1/m1_test.py (new file, 27 lines)

@@ -0,0 +1,27 @@
+import os
+from typing import Any, Dict
+
+import pytest
+
+from agbenchmark.challenges.memory.memory import MemoryChallenge
+
+
+class TestBasicMemory(MemoryChallenge):
+    """The first memory challenge"""
+
+    def get_file_path(self) -> str:  # all tests must implement this method
+        return os.path.join(os.path.dirname(__file__), "m1_data.json")
+
+    @pytest.mark.depends(name="test_basic_memory")
+    def test_method(self, config: Dict[str, Any]) -> None:
+        self.setup_challenge(config)
+
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)
+
+        scores = []
+        for file_content in files_contents:
+            score = self.scoring(file_content, self.data.ground)
+            print("Your score is:", score)
+            scores.append(score)
+
+        assert 1 in scores
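@pytest.mark.depends(name="test_basic_memory") registers the test under a stable alias with the pytest-depends plugin, so later challenges can gate on it via on=[...], as the retrieval test below previously did with on=["basic_write_file"]. A minimal sketch of the pattern (hypothetical test names, assuming pytest-depends is installed):

import pytest


@pytest.mark.depends(name="producer")
def test_producer() -> None:
    assert True


@pytest.mark.depends(on=["producer"])
def test_consumer() -> None:
    # pytest-depends skips this test automatically if test_producer failed.
    assert True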
agbenchmark/challenges/memory/memory.py (new file, 8 lines)

@@ -0,0 +1,8 @@
+import pytest
+
+from agbenchmark.challenge import Challenge
+
+
+@pytest.mark.memory
+class MemoryChallenge(Challenge):
+    """Challenge for memory"""
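The @pytest.mark.memory class marker lets the runner select the whole category with pytest -m memory. Custom markers should be registered to avoid PytestUnknownMarkWarning; a sketch of how that could be done in conftest.py (an assumption, the registration code is not part of this diff):

from typing import Any


def pytest_configure(config: Any) -> None:
    # Registers the custom marker so `pytest -m memory` runs warning-free.
    config.addinivalue_line("markers", "memory: challenges that test cross-action memory")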
agbenchmark/challenges/retrieval/r1/r1_test.py

@@ -1,5 +1,4 @@
 import os
-from pathlib import Path
 from typing import Any, Dict

 import pytest

@@ -13,12 +12,11 @@ class TestRetrieval(RetrievalChallenge):
     def get_file_path(self) -> str:  # all tests must implement this method
         return os.path.join(os.path.dirname(__file__), "r1_data.json")

-    @pytest.mark.depends(on=["basic_write_file"], name="test_retrieval")
+    @pytest.mark.depends(name="test_retrieval")
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)

-        workspace = Path(os.getcwd()) / config["workspace"]
-        files_contents = self.open_files(workspace, self.data.ground.files)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)

         scores = []
         for file_content in files_contents:
agbenchmark/challenges/retrieval/r2/r2_test.py

@@ -1,5 +1,4 @@
 import os
-from pathlib import Path
 from typing import Any, Dict

 import pytest

@@ -17,8 +16,7 @@ class TestRetrieval2(RetrievalChallenge):
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)

-        workspace = Path(os.getcwd()) / config["workspace"]
-        files_contents = self.open_files(workspace, self.data.ground.files)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)

         scores = []
         for file_content in files_contents:
agbenchmark/challenges/retrieval/r3/r3_test.py

@@ -1,5 +1,4 @@
 import os
-from pathlib import Path
 from typing import Any, Dict

 import pytest

@@ -17,8 +16,7 @@ class TestRetrieval3(RetrievalChallenge):
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)

-        workspace = Path(os.getcwd()) / config["workspace"]
-        files_contents = self.open_files(workspace, self.data.ground.files)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)

         scores = []
         for file_content in files_contents:
agbenchmark/conftest.py

@@ -31,14 +31,13 @@ def config(request: Any) -> None:
     with open(CONFIG_PATH, "r") as f:
         config = json.load(f)

-    if request.config.getoption("--mock"):
-        config["workspace"] = "agbenchmark/mocks/workspace"
-    elif config.get("workspace", "").startswith("${") and config.get(
+    if config.get("workspace", "").startswith("${") and config.get(
         "workspace", ""
     ).endswith("}"):
         path = get_dynamic_workspace(config)
         config["workspace"] = path
-
     else:
         config["workspace"] = Path(os.getcwd()) / config["workspace"]
     return config
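get_dynamic_workspace itself is outside this hunk; judging from the guard around it, it expands a "${ENV_VAR}"-style workspace value. A hedged re-implementation of the whole resolution step (sketch only, names assumed from the hunk):

import os
from pathlib import Path
from typing import Any, Dict


def get_dynamic_workspace(config: Dict[str, Any]) -> str:
    # Hypothetical: strip the "${" prefix and "}" suffix, then look the
    # variable up in the environment.
    var_name = config["workspace"][2:-1]
    value = os.environ.get(var_name)
    if value is None:
        raise ValueError(f"environment variable {var_name!r} is not set")
    return value


def resolve_workspace(config: Dict[str, Any]) -> Dict[str, Any]:
    ws = config.get("workspace", "")
    if ws.startswith("${") and ws.endswith("}"):
        config["workspace"] = get_dynamic_workspace(config)
    else:
        config["workspace"] = Path(os.getcwd()) / ws
    return config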
agbenchmark/mocks/mock_manager.py

@@ -1,13 +1,13 @@
-from typing import Any
+from typing import Any, Dict

 import agbenchmark.mocks.tests.basic_mocks as basic_mocks
 import agbenchmark.mocks.tests.retrieval_mocks as retrieval_mocks


 class MockManager:
-    def __init__(self, task: str):
+    def __init__(self, task: str, config: Dict[str, Any]) -> None:
         self.task = task
-        self.workspace = "agbenchmark/mocks/workspace"
+        self.workspace = config["workspace"]
         self.modules = [basic_mocks, retrieval_mocks]

     def delegate(self, mock_function_name: Any, *args: Any, **kwargs: Any) -> None:
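delegate is unchanged by this commit, but it is the piece that makes the config change matter: it resolves a mock by name across self.modules and calls it with the task and the (now config-driven) workspace. A minimal sketch of that name-based dispatch (an assumption about the shape of the real method, not a verbatim copy):

from types import ModuleType
from typing import List


def delegate(
    task: str, workspace: str, modules: List[ModuleType], mock_function_name: str
) -> None:
    # Search each registered mock module for a function with the given name.
    for module in modules:
        func = getattr(module, mock_function_name, None)
        if callable(func):
            func(task, workspace)
            return
    raise ValueError(f"no mock named {mock_function_name!r} found")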
agbenchmark/mocks/tests/basic_mocks.py

@@ -55,3 +55,14 @@ def basic_retrieval_3_mock(task: str, workspace: str) -> None:
         "file_to_check.txt",
         "15 Millions\n112 Millions\n117 Millions\n204 Millions\n413 Millions\n2,014 Millions\n3,198 Millions\n4,046 Millions\n7,000 Millions\n11,759 Millions\n21,461 Millions\n24,578 Millions\n31,536 Millions\n53,823 Millions\n81,462 Millions",
     )
+
+
+def basic_memory_mock(task: str, workspace: str) -> None:
+    """
+    This mock writes to a file (creates one if it doesn't exist)
+    """
+    Challenge.write_to_file(
+        workspace,
+        "file_to_check.txt",
+        "2314",
+    )
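The mocks never call an agent; they simply write the expected answer so the scoring pipeline can be exercised offline. Challenge.write_to_file is defined elsewhere in the codebase; a hypothetical stand-in that matches how the mocks use it:

import os


def write_to_file(workspace: str, filename: str, content: str) -> None:
    # Creates the file if it does not exist and overwrites its contents.
    os.makedirs(workspace, exist_ok=True)
    with open(os.path.join(workspace, filename), "w") as f:
        f.write(content)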
agbenchmark/start_benchmark.py

@@ -56,8 +56,6 @@ def start(category: str, reg: bool, mock: bool) -> int:
         config = json.load(f)

     set_key(".env", "MOCK_TEST", "True" if mock else "False")
-    if mock:
-        config["workspace"] = "agbenchmark/mocks/workspace"

     # create workspace directory if it doesn't exist
     workspace_path = os.path.abspath(config["workspace"])
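set_key comes from python-dotenv: the CLI process persists the mock flag to .env so the separately spawned pytest run can pick it up (agent_interface reads it back as MOCK_FLAG). A minimal sketch of the handoff, assuming python-dotenv and that MOCK_FLAG is loaded from the MOCK_TEST key:

import os

from dotenv import load_dotenv, set_key

set_key(".env", "MOCK_TEST", "True")  # written by the CLI process

load_dotenv()  # read later in the pytest process
MOCK_FLAG = os.getenv("MOCK_TEST")
print(MOCK_FLAG == "True")  # -> True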
file_to_check.txt (new artifact file, seeded for the read-file challenge)

@@ -0,0 +1 @@
+Hello World!
agbenchmark/tests/basic_abilities/read_file/r_file_data.json

@@ -4,8 +4,8 @@
     "task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
     "dependencies": ["basic_write_file"],
     "ground": {
-        "answer": "random string: this is how we're doing",
-        "should_contain": ["random string: this is how we're doing"],
+        "answer": "random string: Hello World!",
+        "should_contain": ["random string: Hello World!"],
         "files": ["file_to_check.txt"]
     },
     "mock": {
agbenchmark/tests/basic_abilities/read_file/read_file_test.py

@@ -3,19 +3,12 @@ from typing import Any, Dict

 import pytest

-from agbenchmark.challenge import Challenge
 from agbenchmark.tests.basic_abilities.basic_challenge import BasicChallenge


 class TestReadFile(BasicChallenge):
     """Testing if LLM can read a file"""

-    @pytest.fixture(scope="module", autouse=True)
-    def setup_module(self, workspace: str) -> None:
-        Challenge.write_to_file(
-            workspace, self.data.ground.files[0], "this is how we're doing"
-        )
-
     def get_file_path(self) -> str:  # all tests must implement this method
         return os.path.join(os.path.dirname(__file__), "r_file_data.json")
agbenchmark/tests/basic_abilities/write_file/write_file_test.py

@@ -1,5 +1,4 @@
 import os
-from pathlib import Path
 from typing import Any, Dict

 import pytest

@@ -17,8 +16,7 @@ class TestWriteFile(BasicChallenge):
     def test_method(self, config: Dict[str, Any]) -> None:
         self.setup_challenge(config)

-        workspace = Path(os.getcwd()) / config["workspace"]
-        files_contents = self.open_files(workspace, self.data.ground.files)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)

         scores = []
         for file_content in files_contents:
(challenge registry JSON)

@@ -1,4 +1,9 @@
 {
+  "TestBasicMemory": {
+    "difficulty": "basic",
+    "dependencies": [],
+    "test": "agbenchmark/challenges/memory/m1/m1_test.py"
+  },
   "TestRetrieval": {
     "difficulty": "basic",
     "dependencies": [],
@@ -9,6 +14,11 @@
     "dependencies": [],
     "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
   },
+  "TestRetrieval2": {
+    "difficulty": "basic",
+    "dependencies": [],
+    "test": "agbenchmark/challenges/retrieval/r2/r2_test.py"
+  },
   "TestReadFile": {
     "difficulty": "basic",
     "dependencies": [
@@ -16,14 +26,9 @@
     ],
     "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py"
   },
-  "TestRetrieval2": {
-    "difficulty": "basic",
-    "dependencies": [],
-    "test": "agbenchmark/challenges/retrieval/r2/r2_test.py"
-  },
   "TestRetrieval3": {
     "difficulty": "basic",
     "dependencies": [],
     "test": "agbenchmark/challenges/retrieval/r3/r3_test.py"
   }
 }