Mirror of https://github.com/aljazceru/Auto-GPT.git (synced 2025-12-19 15:04:26 +01:00)

Commit: moving run agent to tests & agnostic run working
@@ -1,3 +1,3 @@
 AGENT_NAME=mini-agi
-AGENT_TIMEOUT=60
+ENVIRONMENT=local
 MOCK_TEST=False
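Note: these variables are consumed with python-dotenv elsewhere in this commit. A minimal sketch of how they are read (the defaulting mirrors agent_interface.py below; the inline comments are illustrative):

import os

from dotenv import load_dotenv

load_dotenv()  # pulls AGENT_NAME, ENVIRONMENT and MOCK_TEST out of the .env file

AGENT_NAME = os.getenv("AGENT_NAME")                    # e.g. "mini-agi"
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"  # "local" switches to the local agent path
MOCK_FLAG = os.getenv("MOCK_TEST")                      # compared against the string "True", not a bool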
@@ -4,7 +4,7 @@ import pytest
 from abc import ABC, abstractmethod
 from agbenchmark.challenges.define_task_types import Ground
 from agbenchmark.challenges.define_task_types import ChallengeData
-from dotenv import load_dotenv, set_key
+from dotenv import load_dotenv
 
 load_dotenv()
 
@@ -40,22 +40,24 @@ class Challenge(ABC):
         print("self.data.dependencies", self.data.dependencies)
         return self.data.dependencies
 
+    def setup_challenge(self, config):
+        from agbenchmark.agent_interface import run_agent
+
+        print("SETTING UP CHALLENGE...")
+
+        run_agent(self.task, self.mock, config)
+
     @property
     def name(self) -> str:
         print("self.data.name", self.data.name)
         return self.data.name
 
-    @pytest.mark.parametrize(
-        "run_agent",
-        [(task, mock)],
-        indirect=True,
-    )
     @pytest.mark.parametrize(
         "challenge_data",
         [data],
         indirect=True,
     )
-    def test_method(self, workspace):
+    def test_method(self, config):
         raise NotImplementedError
 
     @staticmethod
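Note: with setup_challenge() on the base class, a concrete challenge now drives the agent itself instead of relying on the removed run_agent fixture. A sketch of the resulting test shape, mirroring the read/write-file tests further down (the class name, import path and data file here are illustrative, not taken from the repository):

import os

from agbenchmark.challenges.basic_abilities.BasicChallenge import BasicChallenge  # illustrative import path


class TestExample(BasicChallenge):
    def get_file_path(self) -> str:  # all tests must implement this method
        return os.path.join(os.path.dirname(__file__), "example_data.json")

    def test_method(self, config):
        # Runs the agent against self.task via run_agent(self.task, self.mock, config)
        self.setup_challenge(config)

        # The workspace now comes out of the config dict instead of a `workspace` fixture
        files_contents = self.open_files(config["workspace"], self.data.ground.files)

        scores = []
        for file_content in files_contents:
            # score each file against self.data.ground (scoring logic is unchanged by this commit)
            ...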
agbenchmark/agent_interface.py  (new file, 108 lines)
@@ -0,0 +1,108 @@
+import os
+import sys
+import subprocess
+import time
+from agbenchmark.mocks.MockManager import MockManager
+from multiprocessing import Process, Pipe
+
+from agent.hook import run_specific_agent
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+MOCK_FLAG = os.getenv("MOCK_TEST")
+
+
+def run_agent(task, mock_func, config):
+    """Calling to get a response"""
+
+    if mock_func == None and MOCK_FLAG == "True":
+        print("No mock provided")
+    elif MOCK_FLAG == "True":
+        mock_manager = MockManager(
+            task
+        )  # workspace doesn't need to be passed in, stays the same
+        print("Server unavailable, using mock", mock_func)
+        mock_manager.delegate(mock_func)
+    else:
+        if config["agent"]["type"] == "python":
+            run_agent_function(config, task)
+        elif config["agent"]["type"] == "script":
+            run_agent_command(config, task)
+
+
+ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
+
+
+def run_agent_command(config, task):
+    path = config["agent"]["path"]
+
+    if ENVIRONMENT == "local":
+        AGENT_NAME = os.getenv("AGENT_NAME")
+        path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")
+
+    timeout = config["agent"]["cutoff"] or sys.maxsize
+    print(f"Running {task} with timeout {timeout}")
+
+    command_from_config = config["agent"]["script"]
+    command_list = command_from_config.split()
+
+    # replace '{}' with the task
+    command_list = [cmd if cmd != "{}" else task for cmd in command_list]
+    print("path, command_list", path, command_list)
+    start_time = time.time()
+    proc = subprocess.Popen(
+        command_list,
+        cwd=path,
+        shell=True,
+    )
+
+    while True:
+        if time.time() - start_time > timeout:
+            print("The subprocess has exceeded the time limit and was terminated.")
+            proc.terminate()
+            break
+
+        if proc.poll() is not None:
+            print("The subprocess has finished running.")
+            break
+
+
+def run_agent_function(config, task):
+    timeout = (
+        config["cutoff"]["count"] if config["cutoff"]["type"] == "time" else sys.maxsize
+    )
+    print(
+        f"Running Python function '{config['agent']['function']}' with timeout {timeout}"
+    )
+
+    parent_conn, child_conn = Pipe()
+    process = Process(target=run_specific_agent, args=(task, child_conn))
+    process.start()
+    start_time = time.time()
+
+    while True:
+        if parent_conn.poll():  # Check if there's a new message from the child process
+            response, cycle_count = parent_conn.recv()
+            print(f"Cycle {cycle_count}: {response}")
+
+            if cycle_count >= config["cutoff"]["count"]:
+                print(
+                    f"Cycle count has reached the limit of {config['cutoff']['count']}. Terminating."
+                )
+                child_conn.send("terminate")
+                break
+
+        if time.time() - start_time > timeout:
+            print("The Python function has exceeded the time limit and was terminated.")
+            child_conn.send(
+                "terminate"
+            )  # Send a termination signal to the child process
+            break
+
+        if not process.is_alive():
+            print("The Python function has finished running.")
+            break
+
+    process.join()
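Note: a minimal calling sketch for the new module, assuming MOCK_TEST is unset or "False". The config literal mirrors the config.json change below; the task string is illustrative:

from agbenchmark.agent_interface import run_agent

config = {
    "workspace": "C:\\Users\\silen\\miniagi",
    "agent": {"type": "script", "path": "", "script": "python miniagi.py {}", "cutoff": 60},
}

# With mock_func=None and MOCK_TEST != "True", run_agent falls through to the
# "script" branch: run_agent_command splits the script template, substitutes the
# task for "{}", launches it with subprocess.Popen and polls it until it either
# exits or exceeds the 60-second cutoff.
run_agent("example task", None, config)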
(deleted file, 65 lines)
@@ -1,65 +0,0 @@
-import os
-import sys
-import pexpect as expect
-from dotenv import load_dotenv
-
-load_dotenv()
-
-
-def check_cycle_count(cycle_count: int, cutoff: int, proc):
-    """Increment, print, and check cycle count."""
-    cycle_count += 1
-    print(f"Cycle count: {cycle_count}")
-    if cycle_count >= cutoff:
-        proc.terminate(force=True)
-    return cycle_count
-
-
-AGENT_NAME = os.getenv("AGENT_NAME")
-
-
-def run_agnostic(config, task):
-    path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")
-
-    timeout = sys.maxsize
-
-    if config["cutoff"]["type"] == "time":
-        timeout = config["cutoff"]["count"] or 60
-
-    # from pexpect.popen_spawn import PopenSpawn
-
-    print(f"Running {task} with timeout {timeout}")
-
-    # Starting the subprocess using pexpect
-    proc = expect.spawn("python", ["miniagi.py", task], timeout=timeout, cwd=path)
-
-    print("proc", proc)
-
-    cycle_count = 0
-
-    while True:
-        try:
-            # If we get the prompt for user input, we send "\n"
-            if config["cutoff"]["type"] == "user_input":
-                proc.expect([config["cutoff"]["user_prompt"]])
-                proc.sendline(config["cutoff"]["user_input"])
-                cycle_count = check_cycle_count(
-                    cycle_count, config["cutoff"]["count"], proc
-                )
-            elif config["cutoff"]["type"] == "cycle_count":
-                match = proc.expect([r"Cycle count: (\d+)"])
-                if match is not None:
-                    cycle_count = int(match.group(1))  # type: ignore
-                    cycle_count = check_cycle_count(
-                        cycle_count, config["cutoff"]["count"], proc
-                    )
-
-        # for cutoff type "time", just let it run until timeout
-        except expect.TIMEOUT:
-            print("The subprocess has exceeded the time limit and was terminated.")
-            break
-        except expect.EOF:
-            print("The subprocess has finished running.")
-            break
-
-    proc.close()
@@ -1,6 +1,4 @@
-import pytest
 from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
-from agbenchmark.challenges.define_task_types import ChallengeData, Ground
 import os
 
 
@@ -10,8 +8,9 @@ class TestRetrieval1(RetrievalChallenge):
     def get_file_path(self) -> str:  # all tests must implement this method
        return os.path.join(os.path.dirname(__file__), "r1_data.json")
 
-    def test_method(self, workspace):
-        files_contents = self.open_files(workspace, self.data.ground.files)
+    def test_method(self, config):
+        self.setup_challenge(config)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)
 
         scores = []
         for file_content in files_contents:
@@ -1,9 +1,10 @@
 {
   "workspace": "C:\\Users\\silen\\miniagi",
-  "cutoff": {
-    "type": "time",
-    "user_prompt": "Press enter to continue or abort this action by typing feedback:",
+  "agent": {
+    "type": "script",
+    "path": "",
+    "script": "python miniagi.py {}",
     "user_input": "\n",
-    "count": 5
+    "cutoff": 60
   }
 }
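Note: for reference, this is roughly how run_agent_command (added above) consumes the new "agent" block; the task string is illustrative:

import sys

agent_cfg = {"type": "script", "path": "", "script": "python miniagi.py {}", "user_input": "\n", "cutoff": 60}
task = "example task"

timeout = agent_cfg["cutoff"] or sys.maxsize  # 60 seconds here
command_list = [cmd if cmd != "{}" else task for cmd in agent_cfg["script"].split()]
print(command_list)  # ['python', 'miniagi.py', 'example task']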
@@ -2,11 +2,7 @@ import json
 import os
 import pytest
 import shutil
-import subprocess
-import sys
 from agbenchmark.tests.regression.RegressionManager import RegressionManager
-from agbenchmark.mocks.MockManager import MockManager
-from agbenchmark.benchmark import run_agnostic
 
 
 @pytest.fixture(scope="module")
@@ -41,29 +37,6 @@ def pytest_addoption(parser):
     parser.addoption("--mock", action="store_true", default=False)
 
 
-@pytest.fixture(autouse=True)
-def run_agent(request, config):
-    """Calling to get a response"""
-    if isinstance(request.param, tuple):
-        task = request.param[0]  # The task is passed in indirectly
-        mock_function_name = request.param[1] or None
-    else:
-        task = request.param
-        mock_function_name = None
-
-    if mock_function_name != None and (request.config.getoption("--mock")):
-        if mock_function_name:
-            mock_manager = MockManager(
-                task
-            )  # workspace doesn't need to be passed in, stays the same
-            print("Server unavailable, using mock", mock_function_name)
-            mock_manager.delegate(mock_function_name)
-        else:
-            print("No mock provided")
-    else:
-        run_agnostic(config, task)
-
-
 regression_json = "agbenchmark/tests/regression/regression_tests.json"
 
 regression_manager = RegressionManager(regression_json)
@@ -120,13 +93,3 @@ def pytest_generate_tests(metafunc):
 
         # Add the parameters to the test function
         metafunc.parametrize("challenge_data", [params], indirect=True)
-
-    if "run_agent" in metafunc.fixturenames:
-        # Get the instance of the test class
-        test_class = metafunc.cls()
-
-        # Generate the parameters
-        params = [(test_class.task, test_class.mock)]
-
-        # Add the parameters to the test function
-        metafunc.parametrize("run_agent", params, indirect=True)
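Note: the surviving parametrization relies on pytest's indirect mode, where the parameter is routed through a fixture of the same name. A self-contained illustration (names here are generic, not taken from the benchmark):

import pytest


@pytest.fixture
def challenge_data(request):
    # With indirect=True the parametrized value arrives on request.param
    # instead of being passed straight into the test function.
    return request.param


@pytest.mark.parametrize("challenge_data", [{"name": "example"}], indirect=True)
def test_uses_fixture(challenge_data):
    assert challenge_data["name"] == "example"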
agbenchmark/mocks/workspace/file_to_check.txt  (new file, 1 line)
@@ -0,0 +1 @@
+Washington DC is the capital of the United States of America
@@ -17,10 +17,9 @@ class TestReadFile(BasicChallenge):
         return os.path.join(os.path.dirname(__file__), "r_file_data.json")
 
     @pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
-    def test_method(
-        self, workspace
-    ):  # run_test is a common name that all tests must implement
-        files_contents = self.open_files(workspace, self.data.ground.files)
+    def test_method(self, config):
+        self.setup_challenge(config)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)
 
         scores = []
         for file_content in files_contents:
@@ -10,9 +10,9 @@ class TestWriteFile(BasicChallenge):
         return os.path.join(os.path.dirname(__file__), "w_file_data.json")
 
     @pytest.mark.depends(on=[], name="basic_write_file")
-    def test_method(self, workspace):
-        print("my workspace is ", workspace)
-        files_contents = self.open_files(workspace, self.data.ground.files)
+    def test_method(self, config):
+        self.setup_challenge(config)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)
 
         scores = []
         for file_content in files_contents:
@@ -2,13 +2,6 @@
   "TestWriteFile": {
     "difficulty": "basic",
     "dependencies": [],
-    "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0-run_agent0]"
-  },
-  "TestReadFile": {
-    "difficulty": "basic",
-    "dependencies": [
-      "basic_write_file"
-    ],
-    "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_method[challenge_data0-run_agent0]"
+    "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0]"
   }
 }
agent/hook.py  (new file, 10 lines)
@@ -0,0 +1,10 @@
+async def run_specific_agent(task, conn):
+    while (
+        not conn.poll()
+    ):  # Check if there's a termination signal from the main process
+        response, cycle_count = await run_agent(
+            task
+        )  # run the agent and get the response and cycle count
+
+        # Send response and cycle count back to the main process
+        conn.send((response, cycle_count))
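Note: a self-contained sketch of the Pipe/Process handshake that run_agent_function and run_specific_agent rely on. The child-side coroutine is a hypothetical stand-in for the agent's own run_agent, and unlike the code above the termination signal here is sent on the parent end of the pipe:

import asyncio
from multiprocessing import Pipe, Process


def child(task, conn):
    async def fake_run_agent(task):  # hypothetical stand-in for the agent's run_agent
        return f"echo: {task}", 1

    async def loop():
        while not conn.poll():  # stop once the parent sends "terminate"
            response, cycle_count = await fake_run_agent(task)
            conn.send((response, cycle_count))
            await asyncio.sleep(0.1)

    asyncio.run(loop())


if __name__ == "__main__":
    parent_conn, child_conn = Pipe()
    proc = Process(target=child, args=("example task", child_conn))
    proc.start()
    response, cycle_count = parent_conn.recv()  # mirrors the parent_conn.poll()/recv() loop above
    print(f"Cycle {cycle_count}: {response}")
    parent_conn.send("terminate")               # same signal the benchmark uses to stop the child
    proc.join()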
@@ -16,8 +16,6 @@ openai = "^0.27.8"
 pydantic = "^1.10.9"
 pytest-depends = "^1.0.1"
 python-dotenv = "^1.0.0"
-pexpect = "^4.8.0"
-wexpect = "^4.0.0"
 
 
 [build-system]