mirror of https://github.com/aljazceru/Auto-GPT.git (synced 2025-12-18 22:44:21 +01:00)
Moving run_agent to tests & agnostic run working
@@ -1,3 +1,3 @@
AGENT_NAME=mini-agi
AGENT_TIMEOUT=60
ENVIRONMENT=local
MOCK_TEST=False
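How the variables above are consumed further down in this commit (a reading aid, not code from the diff); AGENT_TIMEOUT is declared here but not referenced in the files shown in this diff.

import os
from dotenv import load_dotenv

load_dotenv()  # pulls the .env values into the process environment

MOCK_FLAG = os.getenv("MOCK_TEST")                      # "True" routes run_agent to MockManager
AGENT_NAME = os.getenv("AGENT_NAME")                    # e.g. "mini-agi", used to build the local agent path
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"  # "local" points run_agent_command at agent/<AGENT_NAME>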
@@ -4,7 +4,7 @@ import pytest
from abc import ABC, abstractmethod
from agbenchmark.challenges.define_task_types import Ground
from agbenchmark.challenges.define_task_types import ChallengeData
from dotenv import load_dotenv, set_key
from dotenv import load_dotenv

load_dotenv()

@@ -40,22 +40,24 @@ class Challenge(ABC):
        print("self.data.dependencies", self.data.dependencies)
        return self.data.dependencies

    def setup_challenge(self, config):
        from agbenchmark.agent_interface import run_agent

        print("SETTING UP CHALLENGE...")

        run_agent(self.task, self.mock, config)

    @property
    def name(self) -> str:
        print("self.data.name", self.data.name)
        return self.data.name

    @pytest.mark.parametrize(
        "run_agent",
        [(task, mock)],
        indirect=True,
    )
    @pytest.mark.parametrize(
        "challenge_data",
        [data],
        indirect=True,
    )
    def test_method(self, workspace):
    def test_method(self, config):
        raise NotImplementedError

    @staticmethod
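Note: the indirectly parametrized run_agent fixture above is being replaced by an explicit setup_challenge(config) call inside each challenge's test_method, as the test files later in this diff show. A minimal sketch of the new pattern (class name, file name, and import path are illustrative assumptions, not code from the repo):

import os

from agbenchmark.challenge import Challenge  # assumed import path


class TestExampleChallenge(Challenge):
    def get_file_path(self) -> str:  # all tests must implement this method
        return os.path.join(os.path.dirname(__file__), "example_data.json")

    def test_method(self, config):
        self.setup_challenge(config)  # runs the agent (or its mock) on self.task
        files_contents = self.open_files(config["workspace"], self.data.ground.files)
        # ...score files_contents against self.data.ground as the real tests below do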
agbenchmark/agent_interface.py (new file, 108 lines)
@@ -0,0 +1,108 @@
import os
import sys
import subprocess
import time
from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe

from agent.hook import run_specific_agent

from dotenv import load_dotenv

load_dotenv()

MOCK_FLAG = os.getenv("MOCK_TEST")


def run_agent(task, mock_func, config):
    """Calling to get a response"""

    if mock_func == None and MOCK_FLAG == "True":
        print("No mock provided")
    elif MOCK_FLAG == "True":
        mock_manager = MockManager(
            task
        )  # workspace doesn't need to be passed in, stays the same
        print("Server unavailable, using mock", mock_func)
        mock_manager.delegate(mock_func)
    else:
        if config["agent"]["type"] == "python":
            run_agent_function(config, task)
        elif config["agent"]["type"] == "script":
            run_agent_command(config, task)
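Usage sketch for the dispatcher above (illustrative, not from the diff): with MOCK_TEST=False the mock name is ignored and the agent type decides the code path; with MOCK_TEST=True and a mock name, MockManager.delegate() is called instead. The task string and mock name below are made-up examples.

run_agent(
    task="Write the word 'Washington' to a .txt file",  # hypothetical task
    mock_func="basic_write_file_mock",                   # hypothetical mock name
    config={
        "agent": {
            "type": "script",                 # "script" -> run_agent_command, "python" -> run_agent_function
            "path": "",
            "script": "python miniagi.py {}",
            "cutoff": 60,
        }
    },
)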
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"


def run_agent_command(config, task):
    path = config["agent"]["path"]

    if ENVIRONMENT == "local":
        AGENT_NAME = os.getenv("AGENT_NAME")
        path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")

    timeout = config["agent"]["cutoff"] or sys.maxsize
    print(f"Running {task} with timeout {timeout}")

    command_from_config = config["agent"]["script"]
    command_list = command_from_config.split()

    # replace '{}' with the task
    command_list = [cmd if cmd != "{}" else task for cmd in command_list]
    print("path, command_list", path, command_list)
    start_time = time.time()
    proc = subprocess.Popen(
        command_list,
        cwd=path,
        shell=True,
    )

    while True:
        if time.time() - start_time > timeout:
            print("The subprocess has exceeded the time limit and was terminated.")
            proc.terminate()
            break

        if proc.poll() is not None:
            print("The subprocess has finished running.")
            break
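The polling loop above enforces a wall-clock cutoff by hand. For reference, a minimal sketch of the same behaviour using subprocess.run's built-in timeout (not part of this commit):

import subprocess


def run_with_cutoff(command_list, path, timeout):
    try:
        subprocess.run(command_list, cwd=path, timeout=timeout, check=False)
        print("The subprocess has finished running.")
    except subprocess.TimeoutExpired:
        print("The subprocess has exceeded the time limit and was terminated.")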
def run_agent_function(config, task):
    timeout = (
        config["cutoff"]["count"] if config["cutoff"]["type"] == "time" else sys.maxsize
    )
    print(
        f"Running Python function '{config['agent']['function']}' with timeout {timeout}"
    )

    parent_conn, child_conn = Pipe()
    process = Process(target=run_specific_agent, args=(task, child_conn))
    process.start()
    start_time = time.time()

    while True:
        if parent_conn.poll():  # Check if there's a new message from the child process
            response, cycle_count = parent_conn.recv()
            print(f"Cycle {cycle_count}: {response}")

            if cycle_count >= config["cutoff"]["count"]:
                print(
                    f"Cycle count has reached the limit of {config['cutoff']['count']}. Terminating."
                )
                child_conn.send("terminate")
                break

        if time.time() - start_time > timeout:
            print("The Python function has exceeded the time limit and was terminated.")
            child_conn.send(
                "terminate"
            )  # Send a termination signal to the child process
            break

        if not process.is_alive():
            print("The Python function has finished running.")
            break

    process.join()
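run_agent_function drives the agent over a multiprocessing Pipe: the child is expected to send (response, cycle_count) tuples and to stop once the parent sends a message. A stand-in target that satisfies that protocol (illustrative only; names and timing are assumptions):

import time


def dummy_specific_agent(task, conn):
    cycle_count = 0
    while not conn.poll():  # the parent's "terminate" message ends the loop
        cycle_count += 1
        conn.send((f"cycle {cycle_count} of task '{task}'", cycle_count))
        time.sleep(1)
    conn.close()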
@@ -1,65 +0,0 @@
import os
import sys
import pexpect as expect
from dotenv import load_dotenv

load_dotenv()


def check_cycle_count(cycle_count: int, cutoff: int, proc):
    """Increment, print, and check cycle count."""
    cycle_count += 1
    print(f"Cycle count: {cycle_count}")
    if cycle_count >= cutoff:
        proc.terminate(force=True)
    return cycle_count


AGENT_NAME = os.getenv("AGENT_NAME")


def run_agnostic(config, task):
    path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")

    timeout = sys.maxsize

    if config["cutoff"]["type"] == "time":
        timeout = config["cutoff"]["count"] or 60

    # from pexpect.popen_spawn import PopenSpawn

    print(f"Running {task} with timeout {timeout}")

    # Starting the subprocess using pexpect
    proc = expect.spawn("python", ["miniagi.py", task], timeout=timeout, cwd=path)

    print("proc", proc)

    cycle_count = 0

    while True:
        try:
            # If we get the prompt for user input, we send "\n"
            if config["cutoff"]["type"] == "user_input":
                proc.expect([config["cutoff"]["user_prompt"]])
                proc.sendline(config["cutoff"]["user_input"])
                cycle_count = check_cycle_count(
                    cycle_count, config["cutoff"]["count"], proc
                )
            elif config["cutoff"]["type"] == "cycle_count":
                match = proc.expect([r"Cycle count: (\d+)"])
                if match is not None:
                    cycle_count = int(match.group(1))  # type: ignore
                    cycle_count = check_cycle_count(
                        cycle_count, config["cutoff"]["count"], proc
                    )

        # for cutoff type "time", just let it run until timeout
        except expect.TIMEOUT:
            print("The subprocess has exceeded the time limit and was terminated.")
            break
        except expect.EOF:
            print("The subprocess has finished running.")
            break

    proc.close()
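Aside on the removed cycle_count branch above: pexpect's expect() returns the index of the matched pattern, and the regex match object is exposed as proc.match, so the intended extraction would look roughly like this sketch (an assumption, not code from the repo):

import pexpect as expect

proc = expect.spawn("python miniagi.py 'example task'", timeout=60)  # hypothetical command
proc.expect([r"Cycle count: (\d+)"])    # returns the index of the matched pattern
cycle_count = int(proc.match.group(1))  # the match object lives on proc.match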
@@ -1,6 +1,4 @@
import pytest
from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
from agbenchmark.challenges.define_task_types import ChallengeData, Ground
import os


@@ -10,8 +8,9 @@ class TestRetrieval1(RetrievalChallenge):
    def get_file_path(self) -> str:  # all tests must implement this method
        return os.path.join(os.path.dirname(__file__), "r1_data.json")

    def test_method(self, workspace):
        files_contents = self.open_files(workspace, self.data.ground.files)
    def test_method(self, config):
        self.setup_challenge(config)
        files_contents = self.open_files(config["workspace"], self.data.ground.files)

        scores = []
        for file_content in files_contents:
@@ -1,9 +1,10 @@
{
    "workspace": "C:\\Users\\silen\\miniagi",
    "cutoff": {
        "type": "time",
        "user_prompt": "Press enter to continue or abort this action by typing feedback:",
    "agent": {
        "type": "script",
        "path": "",
        "script": "python miniagi.py {}",
        "user_input": "\n",
        "count": 5
        "cutoff": 60
    }
}
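Reading this hunk together with agent_interface.py above, the new config shape appears to be the following; this is an assumption-level reconstruction from the keys the code reads, not a verbatim copy of the committed file.

example_config = {
    "workspace": "C:\\Users\\silen\\miniagi",  # where test_method(config) looks for output files
    "agent": {
        "type": "script",                  # "script" -> run_agent_command, "python" -> run_agent_function
        "path": "",                        # cwd for the subprocess; overridden when ENVIRONMENT=local
        "script": "python miniagi.py {}",  # "{}" is replaced by the task
        "cutoff": 60,                      # timeout used by run_agent_command
    },
    "cutoff": {
        "type": "time",                    # time-based vs cycle-based cutoff in run_agent_function
        "count": 5,                        # cycle limit (or seconds when type is "time")
    },
}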
@@ -2,11 +2,7 @@ import json
import os
import pytest
import shutil
import subprocess
import sys
from agbenchmark.tests.regression.RegressionManager import RegressionManager
from agbenchmark.mocks.MockManager import MockManager
from agbenchmark.benchmark import run_agnostic


@pytest.fixture(scope="module")
@@ -41,29 +37,6 @@ def pytest_addoption(parser):
    parser.addoption("--mock", action="store_true", default=False)


@pytest.fixture(autouse=True)
def run_agent(request, config):
    """Calling to get a response"""
    if isinstance(request.param, tuple):
        task = request.param[0]  # The task is passed in indirectly
        mock_function_name = request.param[1] or None
    else:
        task = request.param
        mock_function_name = None

    if mock_function_name != None and (request.config.getoption("--mock")):
        if mock_function_name:
            mock_manager = MockManager(
                task
            )  # workspace doesn't need to be passed in, stays the same
            print("Server unavailable, using mock", mock_function_name)
            mock_manager.delegate(mock_function_name)
        else:
            print("No mock provided")
    else:
        run_agnostic(config, task)


regression_json = "agbenchmark/tests/regression/regression_tests.json"

regression_manager = RegressionManager(regression_json)
@@ -120,13 +93,3 @@ def pytest_generate_tests(metafunc):

    # Add the parameters to the test function
    metafunc.parametrize("challenge_data", [params], indirect=True)

    if "run_agent" in metafunc.fixturenames:
        # Get the instance of the test class
        test_class = metafunc.cls()

        # Generate the parameters
        params = [(test_class.task, test_class.mock)]

        # Add the parameters to the test function
        metafunc.parametrize("run_agent", params, indirect=True)
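The removed run_agent fixture and the remaining challenge_data fixture both rely on pytest's indirect parametrization: values passed with indirect=True arrive in the fixture as request.param rather than going straight to the test. A generic illustration (not from this repo):

import pytest


@pytest.fixture
def challenge_data(request):
    return request.param  # receives the value passed via metafunc.parametrize(..., indirect=True)


def pytest_generate_tests(metafunc):
    if "challenge_data" in metafunc.fixturenames:
        metafunc.parametrize("challenge_data", [{"task": "example"}], indirect=True)


def test_example(challenge_data):
    assert challenge_data["task"] == "example"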
agbenchmark/mocks/workspace/file_to_check.txt (new file, 1 line)
@@ -0,0 +1 @@
Washington DC is the capital of the United States of America
@@ -17,10 +17,9 @@ class TestReadFile(BasicChallenge):
        return os.path.join(os.path.dirname(__file__), "r_file_data.json")

    @pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
    def test_method(
        self, workspace
    ):  # run_test is a common name that all tests must implement
        files_contents = self.open_files(workspace, self.data.ground.files)
    def test_method(self, config):
        self.setup_challenge(config)
        files_contents = self.open_files(config["workspace"], self.data.ground.files)

        scores = []
        for file_content in files_contents:
@@ -10,9 +10,9 @@ class TestWriteFile(BasicChallenge):
        return os.path.join(os.path.dirname(__file__), "w_file_data.json")

    @pytest.mark.depends(on=[], name="basic_write_file")
    def test_method(self, workspace):
        print("my workspace is ", workspace)
        files_contents = self.open_files(workspace, self.data.ground.files)
    def test_method(self, config):
        self.setup_challenge(config)
        files_contents = self.open_files(config["workspace"], self.data.ground.files)

        scores = []
        for file_content in files_contents:
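The two tests above are ordered with pytest-depends: a test registers a name via name=..., and dependents list it in on=[...]; if the named test fails or is skipped, the dependent test is skipped as well. A generic illustration (not from this repo):

import pytest


@pytest.mark.depends(name="basic_write_file")
def test_write_file():
    assert True


@pytest.mark.depends(on=["basic_write_file"])
def test_read_file():
    assert True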
@@ -2,13 +2,6 @@
    "TestWriteFile": {
        "difficulty": "basic",
        "dependencies": [],
        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0-run_agent0]"
    },
    "TestReadFile": {
        "difficulty": "basic",
        "dependencies": [
            "basic_write_file"
        ],
        "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_method[challenge_data0-run_agent0]"
        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0]"
    }
}
agent/hook.py (new file, 10 lines)
@@ -0,0 +1,10 @@
async def run_specific_agent(task, conn):
    while (
        not conn.poll()
    ):  # Check if there's a termination signal from the main process
        response, cycle_count = await run_agent(
            task
        )  # run the agent and get the response and cycle count

        # Send response and cycle count back to the main process
        conn.send((response, cycle_count))
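Note that run_specific_agent is an async coroutine while run_agent_function starts it with multiprocessing.Process, which calls its target synchronously; run_agent inside this hook is presumably provided by the agent implementation, as it is not imported in this 10-line file. A thin synchronous wrapper is one way to bridge the async target (an assumption, not code from this commit):

import asyncio

from agent.hook import run_specific_agent


def run_specific_agent_sync(task, conn):
    asyncio.run(run_specific_agent(task, conn))  # drive the coroutine to completion in the child process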
@@ -16,8 +16,6 @@ openai = "^0.27.8"
pydantic = "^1.10.9"
pytest-depends = "^1.0.1"
python-dotenv = "^1.0.0"
pexpect = "^4.8.0"
wexpect = "^4.0.0"


[build-system]