Move run_agent to tests & get the agnostic run working

Silen Naihin
2023-06-30 10:50:54 -04:00
parent fce421fb33
commit 2987d71264
13 changed files with 144 additions and 135 deletions

View File

@@ -1,3 +1,3 @@
AGENT_NAME=mini-agi
AGENT_TIMEOUT=60
ENVIRONMENT=local
MOCK_TEST=False

View File

@@ -4,7 +4,7 @@ import pytest
from abc import ABC, abstractmethod
from agbenchmark.challenges.define_task_types import Ground
from agbenchmark.challenges.define_task_types import ChallengeData
from dotenv import load_dotenv, set_key
from dotenv import load_dotenv
load_dotenv()
@@ -40,22 +40,24 @@ class Challenge(ABC):
print("self.data.dependencies", self.data.dependencies)
return self.data.dependencies
def setup_challenge(self, config):
from agbenchmark.agent_interface import run_agent
print("SETTING UP CHALLENGE...")
run_agent(self.task, self.mock, config)
@property
def name(self) -> str:
print("self.data.name", self.data.name)
return self.data.name
@pytest.mark.parametrize(
"run_agent",
[(task, mock)],
indirect=True,
)
@pytest.mark.parametrize(
"challenge_data",
[data],
indirect=True,
)
def test_method(self, workspace):
def test_method(self, config):
raise NotImplementedError
@staticmethod

View File

@@ -0,0 +1,108 @@
import os
import sys
import subprocess
import time
from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe
from agent.hook import run_specific_agent
from dotenv import load_dotenv
load_dotenv()
MOCK_FLAG = os.getenv("MOCK_TEST")
def run_agent(task, mock_func, config):
"""Calling to get a response"""
if mock_func == None and MOCK_FLAG == "True":
print("No mock provided")
elif MOCK_FLAG == "True":
mock_manager = MockManager(
task
) # workspace doesn't need to be passed in, stays the same
print("Server unavailable, using mock", mock_func)
mock_manager.delegate(mock_func)
else:
if config["agent"]["type"] == "python":
run_agent_function(config, task)
elif config["agent"]["type"] == "script":
run_agent_command(config, task)
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
def run_agent_command(config, task):
path = config["agent"]["path"]
if ENVIRONMENT == "local":
AGENT_NAME = os.getenv("AGENT_NAME")
path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")
timeout = config["agent"]["cutoff"] or sys.maxsize
print(f"Running {task} with timeout {timeout}")
command_from_config = config["agent"]["script"]
command_list = command_from_config.split()
# replace '{}' with the task
command_list = [cmd if cmd != "{}" else task for cmd in command_list]
print("path, command_list", path, command_list)
start_time = time.time()
# Pass the argument list without shell=True; with a list, shell=True would ignore everything after the first element on POSIX
proc = subprocess.Popen(
command_list,
cwd=path,
)
while True:
if time.time() - start_time > timeout:
print("The subprocess has exceeded the time limit and was terminated.")
proc.terminate()
break
if proc.poll() is not None:
print("The subprocess has finished running.")
break
def run_agent_function(config, task):
timeout = (
config["cutoff"]["count"] if config["cutoff"]["type"] == "time" else sys.maxsize
)
print(
f"Running Python function '{config['agent']['function']}' with timeout {timeout}"
)
parent_conn, child_conn = Pipe()
process = Process(target=run_specific_agent, args=(task, child_conn))
process.start()
start_time = time.time()
while True:
if parent_conn.poll(): # Check if there's a new message from the child process
response, cycle_count = parent_conn.recv()
print(f"Cycle {cycle_count}: {response}")
if cycle_count >= config["cutoff"]["count"]:
print(
f"Cycle count has reached the limit of {config['cutoff']['count']}. Terminating."
)
child_conn.send("terminate")
break
if time.time() - start_time > timeout:
print("The Python function has exceeded the time limit and was terminated.")
child_conn.send(
"terminate"
) # Send a termination signal to the child process
break
if not process.is_alive():
print("The Python function has finished running.")
break
process.join()
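For context, a minimal way to exercise this interface directly (outside pytest) could look like the sketch below; the config path and the task string are illustrative and not part of this commit.

import json

from agbenchmark.agent_interface import run_agent

# Illustrative only: load the benchmark config (assumed path) and run the real agent without a mock.
with open("agbenchmark/config.json") as f:
    config = json.load(f)

run_agent("Write 'Washington' to a .txt file", None, config)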

View File

@@ -1,65 +0,0 @@
import os
import sys
import pexpect as expect
from dotenv import load_dotenv
load_dotenv()
def check_cycle_count(cycle_count: int, cutoff: int, proc):
"""Increment, print, and check cycle count."""
cycle_count += 1
print(f"Cycle count: {cycle_count}")
if cycle_count >= cutoff:
proc.terminate(force=True)
return cycle_count
AGENT_NAME = os.getenv("AGENT_NAME")
def run_agnostic(config, task):
path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")
timeout = sys.maxsize
if config["cutoff"]["type"] == "time":
timeout = config["cutoff"]["count"] or 60
# from pexpect.popen_spawn import PopenSpawn
print(f"Running {task} with timeout {timeout}")
# Starting the subprocess using pexpect
proc = expect.spawn("python", ["miniagi.py", task], timeout=timeout, cwd=path)
print("proc", proc)
cycle_count = 0
while True:
try:
# If we get the prompt for user input, we send "\n"
if config["cutoff"]["type"] == "user_input":
proc.expect([config["cutoff"]["user_prompt"]])
proc.sendline(config["cutoff"]["user_input"])
cycle_count = check_cycle_count(
cycle_count, config["cutoff"]["count"], proc
)
elif config["cutoff"]["type"] == "cycle_count":
match = proc.expect([r"Cycle count: (\d+)"])
if match is not None:
cycle_count = int(match.group(1)) # type: ignore
cycle_count = check_cycle_count(
cycle_count, config["cutoff"]["count"], proc
)
# for cutoff type "time", just let it run until timeout
except expect.TIMEOUT:
print("The subprocess has exceeded the time limit and was terminated.")
break
except expect.EOF:
print("The subprocess has finished running.")
break
proc.close()

View File

@@ -1,6 +1,4 @@
import pytest
from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
from agbenchmark.challenges.define_task_types import ChallengeData, Ground
import os
@@ -10,8 +8,9 @@ class TestRetrieval1(RetrievalChallenge):
def get_file_path(self) -> str: # all tests must implement this method
return os.path.join(os.path.dirname(__file__), "r1_data.json")
def test_method(self, workspace):
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:

View File

@@ -1,9 +1,10 @@
{
"workspace": "C:\\Users\\silen\\miniagi",
"cutoff": {
"type": "time",
"user_prompt": "Press enter to continue or abort this action by typing feedback:",
"agent": {
"type": "script",
"path": "",
"script": "python miniagi.py {}",
"user_input": "\n",
"count": 5
"cutoff": 60
}
}
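The config above only exercises the "script" branch of run_agent. The "python" branch prints config["agent"]["function"] and run_agent_function still reads a top-level "cutoff" block, so a config for that path would presumably look roughly like the sketch below (a Python dict with illustrative values, not part of this commit).

# Hypothetical config for the "python" agent type; values are illustrative only.
python_agent_config = {
    "workspace": "C:\\Users\\silen\\miniagi",
    "agent": {
        "type": "python",
        "function": "run_specific_agent",  # the function name run_agent_function prints
    },
    # run_agent_function currently reads this top-level block, not agent["cutoff"]
    "cutoff": {"type": "cycle_count", "count": 5},
}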

View File

@@ -2,11 +2,7 @@ import json
import os
import pytest
import shutil
import subprocess
import sys
from agbenchmark.tests.regression.RegressionManager import RegressionManager
from agbenchmark.mocks.MockManager import MockManager
from agbenchmark.benchmark import run_agnostic
@pytest.fixture(scope="module")
@@ -41,29 +37,6 @@ def pytest_addoption(parser):
parser.addoption("--mock", action="store_true", default=False)
@pytest.fixture(autouse=True)
def run_agent(request, config):
"""Calling to get a response"""
if isinstance(request.param, tuple):
task = request.param[0] # The task is passed in indirectly
mock_function_name = request.param[1] or None
else:
task = request.param
mock_function_name = None
if mock_function_name != None and (request.config.getoption("--mock")):
if mock_function_name:
mock_manager = MockManager(
task
) # workspace doesn't need to be passed in, stays the same
print("Server unavailable, using mock", mock_function_name)
mock_manager.delegate(mock_function_name)
else:
print("No mock provided")
else:
run_agnostic(config, task)
regression_json = "agbenchmark/tests/regression/regression_tests.json"
regression_manager = RegressionManager(regression_json)
@@ -120,13 +93,3 @@ def pytest_generate_tests(metafunc):
# Add the parameters to the test function
metafunc.parametrize("challenge_data", [params], indirect=True)
if "run_agent" in metafunc.fixturenames:
# Get the instance of the test class
test_class = metafunc.cls()
# Generate the parameters
params = [(test_class.task, test_class.mock)]
# Add the parameters to the test function
metafunc.parametrize("run_agent", params, indirect=True)

View File

@@ -0,0 +1 @@
Washington DC is the capital of the United States of America

View File

@@ -17,10 +17,9 @@ class TestReadFile(BasicChallenge):
return os.path.join(os.path.dirname(__file__), "r_file_data.json")
@pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
def test_method(
self, workspace
): # run_test is a common name that all tests must implement
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:

View File

@@ -10,9 +10,9 @@ class TestWriteFile(BasicChallenge):
return os.path.join(os.path.dirname(__file__), "w_file_data.json")
@pytest.mark.depends(on=[], name="basic_write_file")
def test_method(self, workspace):
print("my workspace is ", workspace)
files_contents = self.open_files(workspace, self.data.ground.files)
def test_method(self, config):
self.setup_challenge(config)
files_contents = self.open_files(config["workspace"], self.data.ground.files)
scores = []
for file_content in files_contents:

View File

@@ -2,13 +2,6 @@
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0-run_agent0]"
},
"TestReadFile": {
"difficulty": "basic",
"dependencies": [
"basic_write_file"
],
"test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_method[challenge_data0-run_agent0]"
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0]"
}
}

agent/hook.py Normal file (+10 lines)
View File

@@ -0,0 +1,10 @@
# run_agent is expected to come from the agent implementation (its import is not shown in this hunk)
async def run_specific_agent(task, conn):
# Keep cycling until the main process sends a termination signal
while not conn.poll():
# Run the agent and collect its response and cycle count
response, cycle_count = await run_agent(task)
# Send the response and cycle count back to the main process
conn.send((response, cycle_count))
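run_specific_agent awaits run_agent(task) and expects a (response, cycle_count) tuple back; the agent under test is assumed to provide that callable. Purely to illustrate the expected contract, a stub might look like:

# Hypothetical agent-side stub; the name run_agent and its behaviour are illustrative only.
async def run_agent(task):
    cycle_count = 1  # a real agent would track its own loop iterations
    response = f"echo: {task}"  # a real agent would plan and act on the task here
    return response, cycle_count

Since agent_interface.py starts run_specific_agent with multiprocessing.Process, the child process presumably also wraps the coroutine in something like asyncio.run; that wiring is not part of this diff.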

View File

@@ -16,8 +16,6 @@ openai = "^0.27.8"
pydantic = "^1.10.9"
pytest-depends = "^1.0.1"
python-dotenv = "^1.0.0"
pexpect = "^4.8.0"
wexpect = "^4.0.0"
[build-system]