moving run agent to tests & agnostic run working

This commit is contained in:
Silen Naihin
2023-06-30 10:50:54 -04:00
parent fce421fb33
commit 2987d71264
13 changed files with 144 additions and 135 deletions

View File

@@ -1,3 +1,3 @@
 AGENT_NAME=mini-agi
-AGENT_TIMEOUT=60
+ENVIRONMENT=local
 MOCK_TEST=False

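A minimal sketch of how these variables are read elsewhere in this commit (python-dotenv plus os.getenv); the values in the comments are the example defaults, not part of the diff:

# Sketch only: mirrors the env handling in agent_interface.py below.
import os

from dotenv import load_dotenv

load_dotenv()

AGENT_NAME = os.getenv("AGENT_NAME")                    # e.g. "mini-agi"
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"  # "local" points the runner at agent/<AGENT_NAME>
MOCK_FLAG = os.getenv("MOCK_TEST")                      # compared against the string "True"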
View File

@@ -4,7 +4,7 @@ import pytest
 from abc import ABC, abstractmethod
 from agbenchmark.challenges.define_task_types import Ground
 from agbenchmark.challenges.define_task_types import ChallengeData
-from dotenv import load_dotenv, set_key
+from dotenv import load_dotenv

 load_dotenv()
@@ -40,22 +40,24 @@ class Challenge(ABC):
print("self.data.dependencies", self.data.dependencies) print("self.data.dependencies", self.data.dependencies)
return self.data.dependencies return self.data.dependencies
def setup_challenge(self, config):
from agbenchmark.agent_interface import run_agent
print("SETTING UP CHALLENGE...")
run_agent(self.task, self.mock, config)
@property @property
def name(self) -> str: def name(self) -> str:
print("self.data.name", self.data.name) print("self.data.name", self.data.name)
return self.data.name return self.data.name
@pytest.mark.parametrize(
"run_agent",
[(task, mock)],
indirect=True,
)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"challenge_data", "challenge_data",
[data], [data],
indirect=True, indirect=True,
) )
def test_method(self, workspace): def test_method(self, config):
raise NotImplementedError raise NotImplementedError
@staticmethod @staticmethod

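A hypothetical subclass shows how a test now drives the agent through setup_challenge instead of the removed run_agent fixture; the base-class import path and data file name are assumptions, mirroring the read/write file tests later in this commit:

import os

from agbenchmark.challenges.challenge import Challenge  # import path assumed


class TestExample(Challenge):
    def get_file_path(self) -> str:  # every challenge points at its data file
        return os.path.join(os.path.dirname(__file__), "example_data.json")

    def test_method(self, config):
        self.setup_challenge(config)  # runs the agent (or a mock) against self.task
        files_contents = self.open_files(config["workspace"], self.data.ground.files)
        # real challenges score files_contents against self.data.ground here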
View File

@@ -0,0 +1,108 @@
import os
import sys
import subprocess
import time

from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe

from agent.hook import run_specific_agent

from dotenv import load_dotenv

load_dotenv()

MOCK_FLAG = os.getenv("MOCK_TEST")


def run_agent(task, mock_func, config):
    """Calling to get a response"""

    if mock_func == None and MOCK_FLAG == "True":
        print("No mock provided")
    elif MOCK_FLAG == "True":
        mock_manager = MockManager(
            task
        )  # workspace doesn't need to be passed in, stays the same
        print("Server unavailable, using mock", mock_func)
        mock_manager.delegate(mock_func)
    else:
        if config["agent"]["type"] == "python":
            run_agent_function(config, task)
        elif config["agent"]["type"] == "script":
            run_agent_command(config, task)


ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"


def run_agent_command(config, task):
    path = config["agent"]["path"]
    if ENVIRONMENT == "local":
        AGENT_NAME = os.getenv("AGENT_NAME")
        path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")

    timeout = config["agent"]["cutoff"] or sys.maxsize
    print(f"Running {task} with timeout {timeout}")

    command_from_config = config["agent"]["script"]
    command_list = command_from_config.split()

    # replace '{}' with the task
    command_list = [cmd if cmd != "{}" else task for cmd in command_list]
    print("path, command_list", path, command_list)

    start_time = time.time()

    proc = subprocess.Popen(
        command_list,
        cwd=path,
        shell=True,
    )

    while True:
        if time.time() - start_time > timeout:
            print("The subprocess has exceeded the time limit and was terminated.")
            proc.terminate()
            break

        if proc.poll() is not None:
            print("The subprocess has finished running.")
            break


def run_agent_function(config, task):
    timeout = (
        config["cutoff"]["count"] if config["cutoff"]["type"] == "time" else sys.maxsize
    )
    print(
        f"Running Python function '{config['agent']['function']}' with timeout {timeout}"
    )
    parent_conn, child_conn = Pipe()
    process = Process(target=run_specific_agent, args=(task, child_conn))
    process.start()
    start_time = time.time()

    while True:
        if parent_conn.poll():  # Check if there's a new message from the child process
            response, cycle_count = parent_conn.recv()
            print(f"Cycle {cycle_count}: {response}")

            if cycle_count >= config["cutoff"]["count"]:
                print(
                    f"Cycle count has reached the limit of {config['cutoff']['count']}. Terminating."
                )
                child_conn.send("terminate")
                break

        if time.time() - start_time > timeout:
            print("The Python function has exceeded the time limit and was terminated.")
            child_conn.send(
                "terminate"
            )  # Send a termination signal to the child process
            break

        if not process.is_alive():
            print("The Python function has finished running.")
            break

    process.join()

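Which branch of run_agent executes depends on MOCK_TEST and the configured agent type; a rough invocation sketch follows (the config path and task string are illustrative, not taken from the commit):

import json

from agbenchmark.agent_interface import run_agent

with open("agbenchmark/config.json") as f:  # config location assumed
    config = json.load(f)

# With MOCK_TEST unset/False and "type": "script", this falls through to
# run_agent_command, which substitutes the task into the "script" template
# and launches it in the agent's directory under the configured cutoff.
run_agent("write the capital of the US to output.txt", None, config)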
View File

@@ -1,65 +0,0 @@
import os
import sys
import pexpect as expect
from dotenv import load_dotenv

load_dotenv()


def check_cycle_count(cycle_count: int, cutoff: int, proc):
    """Increment, print, and check cycle count."""
    cycle_count += 1
    print(f"Cycle count: {cycle_count}")
    if cycle_count >= cutoff:
        proc.terminate(force=True)
    return cycle_count


AGENT_NAME = os.getenv("AGENT_NAME")


def run_agnostic(config, task):
    path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")
    timeout = sys.maxsize
    if config["cutoff"]["type"] == "time":
        timeout = config["cutoff"]["count"] or 60

    # from pexpect.popen_spawn import PopenSpawn
    print(f"Running {task} with timeout {timeout}")

    # Starting the subprocess using pexpect
    proc = expect.spawn("python", ["miniagi.py", task], timeout=timeout, cwd=path)
    print("proc", proc)

    cycle_count = 0
    while True:
        try:
            # If we get the prompt for user input, we send "\n"
            if config["cutoff"]["type"] == "user_input":
                proc.expect([config["cutoff"]["user_prompt"]])
                proc.sendline(config["cutoff"]["user_input"])
                cycle_count = check_cycle_count(
                    cycle_count, config["cutoff"]["count"], proc
                )
            elif config["cutoff"]["type"] == "cycle_count":
                match = proc.expect([r"Cycle count: (\d+)"])
                if match is not None:
                    cycle_count = int(match.group(1))  # type: ignore
                    cycle_count = check_cycle_count(
                        cycle_count, config["cutoff"]["count"], proc
                    )
            # for cutoff type "time", just let it run until timeout
        except expect.TIMEOUT:
            print("The subprocess has exceeded the time limit and was terminated.")
            break
        except expect.EOF:
            print("The subprocess has finished running.")
            break

    proc.close()

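The deleted runner leaned on pexpect's expect/match split: expect() returns the index of the pattern that matched, and the regex match object is exposed on proc.match. For reference, that idiom looks like this (the command string is illustrative):

import pexpect

proc = pexpect.spawn("python miniagi.py 'example task'", timeout=60, encoding="utf-8")

# expect() blocks until one of the patterns matches and returns its index;
# the match object for a regex pattern is then available on proc.match.
index = proc.expect([r"Cycle count: (\d+)", pexpect.EOF, pexpect.TIMEOUT])
if index == 0:
    cycle_count = int(proc.match.group(1))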
View File

@@ -1,6 +1,4 @@
-import pytest
 from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
-from agbenchmark.challenges.define_task_types import ChallengeData, Ground
 import os

@@ -10,8 +8,9 @@ class TestRetrieval1(RetrievalChallenge):
     def get_file_path(self) -> str:  # all tests must implement this method
         return os.path.join(os.path.dirname(__file__), "r1_data.json")

-    def test_method(self, workspace):
-        files_contents = self.open_files(workspace, self.data.ground.files)
+    def test_method(self, config):
+        self.setup_challenge(config)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)
         scores = []

         for file_content in files_contents:

View File

@@ -1,9 +1,10 @@
 {
   "workspace": "C:\\Users\\silen\\miniagi",
-  "cutoff": {
-    "type": "time",
-    "user_prompt": "Press enter to continue or abort this action by typing feedback:",
+  "agent": {
+    "type": "script",
+    "path": "",
+    "script": "python miniagi.py {}",
     "user_input": "\n",
-    "count": 5
+    "cutoff": 60
   }
 }

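For reference, run_agent_command (in agent_interface.py above) consumes this block as a plain dict and fills the '{}' placeholder in "script" with the task; the task string below is made up:

config = {
    "workspace": "C:\\Users\\silen\\miniagi",
    "agent": {
        "type": "script",
        "path": "",
        "script": "python miniagi.py {}",
        "user_input": "\n",
        "cutoff": 60,
    },
}

task = "example task"  # illustrative
command_list = [c if c != "{}" else task for c in config["agent"]["script"].split()]
# -> ["python", "miniagi.py", "example task"]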
View File

@@ -2,11 +2,7 @@ import json
 import os
 import pytest
 import shutil
-import subprocess
-import sys
 from agbenchmark.tests.regression.RegressionManager import RegressionManager
-from agbenchmark.mocks.MockManager import MockManager
-from agbenchmark.benchmark import run_agnostic


 @pytest.fixture(scope="module")

@@ -41,29 +37,6 @@ def pytest_addoption(parser):
     parser.addoption("--mock", action="store_true", default=False)


-@pytest.fixture(autouse=True)
-def run_agent(request, config):
-    """Calling to get a response"""
-    if isinstance(request.param, tuple):
-        task = request.param[0]  # The task is passed in indirectly
-        mock_function_name = request.param[1] or None
-    else:
-        task = request.param
-        mock_function_name = None
-
-    if mock_function_name != None and (request.config.getoption("--mock")):
-        if mock_function_name:
-            mock_manager = MockManager(
-                task
-            )  # workspace doesn't need to be passed in, stays the same
-            print("Server unavailable, using mock", mock_function_name)
-            mock_manager.delegate(mock_function_name)
-        else:
-            print("No mock provided")
-    else:
-        run_agnostic(config, task)
-
-
 regression_json = "agbenchmark/tests/regression/regression_tests.json"
 regression_manager = RegressionManager(regression_json)

@@ -120,13 +93,3 @@ def pytest_generate_tests(metafunc):
         # Add the parameters to the test function
         metafunc.parametrize("challenge_data", [params], indirect=True)
-
-    if "run_agent" in metafunc.fixturenames:
-        # Get the instance of the test class
-        test_class = metafunc.cls()
-
-        # Generate the parameters
-        params = [(test_class.task, test_class.mock)]
-
-        # Add the parameters to the test function
-        metafunc.parametrize("run_agent", params, indirect=True)

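With the autouse run_agent fixture gone, test_method(self, config) presumably receives a config fixture along these lines; this is an assumption, since the actual fixture sits outside the hunks shown:

import json
import os

import pytest


@pytest.fixture(scope="module")
def config(request):
    # Location of config.json and the "--mock" pass-through are assumptions.
    config_path = os.path.join(os.path.dirname(__file__), "config.json")
    with open(config_path) as f:
        cfg = json.load(f)
    cfg["mock"] = request.config.getoption("--mock")
    return cfg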
View File

@@ -0,0 +1 @@
Washington DC is the capital of the United States of America

View File

@@ -17,10 +17,9 @@ class TestReadFile(BasicChallenge):
         return os.path.join(os.path.dirname(__file__), "r_file_data.json")

     @pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
-    def test_method(
-        self, workspace
-    ):  # run_test is a common name that all tests must implement
-        files_contents = self.open_files(workspace, self.data.ground.files)
+    def test_method(self, config):
+        self.setup_challenge(config)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)
         scores = []

         for file_content in files_contents:

View File

@@ -10,9 +10,9 @@ class TestWriteFile(BasicChallenge):
         return os.path.join(os.path.dirname(__file__), "w_file_data.json")

     @pytest.mark.depends(on=[], name="basic_write_file")
-    def test_method(self, workspace):
-        print("my workspace is ", workspace)
-        files_contents = self.open_files(workspace, self.data.ground.files)
+    def test_method(self, config):
+        self.setup_challenge(config)
+        files_contents = self.open_files(config["workspace"], self.data.ground.files)
         scores = []

         for file_content in files_contents:

View File

@@ -2,13 +2,6 @@
"TestWriteFile": { "TestWriteFile": {
"difficulty": "basic", "difficulty": "basic",
"dependencies": [], "dependencies": [],
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0-run_agent0]" "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0]"
},
"TestReadFile": {
"difficulty": "basic",
"dependencies": [
"basic_write_file"
],
"test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_method[challenge_data0-run_agent0]"
} }
} }

agent/hook.py Normal file
View File

@@ -0,0 +1,10 @@
async def run_specific_agent(task, conn):
    while (
        not conn.poll()
    ):  # Check if there's a termination signal from the main process
        response, cycle_count = await run_agent(
            task
        )  # run the agent and get the response and cycle count

        # Send response and cycle count back to the main process
        conn.send((response, cycle_count))

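run_agent_function (in agent_interface.py above) drives this hook from a separate process over a multiprocessing Pipe. A synchronous sketch of an agent-side loop that satisfies that contract; run_one_cycle is a placeholder for the agent's real step function:

def run_specific_agent(task, conn):
    cycle_count = 0
    # Keep cycling until the parent sends "terminate" through the pipe.
    while not (conn.poll() and conn.recv() == "terminate"):
        response = run_one_cycle(task)  # placeholder for the agent's own logic
        cycle_count += 1
        conn.send((response, cycle_count))  # report progress to the parent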
View File

@@ -16,8 +16,6 @@ openai = "^0.27.8"
 pydantic = "^1.10.9"
 pytest-depends = "^1.0.1"
 python-dotenv = "^1.0.0"
-pexpect = "^4.8.0"
-wexpect = "^4.0.0"

 [build-system]