integrate config, agent_interface just func, hook

This commit is contained in:
Silen Naihin
2023-06-30 11:55:43 -04:00
parent 2987d71264
commit 7c352b745e
7 changed files with 73 additions and 110 deletions

View File

@@ -23,6 +23,7 @@ class Challenge(ABC):
@property
def data(self) -> ChallengeData:
    """Return the ``ChallengeData`` deserialized from ``self.get_file_path()``.

    The deserialization runs on every access; nothing is cached here.
    """
    # TODO: make it so that this is cached somewhere to just call self.deserialized_data
    data_path = self.get_file_path()
    return ChallengeData.deserialize(data_path)
@property
@@ -37,19 +38,15 @@ class Challenge(ABC):
@property
def dependencies(self) -> list:
    """Return the ``dependencies`` attribute of this challenge's data.

    ``self.data`` is read exactly once and the value reused for both the
    debug print and the return, so the underlying data is not loaded twice
    per access.
    """
    deps = self.data.dependencies
    print("self.data.dependencies", deps)
    return deps
def setup_challenge(self, config):
    """Announce the setup and run the agent on this challenge's task.

    Args:
        config: Benchmark configuration, forwarded unchanged to ``run_agent``.
    """
    # Imported at call time rather than at module import time
    # (presumably to avoid a circular import -- TODO confirm).
    from agbenchmark.agent_interface import run_agent

    print("SETTING UP CHALLENGE...")

    task, mock = self.task, self.mock
    run_agent(task, mock, config)
@property
def name(self) -> str:
    """Return the ``name`` attribute of this challenge's data.

    ``self.data`` is read exactly once and the value reused for both the
    debug print and the return, so the underlying data is not loaded twice
    per access.
    """
    challenge_name = self.data.name
    print("self.data.name", challenge_name)
    return challenge_name
@pytest.mark.parametrize(

View File

@@ -1,12 +1,9 @@
import os
import sys
import subprocess
import importlib
import time
from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe
from agent.hook import run_specific_agent
from dotenv import load_dotenv
load_dotenv()
@@ -26,83 +23,48 @@ def run_agent(task, mock_func, config):
print("Server unavailable, using mock", mock_func)
mock_manager.delegate(mock_func)
else:
if config["agent"]["type"] == "python":
run_agent_function(config, task)
elif config["agent"]["type"] == "script":
run_agent_command(config, task)
timeout = config["cutoff"]
print(f"Running Python function '{config['func_path']}' with timeout {timeout}")
parent_conn, child_conn = Pipe()
# Import the specific agent dynamically
module_name = config["func_path"].replace("/", ".").rstrip(".py")
module = importlib.import_module(module_name)
run_specific_agent = getattr(module, "run_specific_agent")
process = Process(target=run_specific_agent, args=(task, child_conn))
process.start()
start_time = time.time()
while True:
if (
parent_conn.poll()
): # Check if there's a new message from the child process
response, cycle_count = parent_conn.recv()
print(f"Cycle {cycle_count}: {response}")
if cycle_count >= config["cutoff"]:
print(
f"Cycle count has reached the limit of {config['cutoff']}. Terminating."
)
child_conn.send("terminate")
break
if time.time() - start_time > timeout:
print(
"The Python function has exceeded the time limit and was terminated."
)
child_conn.send(
"terminate"
) # Send a termination signal to the child process
break
if not process.is_alive():
print("The Python function has finished running.")
break
process.join()
# Deployment mode; the value "local" makes run_agent_command run the agent
# from ./agent/<AGENT_NAME> instead of the configured path.
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"


def run_agent_command(config, task):
    """Run the configured agent script as a subprocess, enforcing the cutoff.

    Args:
        config: Mapping with an ``"agent"`` entry providing ``"path"``
            (working directory; an empty value means the current directory),
            ``"script"`` (whitespace-separated command line in which a
            standalone ``{}`` token is replaced by ``task``) and ``"cutoff"``
            (time limit in seconds; a falsy value disables the limit).
        task: Task string substituted for the ``{}`` placeholder.

    Raises:
        RuntimeError: If ENVIRONMENT is "local" but AGENT_NAME is not set.
    """
    path = config["agent"]["path"]

    if ENVIRONMENT == "local":
        agent_name = os.getenv("AGENT_NAME")
        if not agent_name:
            # Fail early instead of building a bogus ".../agent/None" path.
            raise RuntimeError(
                "AGENT_NAME must be set when ENVIRONMENT is 'local'"
            )
        # Let os.path.join pick the separator instead of hard-coding "\\".
        path = os.path.join(os.getcwd(), "agent", agent_name)

    cutoff = config["agent"]["cutoff"]
    timeout = cutoff or sys.maxsize
    print(f"Running {task} with timeout {timeout}")

    # replace '{}' with the task
    command_list = [
        task if token == "{}" else token
        for token in config["agent"]["script"].split()
    ]
    print("path, command_list", path, command_list)

    # shell=False: with shell=True and a list, POSIX passes only the first
    # item to the shell as the command, so the task would never reach the agent.
    proc = subprocess.Popen(command_list, cwd=path or None, shell=False)
    try:
        # Block until exit or cutoff instead of busy-polling; None waits
        # forever (passing sys.maxsize as a timeout can overflow on Windows).
        proc.wait(timeout=cutoff or None)
    except subprocess.TimeoutExpired:
        print("The subprocess has exceeded the time limit and was terminated.")
        proc.terminate()
        proc.wait()  # reap the child so it does not linger as a zombie
    else:
        print("The subprocess has finished running.")
def run_agent_function(config, task):
    """Run ``run_specific_agent`` in a child process and enforce the cutoff.

    The child receives ``task`` and one end of a pipe, and is expected to send
    ``(response, cycle_count)`` tuples back. The loop stops when the reported
    cycle count reaches ``config["cutoff"]["count"]``, when the time limit is
    exceeded, or when the child exits on its own.

    Args:
        config: Mapping with ``config["cutoff"]["type"]`` and
            ``config["cutoff"]["count"]`` (used as a time limit in seconds
            when the type is ``"time"``; always used as the cycle limit) and
            ``config["agent"]["function"]`` (only printed).
        task: Task passed through to ``run_specific_agent``.
    """
    cutoff = config["cutoff"]
    timeout = cutoff["count"] if cutoff["type"] == "time" else sys.maxsize
    print(
        f"Running Python function '{config['agent']['function']}' with timeout {timeout}"
    )

    parent_conn, child_conn = Pipe()
    process = Process(target=run_specific_agent, args=(task, child_conn))
    process.start()
    start_time = time.time()

    while True:
        # Wait briefly for a message instead of spinning at full CPU.
        if parent_conn.poll(0.1):
            response, cycle_count = parent_conn.recv()
            print(f"Cycle {cycle_count}: {response}")

            if cycle_count >= cutoff["count"]:
                print(
                    f"Cycle count has reached the limit of {cutoff['count']}. Terminating."
                )
                # Send from the parent's end: anything written to child_conn
                # here would come straight back to parent_conn and never
                # reach the child process.
                parent_conn.send("terminate")
                break

        if time.time() - start_time > timeout:
            print("The Python function has exceeded the time limit and was terminated.")
            parent_conn.send("terminate")
            break

        if not process.is_alive():
            print("The Python function has finished running.")
            break

    # Give the child a grace period to notice the signal, then force it to
    # stop so join() cannot hang on an agent that never checks its pipe.
    process.join(timeout=5)
    if process.is_alive():
        process.terminate()
        process.join()

View File

@@ -1,10 +1,5 @@
{
"workspace": "C:\\Users\\silen\\miniagi",
"agent": {
"type": "script",
"path": "",
"script": "python miniagi.py {}",
"user_input": "\n",
"cutoff": 60
}
"func_path": "agent/benchmarks.py",
"cutoff": 60
}

View File

@@ -29,7 +29,17 @@ def start(category, noreg, mock):
config["workspace"] = click.prompt(
"Please enter a new workspace path",
default=os.path.join(Path.home(), "miniagi"),
default=os.path.join(Path.home(), "workspace"),
)
config["func_path"] = click.prompt(
"Please enter a the path to your run_specific_agent function implementation",
default="/benchmarks.py",
)
config["cutoff"] = click.prompt(
"Please enter a hard cutoff runtime for your agent",
default="60",
)
with open(config_dir, "w") as f:

View File

@@ -1,7 +1 @@
{
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0]"
}
}
{}

15
agent/benchmarks.py Normal file
View File

@@ -0,0 +1,15 @@
# import subprocess
# Agent-side loop: keeps running the agent on `task` and reporting results
# over `conn` until `conn.poll()` reports an incoming message.
# NOTE(review): `run_agent` is not defined or imported in the visible part of
# this file -- confirm where it is expected to come from.
def run_specific_agent(task, conn):
cycle_count = 0
while (
not conn.poll()
): # Check if there's a termination signal from the main process
response = run_agent(task) # run the agent and get its response
if response:
# only truthy responses advance the cycle counter
cycle_count += 1
# Send response and cycle count back to the main process
conn.send((response, cycle_count))

View File

@@ -1,10 +0,0 @@
async def run_specific_agent(task, conn):
    """Run the agent on ``task`` repeatedly, reporting each result over ``conn``.

    Stops as soon as ``conn.poll()`` reports an incoming message, which is
    treated as the termination signal from the main process.
    """
    while True:
        if conn.poll():
            # A message from the main process means: stop.
            break
        # run the agent and get the response and cycle count
        response, cycle_count = await run_agent(task)
        # Send response and cycle count back to the main process
        conn.send((response, cycle_count))