mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-01-03 06:14:32 +01:00
Merge pull request #42 from Significant-Gravitas/feat/kill
adding hook to integrate agnostically
This commit is contained in:
@@ -1,3 +1,3 @@
|
||||
AGENT_NAME=mini-agi
|
||||
AGENT_TIMEOUT=60
|
||||
ENVIRONMENT=local
|
||||
MOCK_TEST=False
|
||||
@@ -4,7 +4,7 @@ import pytest
|
||||
from abc import ABC, abstractmethod
|
||||
from agbenchmark.challenges.define_task_types import Ground
|
||||
from agbenchmark.challenges.define_task_types import ChallengeData
|
||||
from dotenv import load_dotenv, set_key
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -23,6 +23,7 @@ class Challenge(ABC):
|
||||
|
||||
@property
|
||||
def data(self) -> ChallengeData:
|
||||
# TODO: make it so that this is cached somewhere to just call self.deserialized_data
|
||||
return ChallengeData.deserialize(self.get_file_path())
|
||||
|
||||
@property
|
||||
@@ -37,25 +38,23 @@ class Challenge(ABC):
|
||||
|
||||
@property
|
||||
def dependencies(self) -> list:
|
||||
print("self.data.dependencies", self.data.dependencies)
|
||||
return self.data.dependencies
|
||||
|
||||
def setup_challenge(self, config):
|
||||
from agbenchmark.agent_interface import run_agent
|
||||
|
||||
run_agent(self.task, self.mock, config)
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
print("self.data.name", self.data.name)
|
||||
return self.data.name
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"run_agent",
|
||||
[(task, mock)],
|
||||
indirect=True,
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"challenge_data",
|
||||
[data],
|
||||
indirect=True,
|
||||
)
|
||||
def test_method(self, workspace):
|
||||
def test_method(self, config):
|
||||
raise NotImplementedError
|
||||
|
||||
@staticmethod
|
||||
|
||||
70
agbenchmark/agent_interface.py
Normal file
70
agbenchmark/agent_interface.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import os
|
||||
import importlib
|
||||
import time
|
||||
from agbenchmark.mocks.MockManager import MockManager
|
||||
from multiprocessing import Process, Pipe
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
MOCK_FLAG = os.getenv("MOCK_TEST")
|
||||
|
||||
|
||||
# Seconds the agent process is given to exit on its own after it has been asked
# to stop, before it is forcibly terminated.
_TERMINATE_GRACE_SECONDS = 5

# Seconds to block waiting for a message from the agent on each loop pass, so
# the monitoring loop does not spin at 100% CPU.
_POLL_INTERVAL_SECONDS = 0.1


def _module_name_from_path(func_path):
    """Turn a file path such as ``agent/benchmarks.py`` into ``agent.benchmarks``."""
    # Drop only a literal ".py" suffix. str.rstrip(".py") strips any trailing
    # '.', 'p' or 'y' characters and would turn "agent/happy.py" into "agent.ha".
    if func_path.endswith(".py"):
        func_path = func_path[: -len(".py")]
    # Leading/trailing separators would produce a module name starting or ending
    # with ".", which importlib.import_module cannot resolve without a package.
    return func_path.replace("\\", "/").strip("/").replace("/", ".")


def run_agent(task, mock_func, config):
    """Run the agent (or its mock) for ``task``.

    When the ``MOCK_TEST`` environment flag is ``"True"`` the mock named by
    ``mock_func`` is delegated to ``MockManager``; if no mock is given a notice
    is printed and nothing is run.

    Otherwise the ``run_specific_agent`` function is imported from the module
    addressed by ``config["func_path"]`` and started in a child process with
    ``(task, connection)``. The child is expected to send
    ``(response, cycle_count)`` tuples over the connection. The parent asks it
    to stop by sending ``"terminate"`` when either the reported cycle count or
    the elapsed wall-clock seconds reaches ``config["cutoff"]``.

    Args:
        task: Task description forwarded to the agent or the mock manager.
        mock_func: Name of the mock to run, or ``None``.
        config: Mapping providing ``"func_path"`` and ``"cutoff"``.
    """
    if MOCK_FLAG == "True":
        if mock_func is None:
            print("No mock provided")
            return
        mock_manager = MockManager(
            task
        )  # workspace doesn't need to be passed in, stays the same
        print("Server unavailable, using mock", mock_func)
        mock_manager.delegate(mock_func)
        return

    timeout = config["cutoff"]
    if isinstance(timeout, str):
        # A cutoff entered at an interactive prompt may arrive as text;
        # comparing it with numbers below would raise TypeError.
        timeout = float(timeout)
    print(f"Running Python function '{config['func_path']}' with timeout {timeout}")

    parent_conn, child_conn = Pipe()

    # Import the specific agent dynamically
    module = importlib.import_module(_module_name_from_path(config["func_path"]))
    run_specific_agent = getattr(module, "run_specific_agent")

    process = Process(target=run_specific_agent, args=(task, child_conn))
    process.start()
    start_time = time.time()

    try:
        while True:
            # Block briefly for a new message from the child process.
            if parent_conn.poll(_POLL_INTERVAL_SECONDS):
                response, cycle_count = parent_conn.recv()
                print(f"Cycle {cycle_count}: {response}")

                if cycle_count >= timeout:
                    print(
                        f"Cycle count has reached the limit of {config['cutoff']}. Terminating."
                    )
                    # Must go out on the parent's end of the pipe: anything
                    # sent on child_conn is delivered back to the parent, so
                    # the child would never see the signal.
                    parent_conn.send("terminate")
                    break

            if time.time() - start_time > timeout:
                print(
                    "The Python function has exceeded the time limit and was terminated."
                )
                # Send a termination signal to the child process
                parent_conn.send("terminate")
                break

            if not process.is_alive():
                print("The Python function has finished running.")
                break

        # The child only checks for the signal between agent cycles; do not
        # wait on it indefinitely if a cycle never returns.
        process.join(_TERMINATE_GRACE_SECONDS)
        if process.is_alive():
            process.terminate()
            process.join()
    finally:
        parent_conn.close()
        child_conn.close()
|
||||
|
||||
|
||||
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
|
||||
@@ -1,6 +1,4 @@
|
||||
import pytest
|
||||
from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
|
||||
from agbenchmark.challenges.define_task_types import ChallengeData, Ground
|
||||
import os
|
||||
|
||||
|
||||
@@ -10,8 +8,9 @@ class TestRetrieval1(RetrievalChallenge):
|
||||
def get_file_path(self) -> str: # all tests must implement this method
|
||||
return os.path.join(os.path.dirname(__file__), "r1_data.json")
|
||||
|
||||
def test_method(self, workspace):
|
||||
files_contents = self.open_files(workspace, self.data.ground.files)
|
||||
def test_method(self, config):
|
||||
self.setup_challenge(config)
|
||||
files_contents = self.open_files(config["workspace"], self.data.ground.files)
|
||||
|
||||
scores = []
|
||||
for file_content in files_contents:
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
{
|
||||
"hostname": "localhost"
|
||||
"workspace": "C:\\Users\\silen\\miniagi",
|
||||
"func_path": "agent/benchmarks.py",
|
||||
"cutoff": 60
|
||||
}
|
||||
|
||||
@@ -3,13 +3,6 @@ import os
|
||||
import pytest
|
||||
import shutil
|
||||
from agbenchmark.tests.regression.RegressionManager import RegressionManager
|
||||
import requests
|
||||
from agbenchmark.mocks.MockManager import MockManager
|
||||
import subprocess
|
||||
from agbenchmark.Challenge import Challenge
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@@ -44,47 +37,6 @@ def pytest_addoption(parser):
|
||||
parser.addoption("--mock", action="store_true", default=False)
|
||||
|
||||
|
||||
AGENT_NAME = os.getenv("AGENT_NAME")
|
||||
AGENT_TIMEOUT = os.getenv("AGENT_TIMEOUT")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def run_agent(request, config):
|
||||
"""Calling to get a response"""
|
||||
if isinstance(request.param, tuple):
|
||||
task = request.param[0] # The task is passed in indirectly
|
||||
mock_function_name = request.param[1] or None
|
||||
else:
|
||||
task = request.param
|
||||
mock_function_name = None
|
||||
|
||||
if mock_function_name != None and (request.config.getoption("--mock")):
|
||||
if mock_function_name:
|
||||
mock_manager = MockManager(
|
||||
task
|
||||
) # workspace doesn't need to be passed in, stays the same
|
||||
print("Server unavailable, using mock", mock_function_name)
|
||||
mock_manager.delegate(mock_function_name)
|
||||
else:
|
||||
print("No mock provided")
|
||||
else:
|
||||
path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")
|
||||
|
||||
try:
|
||||
timeout = int(AGENT_TIMEOUT) if AGENT_TIMEOUT is not None else 60
|
||||
|
||||
subprocess.run(
|
||||
["python", "miniagi.py", task],
|
||||
check=True,
|
||||
cwd=path,
|
||||
timeout=timeout
|
||||
# text=True,
|
||||
# capture_output=True
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
print("The subprocess has exceeded the time limit and was terminated.")
|
||||
|
||||
|
||||
regression_json = "agbenchmark/tests/regression/regression_tests.json"
|
||||
|
||||
regression_manager = RegressionManager(regression_json)
|
||||
@@ -141,13 +93,3 @@ def pytest_generate_tests(metafunc):
|
||||
|
||||
# Add the parameters to the test function
|
||||
metafunc.parametrize("challenge_data", [params], indirect=True)
|
||||
|
||||
if "run_agent" in metafunc.fixturenames:
|
||||
# Get the instance of the test class
|
||||
test_class = metafunc.cls()
|
||||
|
||||
# Generate the parameters
|
||||
params = [(test_class.task, test_class.mock)]
|
||||
|
||||
# Add the parameters to the test function
|
||||
metafunc.parametrize("run_agent", params, indirect=True)
|
||||
|
||||
1
agbenchmark/mocks/workspace/file_to_check.txt
Normal file
1
agbenchmark/mocks/workspace/file_to_check.txt
Normal file
@@ -0,0 +1 @@
|
||||
Washington DC is the capital of the United States of America
|
||||
@@ -29,7 +29,17 @@ def start(category, noreg, mock):
|
||||
|
||||
config["workspace"] = click.prompt(
|
||||
"Please enter a new workspace path",
|
||||
default=os.path.join(Path.home(), "miniagi"),
|
||||
default=os.path.join(Path.home(), "workspace"),
|
||||
)
|
||||
|
||||
config["func_path"] = click.prompt(
|
||||
"Please enter a the path to your run_specific_agent function implementation",
|
||||
default="/benchmarks.py",
|
||||
)
|
||||
|
||||
config["cutoff"] = click.prompt(
|
||||
"Please enter a hard cutoff runtime for your agent",
|
||||
default="60",
|
||||
)
|
||||
|
||||
with open(config_dir, "w") as f:
|
||||
|
||||
@@ -17,10 +17,9 @@ class TestReadFile(BasicChallenge):
|
||||
return os.path.join(os.path.dirname(__file__), "r_file_data.json")
|
||||
|
||||
@pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
|
||||
def test_method(
|
||||
self, workspace
|
||||
): # run_test is a common name that all tests must implement
|
||||
files_contents = self.open_files(workspace, self.data.ground.files)
|
||||
def test_method(self, config):
|
||||
self.setup_challenge(config)
|
||||
files_contents = self.open_files(config["workspace"], self.data.ground.files)
|
||||
|
||||
scores = []
|
||||
for file_content in files_contents:
|
||||
|
||||
@@ -10,9 +10,9 @@ class TestWriteFile(BasicChallenge):
|
||||
return os.path.join(os.path.dirname(__file__), "w_file_data.json")
|
||||
|
||||
@pytest.mark.depends(on=[], name="basic_write_file")
|
||||
def test_method(self, workspace):
|
||||
print("my workspace is ", workspace)
|
||||
files_contents = self.open_files(workspace, self.data.ground.files)
|
||||
def test_method(self, config):
|
||||
self.setup_challenge(config)
|
||||
files_contents = self.open_files(config["workspace"], self.data.ground.files)
|
||||
|
||||
scores = []
|
||||
for file_content in files_contents:
|
||||
|
||||
@@ -1,7 +1 @@
|
||||
{
|
||||
"TestWriteFile": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0-run_agent0]"
|
||||
}
|
||||
}
|
||||
{}
|
||||
15
agent/benchmarks.py
Normal file
15
agent/benchmarks.py
Normal file
@@ -0,0 +1,15 @@
|
||||
# import subprocess
|
||||
|
||||
|
||||
def run_specific_agent(task, conn):
    """Drive the agent in a loop until the main process signals over ``conn``.

    Each pass calls ``run_agent(task)`` and sends ``(response, cycle_count)``
    back through ``conn``; the count only advances for truthy responses.

    NOTE(review): ``run_agent`` is neither defined nor imported in the visible
    part of this file -- it presumably has to be supplied by the agent being
    integrated; confirm before use.
    """
    completed_cycles = 0
    while True:
        # Any pending message from the main process is treated as the signal
        # to stop; its content is not inspected.
        if conn.poll():
            break

        # Run the agent once and collect its response.
        response = run_agent(task)
        completed_cycles += 1 if response else 0

        # Report the response and running cycle count to the main process.
        conn.send((response, completed_cycles))
|
||||
93
poetry.lock
generated
93
poetry.lock
generated
@@ -538,6 +538,20 @@ files = [
|
||||
{file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pexpect"
|
||||
version = "4.8.0"
|
||||
description = "Pexpect allows easy control of interactive console applications."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"},
|
||||
{file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
ptyprocess = ">=0.5"
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.0.0"
|
||||
@@ -553,6 +567,43 @@ files = [
|
||||
dev = ["pre-commit", "tox"]
|
||||
testing = ["pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
version = "5.9.5"
|
||||
description = "Cross-platform lib for process and system monitoring in Python."
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
files = [
|
||||
{file = "psutil-5.9.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:be8929ce4313f9f8146caad4272f6abb8bf99fc6cf59344a3167ecd74f4f203f"},
|
||||
{file = "psutil-5.9.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab8ed1a1d77c95453db1ae00a3f9c50227ebd955437bcf2a574ba8adbf6a74d5"},
|
||||
{file = "psutil-5.9.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4aef137f3345082a3d3232187aeb4ac4ef959ba3d7c10c33dd73763fbc063da4"},
|
||||
{file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ea8518d152174e1249c4f2a1c89e3e6065941df2fa13a1ab45327716a23c2b48"},
|
||||
{file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:acf2aef9391710afded549ff602b5887d7a2349831ae4c26be7c807c0a39fac4"},
|
||||
{file = "psutil-5.9.5-cp27-none-win32.whl", hash = "sha256:5b9b8cb93f507e8dbaf22af6a2fd0ccbe8244bf30b1baad6b3954e935157ae3f"},
|
||||
{file = "psutil-5.9.5-cp27-none-win_amd64.whl", hash = "sha256:8c5f7c5a052d1d567db4ddd231a9d27a74e8e4a9c3f44b1032762bd7b9fdcd42"},
|
||||
{file = "psutil-5.9.5-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:3c6f686f4225553615612f6d9bc21f1c0e305f75d7d8454f9b46e901778e7217"},
|
||||
{file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a7dd9997128a0d928ed4fb2c2d57e5102bb6089027939f3b722f3a210f9a8da"},
|
||||
{file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89518112647f1276b03ca97b65cc7f64ca587b1eb0278383017c2a0dcc26cbe4"},
|
||||
{file = "psutil-5.9.5-cp36-abi3-win32.whl", hash = "sha256:104a5cc0e31baa2bcf67900be36acde157756b9c44017b86b2c049f11957887d"},
|
||||
{file = "psutil-5.9.5-cp36-abi3-win_amd64.whl", hash = "sha256:b258c0c1c9d145a1d5ceffab1134441c4c5113b2417fafff7315a917a026c3c9"},
|
||||
{file = "psutil-5.9.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c607bb3b57dc779d55e1554846352b4e358c10fff3abf3514a7a6601beebdb30"},
|
||||
{file = "psutil-5.9.5.tar.gz", hash = "sha256:5410638e4df39c54d957fc51ce03048acd8e6d60abc0f5107af51e5fb566eb3c"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
|
||||
|
||||
[[package]]
|
||||
name = "ptyprocess"
|
||||
version = "0.7.0"
|
||||
description = "Run a subprocess in a pseudo terminal"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"},
|
||||
{file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pydantic"
|
||||
version = "1.10.9"
|
||||
@@ -658,6 +709,29 @@ files = [
|
||||
[package.extras]
|
||||
cli = ["click (>=5.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "pywin32"
|
||||
version = "306"
|
||||
description = "Python for Window Extensions"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"},
|
||||
{file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"},
|
||||
{file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"},
|
||||
{file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"},
|
||||
{file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"},
|
||||
{file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"},
|
||||
{file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"},
|
||||
{file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"},
|
||||
{file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"},
|
||||
{file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"},
|
||||
{file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"},
|
||||
{file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"},
|
||||
{file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"},
|
||||
{file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "requests"
|
||||
version = "2.31.0"
|
||||
@@ -738,6 +812,23 @@ secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.
|
||||
socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||
zstd = ["zstandard (>=0.18.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "wexpect"
|
||||
version = "4.0.0"
|
||||
description = "Windows alternative of pexpect"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "wexpect-4.0.0.tar.gz", hash = "sha256:de9e739e78ec4d74a39bf8499904dacb6c594007a674fb7e10752c9b131f6522"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
psutil = ">=5.0.0"
|
||||
pywin32 = ">=220"
|
||||
|
||||
[package.extras]
|
||||
test = ["codecov", "coverage", "pyinstaller", "setuptools (>=38.0)", "tox", "twine"]
|
||||
|
||||
[[package]]
|
||||
name = "yarl"
|
||||
version = "1.9.2"
|
||||
@@ -828,4 +919,4 @@ multidict = ">=4.0"
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.9"
|
||||
content-hash = "f8de5e973c92360108aaca1cecc2fdd505f10a9c2975b46c83ea9c24b4af3cfe"
|
||||
content-hash = "8ab722acade739b9fb841ecae3b8cabd4f1d8a355864573a93d9faa11dcffb90"
|
||||
|
||||
Reference in New Issue
Block a user