mirror of https://github.com/aljazceru/Auto-GPT.git (synced 2025-12-18 22:44:21 +01:00)
Moving run_agent to tests & agnostic run working
@@ -1,3 +1,3 @@
AGENT_NAME=mini-agi
AGENT_TIMEOUT=60
ENVIRONMENT=local
MOCK_TEST=False
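How the variables above are consumed further down in this commit (a reading aid, not code from the diff); AGENT_TIMEOUT is declared here but not referenced in the files shown in this diff.

import os
from dotenv import load_dotenv

load_dotenv()  # pulls the .env values into the process environment

MOCK_FLAG = os.getenv("MOCK_TEST")                      # "True" routes run_agent to MockManager
AGENT_NAME = os.getenv("AGENT_NAME")                    # e.g. "mini-agi", used to build the local agent path
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"  # "local" points run_agent_command at agent/<AGENT_NAME>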
@@ -4,7 +4,7 @@ import pytest
from abc import ABC, abstractmethod
from agbenchmark.challenges.define_task_types import Ground
from agbenchmark.challenges.define_task_types import ChallengeData
from dotenv import load_dotenv, set_key
from dotenv import load_dotenv

load_dotenv()

@@ -40,22 +40,24 @@ class Challenge(ABC):
        print("self.data.dependencies", self.data.dependencies)
        return self.data.dependencies

    def setup_challenge(self, config):
        from agbenchmark.agent_interface import run_agent

        print("SETTING UP CHALLENGE...")

        run_agent(self.task, self.mock, config)

    @property
    def name(self) -> str:
        print("self.data.name", self.data.name)
        return self.data.name

    @pytest.mark.parametrize(
        "run_agent",
        [(task, mock)],
        indirect=True,
    )
    @pytest.mark.parametrize(
        "challenge_data",
        [data],
        indirect=True,
    )
    def test_method(self, workspace):
    def test_method(self, config):
        raise NotImplementedError

    @staticmethod
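Note: the indirectly parametrized run_agent fixture above is being replaced by an explicit setup_challenge(config) call inside each challenge's test_method, as the test files later in this diff show. A minimal sketch of the new pattern (class name, file name, and import path are illustrative assumptions, not code from the repo):

import os

from agbenchmark.challenge import Challenge  # assumed import path


class TestExampleChallenge(Challenge):
    def get_file_path(self) -> str:  # all tests must implement this method
        return os.path.join(os.path.dirname(__file__), "example_data.json")

    def test_method(self, config):
        self.setup_challenge(config)  # runs the agent (or its mock) on self.task
        files_contents = self.open_files(config["workspace"], self.data.ground.files)
        # ...score files_contents against self.data.ground as the real tests below do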
agbenchmark/agent_interface.py (new file, 108 lines)
@@ -0,0 +1,108 @@
import os
import sys
import subprocess
import time
from agbenchmark.mocks.MockManager import MockManager
from multiprocessing import Process, Pipe

from agent.hook import run_specific_agent

from dotenv import load_dotenv

load_dotenv()

MOCK_FLAG = os.getenv("MOCK_TEST")


def run_agent(task, mock_func, config):
    """Calling to get a response"""

    if mock_func == None and MOCK_FLAG == "True":
        print("No mock provided")
    elif MOCK_FLAG == "True":
        mock_manager = MockManager(
            task
        )  # workspace doesn't need to be passed in, stays the same
        print("Server unavailable, using mock", mock_func)
        mock_manager.delegate(mock_func)
    else:
        if config["agent"]["type"] == "python":
            run_agent_function(config, task)
        elif config["agent"]["type"] == "script":
            run_agent_command(config, task)
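Usage sketch for the dispatcher above (illustrative, not from the diff): with MOCK_TEST=False the mock name is ignored and the agent type decides the code path; with MOCK_TEST=True and a mock name, MockManager.delegate() is called instead. The task string and mock name below are made-up examples.

run_agent(
    task="Write the word 'Washington' to a .txt file",  # hypothetical task
    mock_func="basic_write_file_mock",                   # hypothetical mock name
    config={
        "agent": {
            "type": "script",                 # "script" -> run_agent_command, "python" -> run_agent_function
            "path": "",
            "script": "python miniagi.py {}",
            "cutoff": 60,
        }
    },
)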
ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"


def run_agent_command(config, task):
    path = config["agent"]["path"]

    if ENVIRONMENT == "local":
        AGENT_NAME = os.getenv("AGENT_NAME")
        path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")

    timeout = config["agent"]["cutoff"] or sys.maxsize
    print(f"Running {task} with timeout {timeout}")

    command_from_config = config["agent"]["script"]
    command_list = command_from_config.split()

    # replace '{}' with the task
    command_list = [cmd if cmd != "{}" else task for cmd in command_list]
    print("path, command_list", path, command_list)
    start_time = time.time()
    proc = subprocess.Popen(
        command_list,
        cwd=path,
        shell=True,
    )

    while True:
        if time.time() - start_time > timeout:
            print("The subprocess has exceeded the time limit and was terminated.")
            proc.terminate()
            break

        if proc.poll() is not None:
            print("The subprocess has finished running.")
            break
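The polling loop above enforces a wall-clock cutoff by hand. For reference, a minimal sketch of the same behaviour using subprocess.run's built-in timeout (not part of this commit):

import subprocess


def run_with_cutoff(command_list, path, timeout):
    try:
        subprocess.run(command_list, cwd=path, timeout=timeout, check=False)
        print("The subprocess has finished running.")
    except subprocess.TimeoutExpired:
        print("The subprocess has exceeded the time limit and was terminated.")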
def run_agent_function(config, task):
    timeout = (
        config["cutoff"]["count"] if config["cutoff"]["type"] == "time" else sys.maxsize
    )
    print(
        f"Running Python function '{config['agent']['function']}' with timeout {timeout}"
    )

    parent_conn, child_conn = Pipe()
    process = Process(target=run_specific_agent, args=(task, child_conn))
    process.start()
    start_time = time.time()

    while True:
        if parent_conn.poll():  # Check if there's a new message from the child process
            response, cycle_count = parent_conn.recv()
            print(f"Cycle {cycle_count}: {response}")

            if cycle_count >= config["cutoff"]["count"]:
                print(
                    f"Cycle count has reached the limit of {config['cutoff']['count']}. Terminating."
                )
                child_conn.send("terminate")
                break

        if time.time() - start_time > timeout:
            print("The Python function has exceeded the time limit and was terminated.")
            child_conn.send(
                "terminate"
            )  # Send a termination signal to the child process
            break

        if not process.is_alive():
            print("The Python function has finished running.")
            break

    process.join()
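run_agent_function drives the agent over a multiprocessing Pipe: the child is expected to send (response, cycle_count) tuples and to stop once the parent sends a message. A stand-in target that satisfies that protocol (illustrative only; names and timing are assumptions):

import time


def dummy_specific_agent(task, conn):
    cycle_count = 0
    while not conn.poll():  # the parent's "terminate" message ends the loop
        cycle_count += 1
        conn.send((f"cycle {cycle_count} of task '{task}'", cycle_count))
        time.sleep(1)
    conn.close()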
@@ -1,65 +0,0 @@
import os
import sys
import pexpect as expect
from dotenv import load_dotenv

load_dotenv()


def check_cycle_count(cycle_count: int, cutoff: int, proc):
    """Increment, print, and check cycle count."""
    cycle_count += 1
    print(f"Cycle count: {cycle_count}")
    if cycle_count >= cutoff:
        proc.terminate(force=True)
    return cycle_count


AGENT_NAME = os.getenv("AGENT_NAME")


def run_agnostic(config, task):
    path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")

    timeout = sys.maxsize

    if config["cutoff"]["type"] == "time":
        timeout = config["cutoff"]["count"] or 60

    # from pexpect.popen_spawn import PopenSpawn

    print(f"Running {task} with timeout {timeout}")

    # Starting the subprocess using pexpect
    proc = expect.spawn("python", ["miniagi.py", task], timeout=timeout, cwd=path)

    print("proc", proc)

    cycle_count = 0

    while True:
        try:
            # If we get the prompt for user input, we send "\n"
            if config["cutoff"]["type"] == "user_input":
                proc.expect([config["cutoff"]["user_prompt"]])
                proc.sendline(config["cutoff"]["user_input"])
                cycle_count = check_cycle_count(
                    cycle_count, config["cutoff"]["count"], proc
                )
            elif config["cutoff"]["type"] == "cycle_count":
                match = proc.expect([r"Cycle count: (\d+)"])
                if match is not None:
                    cycle_count = int(match.group(1))  # type: ignore
                    cycle_count = check_cycle_count(
                        cycle_count, config["cutoff"]["count"], proc
                    )

        # for cutoff type "time", just let it run until timeout
        except expect.TIMEOUT:
            print("The subprocess has exceeded the time limit and was terminated.")
            break
        except expect.EOF:
            print("The subprocess has finished running.")
            break

    proc.close()
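Aside on the removed cycle_count branch above: pexpect's expect() returns the index of the matched pattern, and the regex match object is exposed as proc.match, so the intended extraction would look roughly like this sketch (an assumption, not code from the repo):

import pexpect as expect

proc = expect.spawn("python miniagi.py 'example task'", timeout=60)  # hypothetical command
proc.expect([r"Cycle count: (\d+)"])    # returns the index of the matched pattern
cycle_count = int(proc.match.group(1))  # the match object lives on proc.match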
@@ -1,6 +1,4 @@
import pytest
from agbenchmark.challenges.retrieval.Retrieval import RetrievalChallenge
from agbenchmark.challenges.define_task_types import ChallengeData, Ground
import os


@@ -10,8 +8,9 @@ class TestRetrieval1(RetrievalChallenge):
    def get_file_path(self) -> str:  # all tests must implement this method
        return os.path.join(os.path.dirname(__file__), "r1_data.json")

    def test_method(self, workspace):
        files_contents = self.open_files(workspace, self.data.ground.files)
    def test_method(self, config):
        self.setup_challenge(config)
        files_contents = self.open_files(config["workspace"], self.data.ground.files)

        scores = []
        for file_content in files_contents:
@@ -1,9 +1,10 @@
{
    "workspace": "C:\\Users\\silen\\miniagi",
    "cutoff": {
        "type": "time",
        "user_prompt": "Press enter to continue or abort this action by typing feedback:",
    "agent": {
        "type": "script",
        "path": "",
        "script": "python miniagi.py {}",
        "user_input": "\n",
        "count": 5
        "cutoff": 60
    }
}
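Reading this hunk together with agent_interface.py above, the new config shape appears to be the following; this is an assumption-level reconstruction from the keys the code reads, not a verbatim copy of the committed file.

example_config = {
    "workspace": "C:\\Users\\silen\\miniagi",  # where test_method(config) looks for output files
    "agent": {
        "type": "script",                  # "script" -> run_agent_command, "python" -> run_agent_function
        "path": "",                        # cwd for the subprocess; overridden when ENVIRONMENT=local
        "script": "python miniagi.py {}",  # "{}" is replaced by the task
        "cutoff": 60,                      # timeout used by run_agent_command
    },
    "cutoff": {
        "type": "time",                    # time-based vs cycle-based cutoff in run_agent_function
        "count": 5,                        # cycle limit (or seconds when type is "time")
    },
}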
@@ -2,11 +2,7 @@ import json
import os
import pytest
import shutil
import subprocess
import sys
from agbenchmark.tests.regression.RegressionManager import RegressionManager
from agbenchmark.mocks.MockManager import MockManager
from agbenchmark.benchmark import run_agnostic


@pytest.fixture(scope="module")
@@ -41,29 +37,6 @@ def pytest_addoption(parser):
    parser.addoption("--mock", action="store_true", default=False)


@pytest.fixture(autouse=True)
def run_agent(request, config):
    """Calling to get a response"""
    if isinstance(request.param, tuple):
        task = request.param[0]  # The task is passed in indirectly
        mock_function_name = request.param[1] or None
    else:
        task = request.param
        mock_function_name = None

    if mock_function_name != None and (request.config.getoption("--mock")):
        if mock_function_name:
            mock_manager = MockManager(
                task
            )  # workspace doesn't need to be passed in, stays the same
            print("Server unavailable, using mock", mock_function_name)
            mock_manager.delegate(mock_function_name)
        else:
            print("No mock provided")
    else:
        run_agnostic(config, task)


regression_json = "agbenchmark/tests/regression/regression_tests.json"

regression_manager = RegressionManager(regression_json)
@@ -120,13 +93,3 @@ def pytest_generate_tests(metafunc):

    # Add the parameters to the test function
    metafunc.parametrize("challenge_data", [params], indirect=True)

    if "run_agent" in metafunc.fixturenames:
        # Get the instance of the test class
        test_class = metafunc.cls()

        # Generate the parameters
        params = [(test_class.task, test_class.mock)]

        # Add the parameters to the test function
        metafunc.parametrize("run_agent", params, indirect=True)
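The removed run_agent fixture and the remaining challenge_data fixture both rely on pytest's indirect parametrization: values passed with indirect=True arrive in the fixture as request.param rather than going straight to the test. A generic illustration (not from this repo):

import pytest


@pytest.fixture
def challenge_data(request):
    return request.param  # receives the value passed via metafunc.parametrize(..., indirect=True)


def pytest_generate_tests(metafunc):
    if "challenge_data" in metafunc.fixturenames:
        metafunc.parametrize("challenge_data", [{"task": "example"}], indirect=True)


def test_example(challenge_data):
    assert challenge_data["task"] == "example"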
agbenchmark/mocks/workspace/file_to_check.txt (new file, 1 line)
@@ -0,0 +1 @@
Washington DC is the capital of the United States of America
@@ -17,10 +17,9 @@ class TestReadFile(BasicChallenge):
        return os.path.join(os.path.dirname(__file__), "r_file_data.json")

    @pytest.mark.depends(on=["basic_write_file"], name="basic_read_file")
    def test_method(
        self, workspace
    ):  # run_test is a common name that all tests must implement
        files_contents = self.open_files(workspace, self.data.ground.files)
    def test_method(self, config):
        self.setup_challenge(config)
        files_contents = self.open_files(config["workspace"], self.data.ground.files)

        scores = []
        for file_content in files_contents:
@@ -10,9 +10,9 @@ class TestWriteFile(BasicChallenge):
        return os.path.join(os.path.dirname(__file__), "w_file_data.json")

    @pytest.mark.depends(on=[], name="basic_write_file")
    def test_method(self, workspace):
        print("my workspace is ", workspace)
        files_contents = self.open_files(workspace, self.data.ground.files)
    def test_method(self, config):
        self.setup_challenge(config)
        files_contents = self.open_files(config["workspace"], self.data.ground.files)

        scores = []
        for file_content in files_contents:
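The two tests above are ordered with pytest-depends: a test registers a name via name=..., and dependents list it in on=[...]; if the named test fails or is skipped, the dependent test is skipped as well. A generic illustration (not from this repo):

import pytest


@pytest.mark.depends(name="basic_write_file")
def test_write_file():
    assert True


@pytest.mark.depends(on=["basic_write_file"])
def test_read_file():
    assert True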
@@ -2,13 +2,6 @@
    "TestWriteFile": {
        "difficulty": "basic",
        "dependencies": [],
        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0-run_agent0]"
    },
    "TestReadFile": {
        "difficulty": "basic",
        "dependencies": [
            "basic_write_file"
        ],
        "test": "agbenchmark/tests/basic_abilities/read_file/read_file_test.py::TestReadFile::test_method[challenge_data0-run_agent0]"
        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0]"
    }
}
agent/hook.py (new file, 10 lines)
@@ -0,0 +1,10 @@
async def run_specific_agent(task, conn):
    while (
        not conn.poll()
    ):  # Check if there's a termination signal from the main process
        response, cycle_count = await run_agent(
            task
        )  # run the agent and get the response and cycle count

        # Send response and cycle count back to the main process
        conn.send((response, cycle_count))
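Note that run_specific_agent is an async coroutine while run_agent_function starts it with multiprocessing.Process, which calls its target synchronously; run_agent inside this hook is presumably provided by the agent implementation, as it is not imported in this 10-line file. A thin synchronous wrapper is one way to bridge the async target (an assumption, not code from this commit):

import asyncio

from agent.hook import run_specific_agent


def run_specific_agent_sync(task, conn):
    asyncio.run(run_specific_agent(task, conn))  # drive the coroutine to completion in the child process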
@@ -16,8 +16,6 @@ openai = "^0.27.8"
pydantic = "^1.10.9"
pytest-depends = "^1.0.1"
python-dotenv = "^1.0.0"
pexpect = "^4.8.0"
wexpect = "^4.0.0"


[build-system]