integrate config, agent_interface just func, hook

2026-02-11 17:24:20 +01:00 · 2023-06-30 11:55:43 -04:00
parent 2987d71264
commit 7c352b745e
7 changed files with 73 additions and 110 deletions
--- a/agbenchmark/Challenge.py
+++ b/agbenchmark/Challenge.py
@@ -23,6 +23,7 @@ class Challenge(ABC):

    @property
    def data(self) -> ChallengeData:
+        # TODO: cache the deserialized ChallengeData (e.g. as self.deserialized_data) so it is not deserialized again on every access to this property
        return ChallengeData.deserialize(self.get_file_path())

    @property
@@ -37,19 +38,15 @@ class Challenge(ABC):

    @property
    def dependencies(self) -> list:
-        print("self.data.dependencies", self.data.dependencies)
        return self.data.dependencies

    def setup_challenge(self, config):
        from agbenchmark.agent_interface import run_agent

-        print("SETTING UP CHALLENGE...")
-
        run_agent(self.task, self.mock, config)

    @property
    def name(self) -> str:
-        print("self.data.name", self.data.name)
        return self.data.name

    @pytest.mark.parametrize(
--- a/agbenchmark/agent_interface.py
+++ b/agbenchmark/agent_interface.py
@@ -1,12 +1,9 @@
 import os
-import sys
-import subprocess
+import importlib
 import time
 from agbenchmark.mocks.MockManager import MockManager
 from multiprocessing import Process, Pipe

-from agent.hook import run_specific_agent
-
 from dotenv import load_dotenv

 load_dotenv()
@@ -26,83 +23,48 @@ def run_agent(task, mock_func, config):
        print("Server unavailable, using mock", mock_func)
        mock_manager.delegate(mock_func)
    else:
-        if config["agent"]["type"] == "python":
-            run_agent_function(config, task)
-        elif config["agent"]["type"] == "script":
-            run_agent_command(config, task)
+        timeout = config["cutoff"]
+        print(f"Running Python function '{config['func_path']}' with timeout {timeout}")
+
+        parent_conn, child_conn = Pipe()
+
+        # Import the specific agent dynamically
+        module_name = config["func_path"].replace("/", ".").rstrip(".py")
+        module = importlib.import_module(module_name)
+        run_specific_agent = getattr(module, "run_specific_agent")
+
+        process = Process(target=run_specific_agent, args=(task, child_conn))
+        process.start()
+        start_time = time.time()
+
+        while True:
+            if (
+                parent_conn.poll()
+            ):  # Check if there's a new message from the child process
+                response, cycle_count = parent_conn.recv()
+                print(f"Cycle {cycle_count}: {response}")
+
+                if cycle_count >= config["cutoff"]:
+                    print(
+                        f"Cycle count has reached the limit of {config['cutoff']}. Terminating."
+                    )
+                    child_conn.send("terminate")
+                    break
+
+            if time.time() - start_time > timeout:
+                print(
+                    "The Python function has exceeded the time limit and was terminated."
+                )
+                child_conn.send(
+                    "terminate"
+                )  # Send a termination signal to the child process
+                break
+
+            if not process.is_alive():
+                print("The Python function has finished running.")
+                break
+
+        process.join()


 ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
-
-
-def run_agent_command(config, task):
-    path = config["agent"]["path"]
-
-    if ENVIRONMENT == "local":
-        AGENT_NAME = os.getenv("AGENT_NAME")
-        path = os.path.join(os.getcwd(), f"agent\\{AGENT_NAME}")
-
-    timeout = config["agent"]["cutoff"] or sys.maxsize
-    print(f"Running {task} with timeout {timeout}")
-
-    command_from_config = config["agent"]["script"]
-    command_list = command_from_config.split()
-
-    # replace '{}' with the task
-    command_list = [cmd if cmd != "{}" else task for cmd in command_list]
-    print("path, command_list", path, command_list)
-    start_time = time.time()
-    proc = subprocess.Popen(
-        command_list,
-        cwd=path,
-        shell=True,
-    )
-
-    while True:
-        if time.time() - start_time > timeout:
-            print("The subprocess has exceeded the time limit and was terminated.")
-            proc.terminate()
-            break
-
-        if proc.poll() is not None:
-            print("The subprocess has finished running.")
-            break
-
-
-def run_agent_function(config, task):
-    timeout = (
-        config["cutoff"]["count"] if config["cutoff"]["type"] == "time" else sys.maxsize
-    )
-    print(
-        f"Running Python function '{config['agent']['function']}' with timeout {timeout}"
-    )
-
-    parent_conn, child_conn = Pipe()
-    process = Process(target=run_specific_agent, args=(task, child_conn))
-    process.start()
-    start_time = time.time()
-
-    while True:
-        if parent_conn.poll():  # Check if there's a new message from the child process
-            response, cycle_count = parent_conn.recv()
-            print(f"Cycle {cycle_count}: {response}")
-
-            if cycle_count >= config["cutoff"]["count"]:
-                print(
-                    f"Cycle count has reached the limit of {config['cutoff']['count']}. Terminating."
-                )
-                child_conn.send("terminate")
-                break
-
-        if time.time() - start_time > timeout:
-            print("The Python function has exceeded the time limit and was terminated.")
-            child_conn.send(
-                "terminate"
-            )  # Send a termination signal to the child process
-            break
-
-        if not process.is_alive():
-            print("The Python function has finished running.")
-            break
-
-    process.join()
--- a/agbenchmark/config.json
+++ b/agbenchmark/config.json
@@ -1,10 +1,5 @@
 {
  "workspace": "C:\\Users\\silen\\miniagi",
-  "agent": {
-    "type": "script",
-    "path": "",
-    "script": "python miniagi.py {}",
-    "user_input": "\n",
-    "cutoff": 60
-  }
+  "func_path": "agent/benchmarks.py",
+  "cutoff": 60
 }
--- a/agbenchmark/start_benchmark.py
+++ b/agbenchmark/start_benchmark.py
@@ -29,7 +29,17 @@ def start(category, noreg, mock):

        config["workspace"] = click.prompt(
            "Please enter a new workspace path",
-            default=os.path.join(Path.home(), "miniagi"),
+            default=os.path.join(Path.home(), "workspace"),
+        )
+
+        config["func_path"] = click.prompt(
+            "Please enter a the path to your run_specific_agent function implementation",
+            default="/benchmarks.py",
+        )
+
+        config["cutoff"] = click.prompt(
+            "Please enter a hard cutoff runtime for your agent",
+            default="60",
        )

        with open(config_dir, "w") as f:
--- a/agbenchmark/tests/regression/regression_tests.json
+++ b/agbenchmark/tests/regression/regression_tests.json
@@ -1,7 +1 @@
-{
-    "TestWriteFile": {
-        "difficulty": "basic",
-        "dependencies": [],
-        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method[challenge_data0]"
-    }
-}
+{}
--- a/agent/benchmarks.py
+++ b/agent/benchmarks.py
@@ -0,0 +1,15 @@
+# import subprocess
+
+
+def run_specific_agent(task, conn):
+    cycle_count = 0
+    while (
+        not conn.poll()
+    ):  # Keep looping until any message is waiting on conn; the main process uses this as its termination signal
+        response = run_agent(task)  # run the agent and get its response. NOTE(review): run_agent is not defined or imported in this file - confirm where it should come from

+        if response:  # only a truthy response counts as a completed cycle
+            cycle_count += 1

+        # Send the latest response and the running cycle count back to the main process as one tuple
+        conn.send((response, cycle_count))
--- a/agent/hook.py
+++ b/agent/hook.py
@@ -1,10 +0,0 @@
-async def run_specific_agent(task, conn):
-    while (
-        not conn.poll()
-    ):  # Check if there's a termination signal from the main process
-        response, cycle_count = await run_agent(
-            task
-        )  # run the agent and get the response and cycle count
-
-        # Send response and cycle count back to the main process
-        conn.send((response, cycle_count))