Fix API Mode (#5209)

merwanehamadi
2023-09-13 07:30:46 -07:00
committed by GitHub
parent d319473e3c
commit 52c8b53122
6 changed files with 9 additions and 58 deletions

View File

@@ -139,9 +139,8 @@ def run_benchmark(
         )
         return 1
-    assert not (
-        agent_benchmark_config.api_mode and not agent_benchmark_config.host
-    ), "Error: host needs to be added to the config if api_mode is set to True."
+    assert agent_benchmark_config.host, "Error: host needs to be added to the config."
     print("Current configuration:")
     for key, value in vars(agent_benchmark_config).items():
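In other words, a host is now mandatory for every run, not just when API mode was toggled on. A minimal sketch of the new guard, using a hypothetical dataclass as a stand-in for the real AgentBenchmarkConfig (which is a pydantic model):

from dataclasses import dataclass


@dataclass
class FakeBenchmarkConfig:  # hypothetical stand-in for AgentBenchmarkConfig
    host: str | None = None


def check_host(config: FakeBenchmarkConfig) -> None:
    # Before this commit the assert fired only when api_mode was True and
    # the host was missing; now a missing host always aborts the run.
    assert config.host, "Error: host needs to be added to the config."


check_host(FakeBenchmarkConfig(host="http://localhost:8000"))  # passes
# check_host(FakeBenchmarkConfig())  # would raise AssertionError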

View File

@@ -14,11 +14,7 @@ async def run_api_agent(
 ) -> None:
     host_value = None

-    for arg in sys.argv:
-        if arg.startswith("--host="):
-            _, host_value = arg.split("=")
-            break
-    configuration = Configuration(host=host_value)
+    configuration = Configuration(host=config["AgentBenchmarkConfig"].host)
     async with ApiClient(configuration) as api_client:
         api_instance = AgentApi(api_client)
         task_request_body = TaskRequestBody(input=task.task)
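The practical effect: run_api_agent no longer scrapes --host out of sys.argv but trusts the already-parsed config object. A hedged sketch of the resulting client setup; the symbols are the ones the diff uses, and the agent_protocol_client import path is an assumption:

from agent_protocol_client import AgentApi, ApiClient, Configuration, TaskRequestBody


async def run_api_agent(task, config, artifacts_location, timeout) -> None:
    # The host comes straight from the benchmark config, not from sys.argv.
    configuration = Configuration(host=config["AgentBenchmarkConfig"].host)
    async with ApiClient(configuration) as api_client:
        api_instance = AgentApi(api_client)
        task_request_body = TaskRequestBody(input=task.task)
        ...  # create the task and poll for steps, as in the rest of the file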

View File

@@ -176,7 +176,6 @@ def pytest_addoption(parser: Any) -> None:
     It is used to add custom command-line options that are specific to the agent benchmark tests.
     These options can be used to control the behavior of the tests.
     The "--mock" option is used to run the tests in mock mode.
-    The "--api_mode" option is used to run the tests in API mode.
     The "--host" option is used to specify the host for the tests.
     The "--category" option is used to run only tests of a specific category.
     The "--nc" option is used to run the tests without caching.
@@ -193,7 +192,6 @@ def pytest_addoption(parser: Any) -> None:
     """
     parser.addoption("--no_dep", action="store_true", default=False)
     parser.addoption("--mock", action="store_true", default=False)
-    parser.addoption("--api_mode", action="store_true", default=False)
     parser.addoption("--host", action="store_true", default=None)
     parser.addoption("--nc", action="store_true", default=False)
     parser.addoption("--cutoff", action="store_true", default=False)
@@ -443,35 +441,3 @@ def pytest_collection_modifyitems(items: Any, config: Any) -> None:
         # Add category marker dynamically
         for category in categories:
             item.add_marker(getattr(pytest.mark, category))
-
-
-@pytest.fixture(scope="session", autouse=True)
-def run_agent(request: Any) -> Any:
-    """
-    This pytest fixture is responsible for running the agent. It is automatically used in every test session due to the 'autouse=True' parameter and 'session' scope.
-    If the "--api_mode" argument is not in the command line arguments, it starts a subprocess running the agbenchmark.
-    The subprocess is terminated after the test session.
-    If the "--api_mode" argument is present, it simply yields control back to the test session.
-    This fixture is essential for the pytest system as it provides the necessary setup and teardown for running the agent in each test session.
-
-    Args:
-        request (Any): The request object from which the agent benchmark configuration path is retrieved.
-
-    Yields:
-        None: Control is yielded back to the test session.
-    """
-    if "--api_mode" not in sys.argv:
-        command = [sys.executable, "-m", "agbenchmark.benchmarks"]
-        process = subprocess.Popen(
-            command,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            universal_newlines=True,
-            # cwd=agent_benchmark_config_path.entry_path.parent.parent, # even if it's necessary to make it work, let's get rid ot that, this is too complex
-        )
-        time.sleep(3)
-        yield
-        print(f"Terminating agent")
-        process.terminate()
-    else:
-        yield

View File

@@ -64,20 +64,14 @@ class Challenge(ABC):
             f"\033[1;35m============Starting {self.data.name} challenge============\033[0m"
         )
         print(f"\033[1;30mTask: {self.task}\033[0m")
-        if "--api_mode" in sys.argv:
-            await run_api_agent(self.data, config, self.ARTIFACTS_LOCATION, cutoff)
-        elif "--mock" in sys.argv:
+        if "--mock" in sys.argv:
             print("Running mock agent")
             for path in artifact_paths:
                 copy_artifacts_into_workspace(
                     config["workspace"], "artifacts_out", path
                 )
         else:
-            agent_benchmark_config: AgentBenchmarkConfig = config[
-                "AgentBenchmarkConfig"
-            ]
-            run_agent(self.task, cutoff, agent_config=agent_benchmark_config)
-
+            await run_api_agent(self.data, config, self.ARTIFACTS_LOCATION, cutoff)

         # hidden files are added after the agent runs. Hidden files can be python test files.
         # We copy them in the workspace to make it easy to import the code produced by the agent
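With the api_mode branch gone, the challenge runner has exactly two paths: mock mode copies pre-baked artifacts, and everything else goes through the API agent. A condensed, self-contained sketch of the new control flow; the function name, signature, and stubs are assumed, while the branching mirrors the diff:

import sys
from typing import Any


def copy_artifacts_into_workspace(workspace: Any, kind: str, path: str) -> None:
    ...  # stub; the real helper copies expected output files into the workspace


async def run_api_agent(data: Any, config: dict, location: str, cutoff: int) -> None:
    ...  # stub; the real helper drives the agent over the protocol API


async def setup_challenge(challenge: Any, config: dict, cutoff: int,
                          artifact_paths: list[str]) -> None:
    if "--mock" in sys.argv:
        # Mock mode: skip the agent entirely and copy the expected outputs.
        print("Running mock agent")
        for path in artifact_paths:
            copy_artifacts_into_workspace(config["workspace"], "artifacts_out", path)
    else:
        # Default: always talk to the agent over the protocol API.
        await run_api_agent(challenge.data, config, challenge.ARTIFACTS_LOCATION, cutoff)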

View File

@@ -84,18 +84,14 @@ class AgentBenchmarkConfig(BaseModel):
     This class represents the configuration for the Agent agbenchmark.
     It includes the following attributes:
     - agent_benchmark_config_path: The path to the agent benchmark config that this object was created from.
-    - entry_path: The path to the entry point of the benchmark for the agent, relative to the agent_benchmark_config_path.
     - workspace: The path to the workspace where the benchmark will be run.
     - reports_folder: The path to the folder where the benchmark reports will be stored.
-    - api_mode: A boolean indicating whether the benchmark is run in API mode.
     - host: The host where the benchmark is run.
     """

     agent_benchmark_config_path: Path | None = None
-    entry_path: str
     workspace: Workspace
     reports_folder: Path | None = None
-    api_mode: bool = False
     host: str | None

     def get_reports_location(self) -> Path:
@@ -119,9 +115,6 @@ class AgentBenchmarkConfig(BaseModel):
     def get_agent_home_directory(self) -> Path:
         return Path(self.agent_benchmark_config_path).resolve().parent
-
-    def get_agent_entry_path(self) -> Path:
-        return (self.get_agent_home_directory() / self.entry_path).resolve()


 class Info(BaseModel):
     difficulty: DifficultyLevel

View File

@@ -1 +1,4 @@
-{"workspace": "auto_gpt_workspace", "entry_path": "agbenchmark.benchmarks"}
+{
+  "workspace": {"input": "auto_gpt_workspace", "output": "auto_gpt_workspace"},
+  "host": "http://localhost:8000"
+}
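Tying the last two files together: the trimmed JSON now matches the trimmed model one-to-one. A hedged sketch of loading it; the Workspace shape with input/output fields is inferred from the new JSON, and the raw string stands in for the config file on disk:

import json
from pathlib import Path

from pydantic import BaseModel


class Workspace(BaseModel):
    # Inferred from the new JSON: separate input and output directories.
    input: str
    output: str


class AgentBenchmarkConfig(BaseModel):
    agent_benchmark_config_path: Path | None = None
    workspace: Workspace
    reports_folder: Path | None = None
    host: str | None = None


raw = """
{
  "workspace": {"input": "auto_gpt_workspace", "output": "auto_gpt_workspace"},
  "host": "http://localhost:8000"
}
"""
config = AgentBenchmarkConfig(**json.loads(raw))
assert config.host, "Error: host needs to be added to the config."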