diff --git a/benchmark/agbenchmark/__main__.py b/benchmark/agbenchmark/__main__.py index db7350de..ec06c84f 100644 --- a/benchmark/agbenchmark/__main__.py +++ b/benchmark/agbenchmark/__main__.py @@ -139,9 +139,8 @@ def run_benchmark( ) return 1 - assert not ( - agent_benchmark_config.api_mode and not agent_benchmark_config.host - ), "Error: host needs to be added to the config if api_mode is set to True." + assert agent_benchmark_config.host, "Error: host needs to be added to the config." + print("Current configuration:") for key, value in vars(agent_benchmark_config).items(): diff --git a/benchmark/agbenchmark/agent_api_interface.py b/benchmark/agbenchmark/agent_api_interface.py index 850d4322..8960351f 100644 --- a/benchmark/agbenchmark/agent_api_interface.py +++ b/benchmark/agbenchmark/agent_api_interface.py @@ -14,11 +14,7 @@ async def run_api_agent( ) -> None: host_value = None - for arg in sys.argv: - if arg.startswith("--host="): - _, host_value = arg.split("=") - break - configuration = Configuration(host=host_value) + configuration = Configuration(host=config["AgentBenchmarkConfig"].host) async with ApiClient(configuration) as api_client: api_instance = AgentApi(api_client) task_request_body = TaskRequestBody(input=task.task) diff --git a/benchmark/agbenchmark/conftest.py b/benchmark/agbenchmark/conftest.py index 8edb38b5..d59415d5 100644 --- a/benchmark/agbenchmark/conftest.py +++ b/benchmark/agbenchmark/conftest.py @@ -176,7 +176,6 @@ def pytest_addoption(parser: Any) -> None: It is used to add custom command-line options that are specific to the agent benchmark tests. These options can be used to control the behavior of the tests. The "--mock" option is used to run the tests in mock mode. - The "--api_mode" option is used to run the tests in API mode. The "--host" option is used to specify the host for the tests. The "--category" option is used to run only tests of a specific category. The "--nc" option is used to run the tests without caching. @@ -193,7 +192,6 @@ def pytest_addoption(parser: Any) -> None: """ parser.addoption("--no_dep", action="store_true", default=False) parser.addoption("--mock", action="store_true", default=False) - parser.addoption("--api_mode", action="store_true", default=False) parser.addoption("--host", action="store_true", default=None) parser.addoption("--nc", action="store_true", default=False) parser.addoption("--cutoff", action="store_true", default=False) @@ -443,35 +441,3 @@ def pytest_collection_modifyitems(items: Any, config: Any) -> None: # Add category marker dynamically for category in categories: item.add_marker(getattr(pytest.mark, category)) - - -@pytest.fixture(scope="session", autouse=True) -def run_agent(request: Any) -> Any: - """ - This pytest fixture is responsible for running the agent. It is automatically used in every test session due to the 'autouse=True' parameter and 'session' scope. - If the "--api_mode" argument is not in the command line arguments, it starts a subprocess running the agbenchmark. - The subprocess is terminated after the test session. - If the "--api_mode" argument is present, it simply yields control back to the test session. - This fixture is essential for the pytest system as it provides the necessary setup and teardown for running the agent in each test session. - - Args: - request (Any): The request object from which the agent benchmark configuration path is retrieved. - - Yields: - None: Control is yielded back to the test session. - """ - if "--api_mode" not in sys.argv: - command = [sys.executable, "-m", "agbenchmark.benchmarks"] - process = subprocess.Popen( - command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - universal_newlines=True, - # cwd=agent_benchmark_config_path.entry_path.parent.parent, # even if it's necessary to make it work, let's get rid ot that, this is too complex - ) - time.sleep(3) - yield - print(f"Terminating agent") - process.terminate() - else: - yield diff --git a/benchmark/agbenchmark/utils/challenge.py b/benchmark/agbenchmark/utils/challenge.py index e3fcbb8f..4b783615 100644 --- a/benchmark/agbenchmark/utils/challenge.py +++ b/benchmark/agbenchmark/utils/challenge.py @@ -64,20 +64,14 @@ class Challenge(ABC): f"\033[1;35m============Starting {self.data.name} challenge============\033[0m" ) print(f"\033[1;30mTask: {self.task}\033[0m") - - if "--api_mode" in sys.argv: - await run_api_agent(self.data, config, self.ARTIFACTS_LOCATION, cutoff) - elif "--mock" in sys.argv: + if "--mock" in sys.argv: print("Running mock agent") for path in artifact_paths: copy_artifacts_into_workspace( config["workspace"], "artifacts_out", path ) else: - agent_benchmark_config: AgentBenchmarkConfig = config[ - "AgentBenchmarkConfig" - ] - run_agent(self.task, cutoff, agent_config=agent_benchmark_config) + await run_api_agent(self.data, config, self.ARTIFACTS_LOCATION, cutoff) # hidden files are added after the agent runs. Hidden files can be python test files. # We copy them in the workspace to make it easy to import the code produced by the agent diff --git a/benchmark/agbenchmark/utils/data_types.py b/benchmark/agbenchmark/utils/data_types.py index 65de2856..f0543813 100644 --- a/benchmark/agbenchmark/utils/data_types.py +++ b/benchmark/agbenchmark/utils/data_types.py @@ -84,18 +84,14 @@ class AgentBenchmarkConfig(BaseModel): This class represents the configuration for the Agent agbenchmark. It includes the following attributes: - agent_benchmark_config_path: The path to the agent benchmark config that this object was created from. - - entry_path: The path to the entry point of the benchmark for the agent, relative to the agent_benchmark_config_path. - workspace: The path to the workspace where the benchmark will be run. - reports_folder: The path to the folder where the benchmark reports will be stored. - - api_mode: A boolean indicating whether the benchmark is run in API mode. - host: The host where the benchmark is run. """ agent_benchmark_config_path: Path | None = None - entry_path: str workspace: Workspace reports_folder: Path | None = None - api_mode: bool = False host: str | None def get_reports_location(self) -> Path: @@ -119,9 +115,6 @@ class AgentBenchmarkConfig(BaseModel): def get_agent_home_directory(self) -> Path: return Path(self.agent_benchmark_config_path).resolve().parent - def get_agent_entry_path(self) -> Path: - return (self.get_agent_home_directory() / self.entry_path).resolve() - class Info(BaseModel): difficulty: DifficultyLevel diff --git a/benchmark/agbenchmark_config/config.json b/benchmark/agbenchmark_config/config.json index d3762ac0..2ea0cab7 100644 --- a/benchmark/agbenchmark_config/config.json +++ b/benchmark/agbenchmark_config/config.json @@ -1 +1,4 @@ -{"workspace": "auto_gpt_workspace", "entry_path": "agbenchmark.benchmarks"} \ No newline at end of file +{ + "workspace": {"input": "auto_gpt_workspace", "output": "auto_gpt_workspace"}, + "host": "http://localhost:8000" +}