Integrate benchmark and autogpt (#5208)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
2025-12-17 14:04:27 +01:00 · 2023-09-12 19:30:45 -07:00
parent 69dadee416
commit b2fc8f2a06
5 changed files with 20 additions and 22 deletions
--- a/autogpts/autogpt/agbenchmark_config/config.json
+++ b/autogpts/autogpt/agbenchmark_config/config.json
@@ -1 +1 @@
-{"workspace": "auto_gpt_workspace", "entry_path": "agbenchmark.benchmarks"}
+{"workspace": {"input": "auto_gpt_workspace", "output":"auto_gpt_workspace"  }, "entry_path": "agbenchmark.benchmarks"}
--- a/autogpts/autogpt/challenges_already_beaten.json
+++ b/autogpts/autogpt/challenges_already_beaten.json
@@ -0,0 +1,3 @@
 {
    "TestWriteFile": true
 }
--- a/benchmark/agbenchmark/agent_interface.py
+++ b/benchmark/agbenchmark/agent_interface.py
@@ -75,19 +75,15 @@ def run_windows_env(process: Any, start_time: float, timeout: float) -> None:
 def run_agent(task: str, timeout: int, agent_config: AgentBenchmarkConfig) -> None:
-    """Calling to get a response"""
+    print(f"Running agbenchmark/benchmarks.py with timeout {timeout}")
-    entry_path = agent_config.get_agent_entry_path()
+    command = [sys.executable, "-m", "agbenchmark_config.benchmarks", str(task)]
    print(f"Running '{entry_path}' with timeout {timeout}")
    command = [sys.executable, entry_path, str(task)]
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        cwd=agent_config.get_agent_directory(),
        bufsize=1,
    )
--- a/benchmark/agbenchmark/conftest.py
+++ b/benchmark/agbenchmark/conftest.py
@@ -53,7 +53,7 @@ def load_config_from_request(request: Any) -> AgentBenchmarkConfig:
        raise
-def resolve_workspace(workspace: Path) -> Path:
+def resolve_workspace_path(workspace: Path) -> Path:
    """
    This function resolves the workspace path.
@@ -83,10 +83,10 @@ def resolve_workspace(workspace: Path) -> Path:
            return path_value
        else:
            raise ValueError("Invalid workspace path expression.")
-    elif isinstance(workspace, Path):
+    elif isinstance(workspace, str):
-        return os.path.abspath(workspace)
+        return os.path.abspath(Path.cwd() / workspace)
    else:
-        raise ValueError("Invalid workspace type. Expected str or Path.")
+        raise ValueError("Invalid workspace type. Expected str")
@pytest.fixture(scope="module")
@@ -119,15 +119,12 @@ def config(request: Any) -> Any:
    config["AgentBenchmarkConfig"] = agent_benchmark_config
-    if isinstance(config["workspace"], str):
+    config["workspace"]["input"] = resolve_workspace_path(
-        config["workspace"] = resolve_workspace(agent_benchmark_config.workspace)
+        agent_benchmark_config.workspace.input
-    else:  # it's a input output dict
+    )
-        config["workspace"]["input"] = resolve_workspace(
+    config["workspace"]["output"] = resolve_workspace_path(
-            agent_benchmark_config.workspace / "input"
+        agent_benchmark_config.workspace.output
-        )
+    )
        config["workspace"]["output"] = resolve_workspace(
            agent_benchmark_config.workspace / "output"
        )
    return config
--- a/benchmark/agbenchmark/utils/data_types.py
+++ b/benchmark/agbenchmark/utils/data_types.py
@@ -17,7 +17,9 @@ class DifficultyLevel(Enum):
    expert = "expert"
    human = "human"
-
+class Workspace(BaseModel):
    input: str
    output: str
 # map from enum to difficulty level (numeric)
 DIFFICULTY_MAP = {
    DifficultyLevel.interface: 1,
@@ -85,7 +87,7 @@ class AgentBenchmarkConfig(BaseModel):
    agent_benchmark_config_path: Path | None = None
    entry_path: str
-    workspace: Path
+    workspace: Workspace
    reports_folder: Path | None = None
    api_mode: bool = False
    host: str | None
@@ -1 +1 @@
-{"workspace": "auto_gpt_workspace", "entry_path": "agbenchmark.benchmarks"}
+{"workspace": {"input": "auto_gpt_workspace", "output":"auto_gpt_workspace" }, "entry_path": "agbenchmark.benchmarks"}