mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-17 14:04:27 +01:00
Integrate benchmark and autogpt (#5208)
Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
@@ -1 +1 @@
|
|||||||
{"workspace": "auto_gpt_workspace", "entry_path": "agbenchmark.benchmarks"}
|
{"workspace": {"input": "auto_gpt_workspace", "output":"auto_gpt_workspace" }, "entry_path": "agbenchmark.benchmarks"}
|
||||||
|
|||||||
3
autogpts/autogpt/challenges_already_beaten.json
Normal file
3
autogpts/autogpt/challenges_already_beaten.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"TestWriteFile": true
|
||||||
|
}
|
||||||
@@ -75,19 +75,15 @@ def run_windows_env(process: Any, start_time: float, timeout: float) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def run_agent(task: str, timeout: int, agent_config: AgentBenchmarkConfig) -> None:
|
def run_agent(task: str, timeout: int, agent_config: AgentBenchmarkConfig) -> None:
|
||||||
"""Calling to get a response"""
|
print(f"Running agbenchmark/benchmarks.py with timeout {timeout}")
|
||||||
|
|
||||||
entry_path = agent_config.get_agent_entry_path()
|
command = [sys.executable, "-m", "agbenchmark_config.benchmarks", str(task)]
|
||||||
print(f"Running '{entry_path}' with timeout {timeout}")
|
|
||||||
|
|
||||||
command = [sys.executable, entry_path, str(task)]
|
|
||||||
|
|
||||||
process = subprocess.Popen(
|
process = subprocess.Popen(
|
||||||
command,
|
command,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.STDOUT,
|
stderr=subprocess.STDOUT,
|
||||||
universal_newlines=True,
|
universal_newlines=True,
|
||||||
cwd=agent_config.get_agent_directory(),
|
|
||||||
bufsize=1,
|
bufsize=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ def load_config_from_request(request: Any) -> AgentBenchmarkConfig:
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def resolve_workspace(workspace: Path) -> Path:
|
def resolve_workspace_path(workspace: Path) -> Path:
|
||||||
"""
|
"""
|
||||||
This function resolves the workspace path.
|
This function resolves the workspace path.
|
||||||
|
|
||||||
@@ -83,10 +83,10 @@ def resolve_workspace(workspace: Path) -> Path:
|
|||||||
return path_value
|
return path_value
|
||||||
else:
|
else:
|
||||||
raise ValueError("Invalid workspace path expression.")
|
raise ValueError("Invalid workspace path expression.")
|
||||||
elif isinstance(workspace, Path):
|
elif isinstance(workspace, str):
|
||||||
return os.path.abspath(workspace)
|
return os.path.abspath(Path.cwd() / workspace)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Invalid workspace type. Expected str or Path.")
|
raise ValueError("Invalid workspace type. Expected str")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
@@ -119,14 +119,11 @@ def config(request: Any) -> Any:
|
|||||||
|
|
||||||
config["AgentBenchmarkConfig"] = agent_benchmark_config
|
config["AgentBenchmarkConfig"] = agent_benchmark_config
|
||||||
|
|
||||||
if isinstance(config["workspace"], str):
|
config["workspace"]["input"] = resolve_workspace_path(
|
||||||
config["workspace"] = resolve_workspace(agent_benchmark_config.workspace)
|
agent_benchmark_config.workspace.input
|
||||||
else: # it's a input output dict
|
|
||||||
config["workspace"]["input"] = resolve_workspace(
|
|
||||||
agent_benchmark_config.workspace / "input"
|
|
||||||
)
|
)
|
||||||
config["workspace"]["output"] = resolve_workspace(
|
config["workspace"]["output"] = resolve_workspace_path(
|
||||||
agent_benchmark_config.workspace / "output"
|
agent_benchmark_config.workspace.output
|
||||||
)
|
)
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|||||||
@@ -17,7 +17,9 @@ class DifficultyLevel(Enum):
|
|||||||
expert = "expert"
|
expert = "expert"
|
||||||
human = "human"
|
human = "human"
|
||||||
|
|
||||||
|
class Workspace(BaseModel):
|
||||||
|
input: str
|
||||||
|
output: str
|
||||||
# map from enum to difficulty level (numeric)
|
# map from enum to difficulty level (numeric)
|
||||||
DIFFICULTY_MAP = {
|
DIFFICULTY_MAP = {
|
||||||
DifficultyLevel.interface: 1,
|
DifficultyLevel.interface: 1,
|
||||||
@@ -85,7 +87,7 @@ class AgentBenchmarkConfig(BaseModel):
|
|||||||
|
|
||||||
agent_benchmark_config_path: Path | None = None
|
agent_benchmark_config_path: Path | None = None
|
||||||
entry_path: str
|
entry_path: str
|
||||||
workspace: Path
|
workspace: Workspace
|
||||||
reports_folder: Path | None = None
|
reports_folder: Path | None = None
|
||||||
api_mode: bool = False
|
api_mode: bool = False
|
||||||
host: str | None
|
host: str | None
|
||||||
|
|||||||
Reference in New Issue
Block a user