Integrate Auto-GPT with Auto-GPT-Benchmarks (#4987)

* WIP

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>

* WIP

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>

* Update config for benchmark changes (#4883)

* Add Helicone

* Add reports, consolidate, update benchmark files (#4941)

* Update config

* add reports, consolidate, update benchmark files

* Update benchmarks.py

* Change entry_path and add __init__.py

* Remove Helicone integration because we now have a proxy at the system level

* Support more regression tests

* Fix Auto-GPT/benchmark integration

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>

* Remove cutoff

* Install agbenchmark and make continuous mode dynamic

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>

---------

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
Co-authored-by: Silen Naihin <silen.naihin@gmail.com>
Author: merwanehamadi
Date: 2023-07-22 14:58:46 -07:00
Committed by: GitHub
Commit: 4ada7d1784 (parent: e0bcde178e)
6 changed files with 50 additions and 9 deletions

agbenchmark/__init__.py (new, empty file)

agbenchmark/benchmarks.py
@@ -1,4 +1,6 @@
+import sys
 from pathlib import Path
+from typing import Tuple
 
 from autogpt.agents import Agent
 from autogpt.app.main import run_interaction_loop
@@ -12,14 +14,15 @@ from autogpt.workspace import Workspace
 
 PROJECT_DIR = Path().resolve()
 
-def run_task(task) -> None:
-    agent = bootstrap_agent(task)
+def run_specific_agent(task, continuous_mode=False) -> Tuple[str, int]:
+    agent = bootstrap_agent(task, continuous_mode)
     run_interaction_loop(agent)
 
-def bootstrap_agent(task):
+
+def bootstrap_agent(task, continuous_mode) -> Agent:
     config = ConfigBuilder.build_config_from_env(workdir=PROJECT_DIR)
-    config.continuous_mode = False
     config.debug_mode = True
+    config.continuous_mode = continuous_mode
     config.temperature = 0
     config.plain_output = True
     command_registry = get_command_registry(config)
@@ -29,7 +32,7 @@ def bootstrap_agent(task):
     ai_config = AIConfig(
         ai_name="Auto-GPT",
         ai_role="a multi-purpose AI assistant.",
-        ai_goals=[task.user_input],
+        ai_goals=[task],
     )
     ai_config.command_registry = command_registry
     return Agent(
@@ -50,3 +53,12 @@ def get_command_registry(config: Config):
     for command_category in enabled_command_categories:
         command_registry.import_commands(command_category)
     return command_registry
+
+
+if __name__ == "__main__":
+    # The first argument is the script name itself, second is the task
+    if len(sys.argv) != 2:
+        print("Usage: python script.py <task>")
+        sys.exit(1)
+    task = sys.argv[1]
+    run_specific_agent(task, continuous_mode=True)
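For context, the new __main__ block makes the module runnable as a standalone script. The same entry point can also be driven in-process; a minimal sketch, with an illustrative task string that is not from the commit:

    from agbenchmark.benchmarks import run_specific_agent

    # Illustrative task; any benchmark task string works here.
    run_specific_agent("Write 'hello world' to output.txt", continuous_mode=True)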

agbenchmark/config.json (new file)

@@ -0,0 +1,4 @@
+{
+  "workspace": "autogpt/workspace/auto_gpt_workspace",
+  "entry_path": "agbenchmark.benchmarks"
+}
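Here entry_path names the module exposing the agent entry point, so the benchmark harness can import it dynamically. A minimal sketch of such a loader, assuming a JSON config like the one above (an illustration, not agbenchmark's actual loading code):

    import importlib
    import json

    # Read the benchmark config and import the module it points at.
    with open("agbenchmark/config.json") as f:
        config = json.load(f)

    benchmarks = importlib.import_module(config["entry_path"])  # agbenchmark.benchmarks
    benchmarks.run_specific_agent("example task", continuous_mode=True)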


@@ -0,0 +1,24 @@
+{
+  "TestBasicCodeGeneration": {
+    "difficulty": "basic",
+    "dependencies": [
+      "TestWriteFile"
+    ],
+    "data_path": "agbenchmark/challenges/code/d3"
+  },
+  "TestBasicMemory": {
+    "difficulty": "basic",
+    "data_path": "agbenchmark/challenges/memory/m1"
+  },
+  "TestReadFile": {
+    "difficulty": "basic",
+    "dependencies": [
+      "TestWriteFile"
+    ],
+    "data_path": "agbenchmark/challenges/interface/read_file"
+  },
+  "TestWriteFile": {
+    "dependencies": [],
+    "data_path": "agbenchmark/challenges/interface/write_file"
+  }
+}
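In this registry, a test's dependencies must pass before the test itself runs; for example, TestReadFile depends on TestWriteFile. A minimal ordering sketch under that assumption (illustrative only, not agbenchmark's real scheduler):

    # Depth-first traversal: each test is emitted after its dependencies.
    registry = {
        "TestBasicCodeGeneration": {"dependencies": ["TestWriteFile"]},
        "TestBasicMemory": {"dependencies": []},
        "TestReadFile": {"dependencies": ["TestWriteFile"]},
        "TestWriteFile": {"dependencies": []},
    }

    def run_order(reg: dict) -> list[str]:
        ordered: list[str] = []
        seen: set[str] = set()

        def visit(name: str) -> None:
            if name in seen:
                return
            seen.add(name)
            for dep in reg[name].get("dependencies", []):
                visit(dep)
            ordered.append(name)

        for name in reg:
            visit(name)
        return ordered

    print(run_order(registry))
    # -> ['TestWriteFile', 'TestBasicCodeGeneration', 'TestBasicMemory', 'TestReadFile']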

requirements.txt

@@ -1,9 +1,9 @@
 beautifulsoup4>=4.12.2
 colorama==0.4.6
 distro==1.8.0
-openai==0.27.2
+openai==0.27.8
 playsound==1.2.2
-python-dotenv==1.0.0
+python-dotenv==0.21
 pyyaml==6.0
 PyPDF2
 python-docx
@@ -31,6 +31,7 @@ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_
 prompt_toolkit>=3.0.38
 pydantic
 inflection
+agbenchmark
 # web server
 fastapi


@@ -6,9 +6,9 @@ from typing import Any, Generator
 import pytest
+from agbenchmark.benchmarks import run_specific_agent
 from autogpt.logs import LogCycleHandler
 from autogpt.workspace import Workspace
-from benchmarks import run_task
 from tests.challenges.schema import Task
@@ -75,4 +75,4 @@ def run_challenge(
     setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
     task = Task(user_input=user_input)
     with contextlib.suppress(SystemExit):
-        run_task(task)
+        run_specific_agent(task.user_input)
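The challenge runner now passes the raw task.user_input string to run_specific_agent and suppresses the SystemExit that the interaction loop raises when the agent stops. A self-contained sketch of that pattern (run_challenge_task is an illustrative name, not from the commit):

    import contextlib

    from agbenchmark.benchmarks import run_specific_agent

    def run_challenge_task(user_input: str) -> None:
        # The interaction loop exits via SystemExit when the agent finishes;
        # suppressing it lets the test continue and assert on the results.
        with contextlib.suppress(SystemExit):
            run_specific_agent(user_input)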