Mirror of https://github.com/aljazceru/Auto-GPT.git, synced 2026-02-04 13:54:31 +01:00
Integrate Auto-GPT with Auto-GPT-Benchmarks (#4987)
* WIP
* WIP
* Update config for benchmark changes (#4883)
* Add Helicone
* Add reports, consolidate, update benchmark files (#4941)
  * updating config
  * add reports, consolidate, update benchmark files
  * Update benchmarks.py
  * Change entrypath and add __init__.py
* Remove Helicone integration because we now have proxy at the system level
* Support more regression tests
* Fix Auto-GPT/benchmark integration
* Remove cutoff
* Install agbenchmark and make continuous mode dynamic

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
Co-authored-by: Silen Naihin <silen.naihin@gmail.com>
agbenchmark/__init__.py (new, empty file)
benchmarks.py → agbenchmark/benchmarks.py (moved and modified)

@@ -1,4 +1,6 @@
+import sys
 from pathlib import Path
+from typing import Tuple

 from autogpt.agents import Agent
 from autogpt.app.main import run_interaction_loop
@@ -12,14 +14,15 @@ from autogpt.workspace import Workspace
 PROJECT_DIR = Path().resolve()


-def run_task(task) -> None:
-    agent = bootstrap_agent(task)
+def run_specific_agent(task, continuous_mode=False) -> Tuple[str, int]:
+    agent = bootstrap_agent(task, continuous_mode)
     run_interaction_loop(agent)


-def bootstrap_agent(task):
+def bootstrap_agent(task, continuous_mode) -> Agent:
     config = ConfigBuilder.build_config_from_env(workdir=PROJECT_DIR)
-    config.continuous_mode = False
+    config.debug_mode = True
+    config.continuous_mode = continuous_mode
     config.temperature = 0
     config.plain_output = True
     command_registry = get_command_registry(config)
@@ -29,7 +32,7 @@ def bootstrap_agent(task):
     ai_config = AIConfig(
         ai_name="Auto-GPT",
         ai_role="a multi-purpose AI assistant.",
-        ai_goals=[task.user_input],
+        ai_goals=[task],
     )
     ai_config.command_registry = command_registry
     return Agent(
@@ -50,3 +53,12 @@ def get_command_registry(config: Config):
     for command_category in enabled_command_categories:
         command_registry.import_commands(command_category)
     return command_registry
+
+
+if __name__ == "__main__":
+    # The first argument is the script name itself, second is the task
+    if len(sys.argv) != 2:
+        print("Usage: python script.py <task>")
+        sys.exit(1)
+    task = sys.argv[1]
+    run_specific_agent(task, continuous_mode=True)
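The new __main__ block turns this module into a CLI entry point that the benchmark harness can shell out to, one task per process. A minimal sketch of that invocation, assuming the module is importable from the repo root; the helper name and the task string are invented for illustration:

# Hypothetical harness-side call; only the "agbenchmark.benchmarks" module
# path comes from this commit (see config.json's entry_path below).
import subprocess
import sys

def run_benchmark_task(task: str, timeout: float = 600) -> int:
    """Spawn the Auto-GPT benchmark entry point on a single task string."""
    proc = subprocess.run(
        [sys.executable, "-m", "agbenchmark.benchmarks", task],
        timeout=timeout,
    )
    return proc.returncode

if __name__ == "__main__":
    # Sample task; any plain-language instruction works here.
    print(run_benchmark_task("Write the word 'Washington' to a .txt file"))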
agbenchmark/config.json (new file)

@@ -0,0 +1,4 @@
+{
+  "workspace": "autogpt/workspace/auto_gpt_workspace",
+  "entry_path": "agbenchmark.benchmarks"
+}
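Here "entry_path" points the harness at the module above and "workspace" tells it where the agent reads and writes challenge artifacts. A rough sketch of how a runner might consume this file; the loader below is illustrative, not agbenchmark's actual implementation:

import json
from pathlib import Path

def load_agbenchmark_config(path: str = "agbenchmark/config.json") -> dict:
    """Read the harness config; both required keys come from the file above."""
    config = json.loads(Path(path).read_text())
    missing = {"workspace", "entry_path"} - config.keys()
    if missing:
        raise KeyError(f"config.json is missing keys: {missing}")
    return config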
agbenchmark/regression_tests.json (new file)

@@ -0,0 +1,24 @@
+{
+  "TestBasicCodeGeneration": {
+    "difficulty": "basic",
+    "dependencies": [
+      "TestWriteFile"
+    ],
+    "data_path": "agbenchmark/challenges/code/d3"
+  },
+  "TestBasicMemory": {
+    "difficulty": "basic",
+    "data_path": "agbenchmark/challenges/memory/m1"
+  },
+  "TestReadFile": {
+    "difficulty": "basic",
+    "dependencies": [
+      "TestWriteFile"
+    ],
+    "data_path": "agbenchmark/challenges/interface/read_file"
+  },
+  "TestWriteFile": {
+    "dependencies": [],
+    "data_path": "agbenchmark/challenges/interface/write_file"
+  }
+}
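Each entry names the challenges it depends on, so a runner can execute TestWriteFile before TestReadFile and TestBasicCodeGeneration. A sketch of that ordering using only the standard library; agbenchmark's own scheduler may differ:

import json
from graphlib import TopologicalSorter  # stdlib since Python 3.9
from pathlib import Path

def regression_test_order(path: str = "agbenchmark/regression_tests.json") -> list:
    """Return challenge names with every dependency ahead of its dependents."""
    tests = json.loads(Path(path).read_text())
    # TopologicalSorter takes a mapping of node -> set of predecessors.
    graph = {name: set(spec.get("dependencies", [])) for name, spec in tests.items()}
    return list(TopologicalSorter(graph).static_order())

# e.g. ['TestWriteFile', 'TestBasicMemory', 'TestReadFile', 'TestBasicCodeGeneration']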
requirements.txt

@@ -1,9 +1,9 @@
 beautifulsoup4>=4.12.2
 colorama==0.4.6
 distro==1.8.0
-openai==0.27.2
+openai==0.27.8
 playsound==1.2.2
-python-dotenv==1.0.0
+python-dotenv==0.21
 pyyaml==6.0
 PyPDF2
 python-docx
@@ -31,6 +31,7 @@ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_
 prompt_toolkit>=3.0.38
 pydantic
 inflection
+agbenchmark

 # web server
 fastapi
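The pins move openai up to 0.27.8, move python-dotenv down to 0.21 (presumably for compatibility with the newly added agbenchmark dependency), and add agbenchmark itself. A quick, illustrative check that an installed environment matches the new pins:

from importlib.metadata import PackageNotFoundError, version

# Expected versions are the pins from the diff above.
for package, expected in {"openai": "0.27.8", "python-dotenv": "0.21"}.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        installed = "not installed"
    print(f"{package}: installed={installed}, pinned={expected}")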
@@ -6,9 +6,9 @@ from typing import Any, Generator

 import pytest

+from agbenchmark.benchmarks import run_specific_agent
 from autogpt.logs import LogCycleHandler
 from autogpt.workspace import Workspace
-from benchmarks import run_task
 from tests.challenges.schema import Task


@@ -75,4 +75,4 @@ def run_challenge(
     setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run)
     task = Task(user_input=user_input)
     with contextlib.suppress(SystemExit):
-        run_task(task)
+        run_specific_agent(task.user_input)
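Note the signature change on the last line: the challenge runner now passes the raw string (task.user_input) rather than the Task object, matching bootstrap_agent's new ai_goals=[task]. A small sketch of the new calling convention; the task text is invented:

import contextlib

from agbenchmark.benchmarks import run_specific_agent
from tests.challenges.schema import Task

task = Task(user_input="Write 'hello world' to hello.txt")
with contextlib.suppress(SystemExit):
    # continuous_mode defaults to False in run_specific_agent;
    # the CLI entry point passes continuous_mode=True instead.
    run_specific_agent(task.user_input)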