mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-02-11 01:04:20 +01:00
Add agbenchmark routes (#10)
Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
15
.flake8
Normal file
15
.flake8
Normal file
@@ -0,0 +1,15 @@
|
||||
[flake8]
max-line-length = 88
# Codes are listed bare and comma-separated. Wrapping the list in quotes would
# make the first and last entries (`"E303` and `E302"`) invalid codes.
select = E303, W293, W291, W292, E305, E231, E302
exclude =
    .tox,
    __pycache__,
    *.pyc,
    .env,
    venv*/*,
    .venv/*,
    reports/*,
    dist/*,
    agent/*,
    code,
    agbenchmark/challenges/*
|
||||
107
.github/workflows/ci.yml
vendored
Normal file
107
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,107 @@
|
||||
name: CI

on:
  # Manual trigger. `workflow_dispatch` takes no `branches` filter; the branch
  # is chosen when the run is started.
  workflow_dispatch:
  schedule:
    - cron: '0 8 * * *'
  push:
    branches: [master, ci-test*]
  pull_request:
    branches: [stable, master, release-*]

# Declared at workflow level so that every job (not only `lint`) can read it.
env:
  min-python-version: '3.10'

jobs:
  lint:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ env.min-python-version }}

      - id: get_date
        name: Get date
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Install Poetry
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Install dependencies
        run: |
          export POETRY_VIRTUALENVS_IN_PROJECT=true
          poetry install -vvv

      - name: Lint with flake8
        run: poetry run flake8

      # The two formatting checks run even when an earlier step failed, so one
      # CI run reports every style problem.
      - name: Check black formatting
        run: poetry run black . --exclude test.py --check
        if: success() || failure()

      - name: Check isort formatting
        run: poetry run isort . --check
        if: success() || failure()

  tests:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          submodules: true
          token: ${{ secrets.GH_TOKEN }}

      - name: Setup Chrome and ChromeDriver
        run: |
          sudo apt-get update
          sudo apt-get install -y wget
          wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
          sudo dpkg -i google-chrome-stable_current_amd64.deb
          # -y keeps the dependency fix-up non-interactive on the runner.
          sudo apt-get install -f -y

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ env.min-python-version }}

      - id: get_date
        name: Get date
        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

      - name: Install Poetry
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Install dependencies
        run: |
          poetry install

      # Best-effort: the server is started in the background and a newman
      # failure is reported with a message instead of failing the job.
      - name: Run regression tests
        run: |
          poetry run python -m autogpt &
          newman run https://raw.githubusercontent.com/Significant-Gravitas/devtool-postman/master/Postman%20Collections/devtool_experience.json --env-var "url=http://127.0.0.1:8000" || echo "The backend is not ready yet, so the tests will fail"
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # NOTE(review): this job declares no `strategy.matrix`, so every
          # `matrix.agent-name` reference below evaluates to an empty value.
          AGENT_NAME: ${{ matrix.agent-name }}
          HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
          REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
          HELICONE_CACHE_ENABLED: false
          HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
          REPORT_LOCATION: ${{ format('../../reports/{0}', matrix.agent-name) }}
|
||||
@@ -1,11 +0,0 @@
|
||||
import subprocess
|
||||
|
||||
if __name__ == "__main__":
|
||||
command = [
|
||||
"poetry",
|
||||
"run",
|
||||
"python",
|
||||
"-m",
|
||||
"autogpt",
|
||||
]
|
||||
subprocess.run(command)
|
||||
@@ -1 +0,0 @@
|
||||
{"workspace": "agbenchmark/workspace", "entry_path": "agbenchmark.benchmarks", "api_mode": "True", "host": "http://localhost:8000"}
|
||||
@@ -1 +0,0 @@
|
||||
{}
|
||||
@@ -5,10 +5,13 @@ from dotenv import load_dotenv
|
||||
|
||||
import autogpt.agent
|
||||
import autogpt.db
|
||||
from autogpt.benchmark_integration import add_benchmark_routes
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""Runs the agent server"""
|
||||
load_dotenv()
|
||||
router = add_benchmark_routes()
|
||||
|
||||
database_name = os.getenv("DATABASE_STRING")
|
||||
print(database_name)
|
||||
port = os.getenv("PORT")
|
||||
@@ -19,4 +22,4 @@ if __name__ == "__main__":
|
||||
agent = Agent.setup_agent(auto_gpt.task_handler, auto_gpt.step_handler)
|
||||
agent.db = database
|
||||
agent.workspace = workspace
|
||||
agent.start(port=port)
|
||||
agent.start(port=port, router=router)
|
||||
|
||||
28
autogpt/benchmark_integration.py
Normal file
28
autogpt/benchmark_integration.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from pathlib import Path
|
||||
|
||||
from agbenchmark.app import get_artifact, get_skill_tree
|
||||
from agent_protocol.agent import base_router
|
||||
from fastapi import APIRouter
|
||||
from fastapi import (
|
||||
HTTPException as FastAPIHTTPException, # Import HTTPException from FastAPI
|
||||
)
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
|
||||
def add_benchmark_routes() -> APIRouter:
    """Attach the agbenchmark endpoints to the agent protocol's base router.

    Builds a fresh ``APIRouter`` exposing two GET endpoints that delegate to
    the handlers imported from ``agbenchmark.app``:

    * ``/skill_tree`` -> ``get_skill_tree()``
    * ``/agent/challenges/{challenge_id}/artifacts/{artifact_id}``
      -> ``get_artifact(challenge_id, artifact_id)``

    The new router is then included in the imported ``base_router``. That
    object is shared at module level and is modified in place, so each call
    runs ``include_router`` on it again; call this once at start-up.

    Returns:
        The shared ``base_router`` with the benchmark routes included
        (presumably an ``APIRouter`` -- confirm against ``agent_protocol``).
    """
    new_router = APIRouter()

    # The wrappers carry an ``_endpoint`` suffix so they do not shadow the
    # imported handler functions they call.
    @new_router.get("/skill_tree")
    async def get_skill_tree_endpoint() -> dict:
        return get_skill_tree()

    @new_router.get("/agent/challenges/{challenge_id}/artifacts/{artifact_id}")
    async def get_artifact_endpoint(
        challenge_id: str, artifact_id: str
    ) -> FileResponse:
        return get_artifact(challenge_id, artifact_id)

    # Include the new router in the base router
    base_router.include_router(new_router)

    return base_router
|
||||
136
autogpt/db.py
136
autogpt/db.py
@@ -5,11 +5,20 @@ IT IS NOT ADVISED TO USE THIS IN PRODUCTION!
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional
|
||||
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Boolean, LargeBinary
|
||||
from sqlalchemy.orm import relationship, sessionmaker, DeclarativeBase
|
||||
|
||||
from agent_protocol import Artifact, Step, Task, TaskDB
|
||||
from agent_protocol.models import Status, TaskInput
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
Column,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
LargeBinary,
|
||||
String,
|
||||
create_engine,
|
||||
)
|
||||
from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
pass
|
||||
@@ -20,7 +29,7 @@ class DataNotFoundError(Exception):
|
||||
|
||||
|
||||
class TaskModel(Base):
|
||||
__tablename__ = 'tasks'
|
||||
__tablename__ = "tasks"
|
||||
|
||||
task_id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
input = Column(String)
|
||||
@@ -31,10 +40,10 @@ class TaskModel(Base):
|
||||
|
||||
|
||||
class StepModel(Base):
|
||||
__tablename__ = 'steps'
|
||||
__tablename__ = "steps"
|
||||
|
||||
step_id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
task_id = Column(Integer, ForeignKey('tasks.task_id'))
|
||||
task_id = Column(Integer, ForeignKey("tasks.task_id"))
|
||||
name = Column(String)
|
||||
status = Column(String)
|
||||
is_last = Column(Boolean, default=False)
|
||||
@@ -44,18 +53,19 @@ class StepModel(Base):
|
||||
|
||||
|
||||
class ArtifactModel(Base):
|
||||
__tablename__ = 'artifacts'
|
||||
__tablename__ = "artifacts"
|
||||
|
||||
artifact_id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
task_id = Column(Integer, ForeignKey('tasks.task_id'))
|
||||
step_id = Column(Integer, ForeignKey('steps.step_id'))
|
||||
task_id = Column(Integer, ForeignKey("tasks.task_id"))
|
||||
step_id = Column(Integer, ForeignKey("steps.step_id"))
|
||||
file_name = Column(String)
|
||||
relative_path = Column(String)
|
||||
file_data = Column(LargeBinary)
|
||||
|
||||
task = relationship("TaskModel", back_populates="artifacts")
|
||||
|
||||
#sqlite:///{database_name}
|
||||
|
||||
# sqlite:///{database_name}
|
||||
class AgentDB(TaskDB):
|
||||
def __init__(self, database_string) -> None:
|
||||
super().__init__()
|
||||
@@ -64,30 +74,59 @@ class AgentDB(TaskDB):
|
||||
self.Session = sessionmaker(bind=self.engine)
|
||||
print("Databases Created")
|
||||
|
||||
async def create_task(
    self,
    input: Optional[str],
    additional_input: Optional[TaskInput] = None,
    artifacts: Optional[List[Artifact]] = None,
    steps: Optional[List[Step]] = None,
) -> Task:
    """Insert a new task row and return it as a ``Task``.

    Args:
        input: Text stored in the row's ``input`` column.
        additional_input: Optional extra input; stored as the result of its
            ``.json()`` call, or ``None`` when not given.
        artifacts: Accepted for interface compatibility; not used here.
        steps: Accepted for interface compatibility; not used here.

    Returns:
        The task as loaded back through ``get_task``.
    """
    # The context manager closes the session (and releases its connection)
    # even if the insert raises.
    with self.Session() as session:
        new_task = TaskModel(
            input=input,
            additional_input=additional_input.json() if additional_input else None,
        )
        session.add(new_task)
        session.commit()
        session.refresh(new_task)
        # Read the generated key while the row is still attached.
        task_id = new_task.task_id
    return await self.get_task(task_id)
|
||||
|
||||
async def create_step(
    self,
    task_id: str,
    name: Optional[str] = None,
    is_last: bool = False,
    additional_properties: Optional[Dict[str, str]] = None,
) -> Step:
    """Insert a new step row for a task and return it as a ``Step``.

    The row is created with status ``"created"``.

    Args:
        task_id: Identifier of the task the step belongs to.
        name: Optional step name.
        is_last: Whether this is the final step of the task.
        additional_properties: Optional extra properties, passed to the
            model unchanged.

    Returns:
        The step as loaded back through ``get_step``.
    """
    # The context manager closes the session (and releases its connection)
    # even if the insert raises.
    with self.Session() as session:
        new_step = StepModel(
            task_id=task_id,
            name=name,
            status="created",
            is_last=is_last,
            additional_properties=additional_properties,
        )
        session.add(new_step)
        session.commit()
        session.refresh(new_step)
        # Read the generated key while the row is still attached.
        step_id = new_step.step_id
    return await self.get_step(task_id, step_id)
|
||||
|
||||
async def create_artifact(self, task_id: str, file_name: str, relative_path: Optional[str] = None,
|
||||
step_id: Optional[str] = None, file_data: bytes | None = None) -> Artifact:
|
||||
async def create_artifact(
|
||||
self,
|
||||
task_id: str,
|
||||
file_name: str,
|
||||
relative_path: Optional[str] = None,
|
||||
step_id: Optional[str] = None,
|
||||
file_data: bytes | None = None,
|
||||
) -> Artifact:
|
||||
session = self.Session()
|
||||
new_artifact = ArtifactModel(task_id=task_id, step_id=step_id, file_name=file_name,
|
||||
relative_path=relative_path, file_data=file_data)
|
||||
new_artifact = ArtifactModel(
|
||||
task_id=task_id,
|
||||
step_id=step_id,
|
||||
file_name=file_name,
|
||||
relative_path=relative_path,
|
||||
file_data=file_data,
|
||||
)
|
||||
session.add(new_artifact)
|
||||
session.commit()
|
||||
session.refresh(new_artifact)
|
||||
@@ -98,11 +137,18 @@ class AgentDB(TaskDB):
|
||||
session = self.Session()
|
||||
task_obj = session.query(TaskModel).filter_by(task_id=task_id).first()
|
||||
if task_obj:
|
||||
task = Task(task_id=task_obj.task_id, input=task_obj.input, additional_input=task_obj.additional_input, steps=[])
|
||||
task = Task(
|
||||
task_id=task_obj.task_id,
|
||||
input=task_obj.input,
|
||||
additional_input=task_obj.additional_input,
|
||||
steps=[],
|
||||
)
|
||||
steps_obj = session.query(StepModel).filter_by(task_id=task_id).all()
|
||||
if steps_obj:
|
||||
for step in steps_obj:
|
||||
status = Status.created if step.status == "created" else Status.completed
|
||||
status = (
|
||||
Status.created if step.status == "created" else Status.completed
|
||||
)
|
||||
task.steps.append(
|
||||
Step(
|
||||
task_id=step.task_id,
|
||||
@@ -117,7 +163,6 @@ class AgentDB(TaskDB):
|
||||
else:
|
||||
raise DataNotFoundError("Task not found")
|
||||
|
||||
|
||||
async def get_step(self, task_id: int, step_id: int) -> Step:
|
||||
session = self.Session()
|
||||
if (
|
||||
@@ -126,13 +171,24 @@ class AgentDB(TaskDB):
|
||||
.first()
|
||||
):
|
||||
status = Status.completed if step.status == "completed" else Status.created
|
||||
return Step(task_id=task_id, step_id=step_id, name=step.name, status=status, is_last=step.is_last == 1,
|
||||
additional_properties=step.additional_properties)
|
||||
return Step(
|
||||
task_id=task_id,
|
||||
step_id=step_id,
|
||||
name=step.name,
|
||||
status=status,
|
||||
is_last=step.is_last == 1,
|
||||
additional_properties=step.additional_properties,
|
||||
)
|
||||
else:
|
||||
raise DataNotFoundError("Step not found")
|
||||
|
||||
async def update_step(self, task_id: str, step_id: str, status: str,
|
||||
additional_properties: Optional[Dict[str, str]] = None) -> Step:
|
||||
async def update_step(
|
||||
self,
|
||||
task_id: str,
|
||||
step_id: str,
|
||||
status: str,
|
||||
additional_properties: Optional[Dict[str, str]] = None,
|
||||
) -> Step:
|
||||
session = self.Session()
|
||||
if (
|
||||
step := session.query(StepModel)
|
||||
@@ -153,8 +209,11 @@ class AgentDB(TaskDB):
|
||||
.filter_by(task_id=task_id, artifact_id=artifact_id)
|
||||
.first()
|
||||
):
|
||||
return Artifact(artifact_id=artifact.artifact_id, file_name=artifact.file_name,
|
||||
relative_path=artifact.relative_path)
|
||||
return Artifact(
|
||||
artifact_id=artifact.artifact_id,
|
||||
file_name=artifact.file_name,
|
||||
relative_path=artifact.relative_path,
|
||||
)
|
||||
else:
|
||||
raise DataNotFoundError("Artifact not found")
|
||||
|
||||
@@ -172,9 +231,24 @@ class AgentDB(TaskDB):
|
||||
async def list_tasks(self) -> List[Task]:
    """Return every stored task.

    Each ``Task`` is built from the row's ``task_id``, ``input`` and
    ``additional_input`` only.
    """
    # Build the result inside the block so the rows are still attached while
    # their columns are read; the session is closed on exit.
    with self.Session() as session:
        tasks = session.query(TaskModel).all()
        return [
            Task(
                task_id=task.task_id,
                input=task.input,
                additional_input=task.additional_input,
            )
            for task in tasks
        ]
|
||||
|
||||
async def list_steps(self, task_id: str) -> List[Step]:
    """Return every stored step belonging to ``task_id``.

    Each ``Step`` is built from the given ``task_id`` and the row's
    ``step_id``, ``name`` and ``status``.
    """
    # Build the result inside the block so the rows are still attached while
    # their columns are read; the session is closed on exit.
    with self.Session() as session:
        steps = session.query(StepModel).filter_by(task_id=task_id).all()
        return [
            Step(
                task_id=task_id,
                step_id=step.step_id,
                name=step.name,
                status=step.status,
            )
            for step in steps
        ]
|
||||
|
||||
@@ -27,7 +27,7 @@ def test_table_creation():
|
||||
)
|
||||
assert cursor.fetchone() is not None
|
||||
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -39,7 +39,7 @@ async def test_create_task():
|
||||
|
||||
task = await agent_db.create_task("task_input")
|
||||
assert task.input == "task_input"
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -49,7 +49,7 @@ async def test_create_and_get_task():
|
||||
await agent_db.create_task("task_input")
|
||||
task = await agent_db.get_task(1)
|
||||
assert task.input == "task_input"
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -58,7 +58,7 @@ async def test_get_task_not_found():
|
||||
agent_db = AgentDB(db_name)
|
||||
with pytest.raises(DataNotFoundError):
|
||||
await agent_db.get_task(9999)
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -69,7 +69,7 @@ async def test_create_and_get_step():
|
||||
await agent_db.create_step(1, "step_name")
|
||||
step = await agent_db.get_step(1, 1)
|
||||
assert step.name == "step_name"
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -82,7 +82,7 @@ async def test_updating_step():
|
||||
|
||||
step = await agent_db.get_step(1, 1)
|
||||
assert step.status.value == "completed"
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -91,7 +91,7 @@ async def test_get_step_not_found():
|
||||
agent_db = AgentDB(db_name)
|
||||
with pytest.raises(DataNotFoundError):
|
||||
await agent_db.get_step(9999, 9999)
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -113,7 +113,7 @@ async def test_get_artifact():
|
||||
assert fetched_artifact.artifact_id == artifact.artifact_id
|
||||
assert fetched_artifact.file_name == "sample_file.txt"
|
||||
assert fetched_artifact.relative_path == "/path/to/sample_file.txt"
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -137,7 +137,7 @@ async def test_get_artifact_file():
|
||||
|
||||
# Then: The fetched artifact matches the original
|
||||
assert fetched_artifact == sample_data
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -156,7 +156,7 @@ async def test_list_tasks():
|
||||
task_ids = [task.task_id for task in fetched_tasks]
|
||||
assert task1.task_id in task_ids
|
||||
assert task2.task_id in task_ids
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -176,4 +176,4 @@ async def test_list_steps():
|
||||
step_ids = [step.step_id for step in fetched_steps]
|
||||
assert step1.step_id in step_ids
|
||||
assert step2.step_id in step_ids
|
||||
os.remove(db_name.split('///')[1])
|
||||
os.remove(db_name.split("///")[1])
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import dash
|
||||
from dash import dcc, html, Input, Output
|
||||
import dash_bootstrap_components as dbc
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import json
|
||||
import os
|
||||
from dash import Input, Output, dcc, html
|
||||
|
||||
|
||||
# Sample data
|
||||
# Function to load data based on the selected folder
|
||||
@@ -14,45 +16,81 @@ def load_data(folder_name):
|
||||
|
||||
|
||||
# List the available subfolders in the reports directory
|
||||
available_folders = sorted([f for f in os.listdir("./agbenchmark/reports") if os.path.isdir(os.path.join("./agbenchmark/reports", f))])
|
||||
available_folders = sorted(
|
||||
[
|
||||
f
|
||||
for f in os.listdir("./agbenchmark/reports")
|
||||
if os.path.isdir(os.path.join("./agbenchmark/reports", f))
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
|
||||
|
||||
|
||||
def generate_table(data_frame):
|
||||
return dbc.Table(
|
||||
# Header
|
||||
[html.Thead(html.Tr([html.Th(col) for col in data_frame.columns]))] +
|
||||
[html.Thead(html.Tr([html.Th(col) for col in data_frame.columns]))]
|
||||
+
|
||||
# Body
|
||||
[html.Tbody([
|
||||
html.Tr([
|
||||
html.Td(data_frame.iloc[i][col], style={'backgroundColor': '#77dd77' if data_frame.iloc[i]['Status'] == 'Passed' else '#ff6961'}) for col in data_frame.columns
|
||||
]) for i in range(len(data_frame))
|
||||
])]
|
||||
[
|
||||
html.Tbody(
|
||||
[
|
||||
html.Tr(
|
||||
[
|
||||
html.Td(
|
||||
data_frame.iloc[i][col],
|
||||
style={
|
||||
"backgroundColor": "#77dd77"
|
||||
if data_frame.iloc[i]["Status"] == "Passed"
|
||||
else "#ff6961"
|
||||
},
|
||||
)
|
||||
for col in data_frame.columns
|
||||
]
|
||||
)
|
||||
for i in range(len(data_frame))
|
||||
]
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
app.layout = dbc.Container([
|
||||
dbc.Row([
|
||||
dbc.Col(html.H1("AG Benchmark Tests Overview"), width={"size": 6, "offset": 3}),
|
||||
]),
|
||||
|
||||
dbc.Row([
|
||||
dbc.Col([
|
||||
dcc.Dropdown(
|
||||
id="folder-selector",
|
||||
options=[{"label": folder_name, "value": folder_name} for folder_name in available_folders],
|
||||
value=None,
|
||||
placeholder="Select a folder to load data"
|
||||
),
|
||||
html.Div(id="folder-data-output")
|
||||
])
|
||||
]),
|
||||
])
|
||||
|
||||
app.layout = dbc.Container(
|
||||
[
|
||||
dbc.Row(
|
||||
[
|
||||
dbc.Col(
|
||||
html.H1("AG Benchmark Tests Overview"),
|
||||
width={"size": 6, "offset": 3},
|
||||
),
|
||||
]
|
||||
),
|
||||
dbc.Row(
|
||||
[
|
||||
dbc.Col(
|
||||
[
|
||||
dcc.Dropdown(
|
||||
id="folder-selector",
|
||||
options=[
|
||||
{"label": folder_name, "value": folder_name}
|
||||
for folder_name in available_folders
|
||||
],
|
||||
value=None,
|
||||
placeholder="Select a folder to load data",
|
||||
),
|
||||
html.Div(id="folder-data-output"),
|
||||
]
|
||||
)
|
||||
]
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@app.callback(
|
||||
Output("folder-data-output", "children"),
|
||||
[Input("folder-selector", "value")]
|
||||
Output("folder-data-output", "children"), [Input("folder-selector", "value")]
|
||||
)
|
||||
def display_folder_data(selected_folder):
|
||||
if not selected_folder:
|
||||
@@ -61,66 +99,102 @@ def display_folder_data(selected_folder):
|
||||
data = load_data(selected_folder)
|
||||
|
||||
# Extract the necessary data from the report
|
||||
command = data['command']
|
||||
benchmark_git_commit_sha = data['benchmark_git_commit_sha'] or "N/A"
|
||||
benchmark_git_commit_sha = benchmark_git_commit_sha.split('/')[-1][:8] if benchmark_git_commit_sha != "N/A" else "N/A"
|
||||
agent_git_commit_sha = data['agent_git_commit_sha'] or "N/A"
|
||||
agent_git_commit_sha = agent_git_commit_sha.split('/')[-1][:8] if agent_git_commit_sha != "N/A" else "N/A"
|
||||
completion_time = data['completion_time']
|
||||
benchmark_start_time = data['benchmark_start_time']
|
||||
run_time = data['metrics']['run_time']
|
||||
highest_difficulty = data['metrics']['highest_difficulty']
|
||||
command = data["command"]
|
||||
benchmark_git_commit_sha = data["benchmark_git_commit_sha"] or "N/A"
|
||||
benchmark_git_commit_sha = (
|
||||
benchmark_git_commit_sha.split("/")[-1][:8]
|
||||
if benchmark_git_commit_sha != "N/A"
|
||||
else "N/A"
|
||||
)
|
||||
agent_git_commit_sha = data["agent_git_commit_sha"] or "N/A"
|
||||
agent_git_commit_sha = (
|
||||
agent_git_commit_sha.split("/")[-1][:8]
|
||||
if agent_git_commit_sha != "N/A"
|
||||
else "N/A"
|
||||
)
|
||||
completion_time = data["completion_time"]
|
||||
benchmark_start_time = data["benchmark_start_time"]
|
||||
run_time = data["metrics"]["run_time"]
|
||||
highest_difficulty = data["metrics"]["highest_difficulty"]
|
||||
|
||||
return [
|
||||
dbc.Row([
|
||||
dbc.Col(html.Div("Start Time: " + benchmark_start_time), width=3),
|
||||
|
||||
dbc.Col(html.Div("Run Time: " + run_time), width=3),
|
||||
dbc.Col(html.Div("Highest Difficulty Achieved: " + highest_difficulty), width=3),
|
||||
dbc.Col(html.Div("Benchmark Git Commit: " + benchmark_git_commit_sha), width=3),
|
||||
], className="mb-3"),
|
||||
|
||||
dbc.Row([
|
||||
dbc.Col(html.Div("Completion Time: " + completion_time), width=3),
|
||||
dbc.Col(html.Div("Command: " + command), width=3),
|
||||
dbc.Col(), # Empty column for alignment
|
||||
|
||||
dbc.Col(html.Div("Agent Git Commit: " + agent_git_commit_sha), width=3),
|
||||
], className="mb-3"),
|
||||
|
||||
dbc.Row([
|
||||
dbc.Col([
|
||||
dcc.Graph(id="category-pass-rate"),
|
||||
])
|
||||
]),
|
||||
dbc.Row([
|
||||
dbc.Col([
|
||||
generate_table(pd.DataFrame({
|
||||
'Test Name': list(data['tests'].keys()),
|
||||
'Status': ['Passed' if t['metrics'].get('success', False) else 'Failed' for t in data['tests'].values()]
|
||||
}))
|
||||
])
|
||||
])
|
||||
dbc.Row(
|
||||
[
|
||||
dbc.Col(html.Div("Start Time: " + benchmark_start_time), width=3),
|
||||
dbc.Col(html.Div("Run Time: " + run_time), width=3),
|
||||
dbc.Col(
|
||||
html.Div("Highest Difficulty Achieved: " + highest_difficulty),
|
||||
width=3,
|
||||
),
|
||||
dbc.Col(
|
||||
html.Div("Benchmark Git Commit: " + benchmark_git_commit_sha),
|
||||
width=3,
|
||||
),
|
||||
],
|
||||
className="mb-3",
|
||||
),
|
||||
dbc.Row(
|
||||
[
|
||||
dbc.Col(html.Div("Completion Time: " + completion_time), width=3),
|
||||
dbc.Col(html.Div("Command: " + command), width=3),
|
||||
dbc.Col(), # Empty column for alignment
|
||||
dbc.Col(html.Div("Agent Git Commit: " + agent_git_commit_sha), width=3),
|
||||
],
|
||||
className="mb-3",
|
||||
),
|
||||
dbc.Row(
|
||||
[
|
||||
dbc.Col(
|
||||
[
|
||||
dcc.Graph(id="category-pass-rate"),
|
||||
]
|
||||
)
|
||||
]
|
||||
),
|
||||
dbc.Row(
|
||||
[
|
||||
dbc.Col(
|
||||
[
|
||||
generate_table(
|
||||
pd.DataFrame(
|
||||
{
|
||||
"Test Name": list(data["tests"].keys()),
|
||||
"Status": [
|
||||
"Passed"
|
||||
if t["metrics"].get("success", False)
|
||||
else "Failed"
|
||||
for t in data["tests"].values()
|
||||
],
|
||||
}
|
||||
)
|
||||
)
|
||||
]
|
||||
)
|
||||
]
|
||||
),
|
||||
]
|
||||
|
||||
@app.callback(
|
||||
Output("subtest-output", "children"),
|
||||
[Input("test-selector", "value")]
|
||||
)
|
||||
|
||||
@app.callback(Output("subtest-output", "children"), [Input("test-selector", "value")])
|
||||
def display_subtests(selected_test):
|
||||
if not selected_test:
|
||||
return "No test selected"
|
||||
|
||||
subtests = data['tests'][selected_test]['tests']
|
||||
df = pd.DataFrame({
|
||||
'Subtest Name': list(subtests.keys()),
|
||||
'Status': ['Passed' if st['metrics']['success'] else 'Failed' for st in subtests.values()]
|
||||
})
|
||||
subtests = data["tests"][selected_test]["tests"]
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
"Subtest Name": list(subtests.keys()),
|
||||
"Status": [
|
||||
"Passed" if st["metrics"]["success"] else "Failed"
|
||||
for st in subtests.values()
|
||||
],
|
||||
}
|
||||
)
|
||||
return generate_table(df)
|
||||
|
||||
|
||||
@app.callback(
|
||||
Output('category-pass-rate', 'figure'),
|
||||
[Input('folder-selector', 'value')]
|
||||
Output("category-pass-rate", "figure"), [Input("folder-selector", "value")]
|
||||
)
|
||||
def update_radar_chart(selected_folder):
|
||||
if not selected_folder:
|
||||
@@ -129,11 +203,11 @@ def update_radar_chart(selected_folder):
|
||||
data = load_data(selected_folder)
|
||||
# Extract all categories from the data
|
||||
categories = set()
|
||||
for test in data['tests'].keys():
|
||||
if 'category' not in data['tests'][test]:
|
||||
for test in data["tests"].keys():
|
||||
if "category" not in data["tests"][test]:
|
||||
print(f"Test {test} has no category")
|
||||
continue
|
||||
cat = data['tests'][test]['category']
|
||||
cat = data["tests"][test]["category"]
|
||||
categories.update(cat)
|
||||
|
||||
# Calculate pass rates for each category
|
||||
@@ -141,31 +215,49 @@ def update_radar_chart(selected_folder):
|
||||
for cat in categories:
|
||||
total_tests = 0
|
||||
passed_tests = 0
|
||||
for test in data['tests'].keys():
|
||||
if 'category' not in data['tests'][test] or cat not in data['tests'][test]['category']:
|
||||
for test in data["tests"].keys():
|
||||
if (
|
||||
"category" not in data["tests"][test]
|
||||
or cat not in data["tests"][test]["category"]
|
||||
):
|
||||
continue
|
||||
total_tests = total_tests + 1 if cat in data['tests'][test]['category'] else total_tests
|
||||
passed_tests = passed_tests + 1 if cat in data['tests'][test]['category'] and data['tests'][test]['metrics']['success'] else passed_tests
|
||||
total_tests = (
|
||||
total_tests + 1
|
||||
if cat in data["tests"][test]["category"]
|
||||
else total_tests
|
||||
)
|
||||
passed_tests = (
|
||||
passed_tests + 1
|
||||
if cat in data["tests"][test]["category"]
|
||||
and data["tests"][test]["metrics"]["success"]
|
||||
else passed_tests
|
||||
)
|
||||
pass_rate[cat] = (passed_tests / total_tests) * 100
|
||||
|
||||
df = pd.DataFrame({
|
||||
'Category': list(pass_rate.keys()),
|
||||
'Pass Rate (%)': list(pass_rate.values())
|
||||
}).sort_values(by=['Category'], ascending=True)
|
||||
|
||||
fig = px.line_polar(df, r='Pass Rate (%)', theta='Category', line_close=True, template="plotly", title="Pass Rate by Category")
|
||||
fig.update_traces(fill='toself')
|
||||
df = pd.DataFrame(
|
||||
{"Category": list(pass_rate.keys()), "Pass Rate (%)": list(pass_rate.values())}
|
||||
).sort_values(by=["Category"], ascending=True)
|
||||
|
||||
fig = px.line_polar(
|
||||
df,
|
||||
r="Pass Rate (%)",
|
||||
theta="Category",
|
||||
line_close=True,
|
||||
template="plotly",
|
||||
title="Pass Rate by Category",
|
||||
)
|
||||
fig.update_traces(fill="toself")
|
||||
|
||||
# Set the radial axis maximum range to 100
|
||||
fig.update_layout(
|
||||
polar=dict(
|
||||
radialaxis=dict(
|
||||
visible=True,
|
||||
range=[0, 100] # Setting range from 0 to 100%
|
||||
visible=True, range=[0, 100] # Setting range from 0 to 100%
|
||||
)
|
||||
)
|
||||
)
|
||||
return fig
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run_server(debug=True)
|
||||
|
||||
13
mypy.ini
Normal file
13
mypy.ini
Normal file
@@ -0,0 +1,13 @@
|
||||
[mypy]
|
||||
namespace_packages = True
|
||||
follow_imports = skip
|
||||
check_untyped_defs = True
|
||||
disallow_untyped_defs = True
|
||||
exclude = ^(agbenchmark/challenges/|agent/|venv|venv-dev)
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-agbenchmark.utils.data_types.*]
|
||||
ignore_errors = True
|
||||
|
||||
[mypy-numpy.*]
|
||||
ignore_errors = True
|
||||
472
poetry.lock
generated
472
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -22,7 +22,7 @@ black = "^23.3.0"
|
||||
pre-commit = "^3.3.3"
|
||||
mypy = "^1.4.1"
|
||||
flake8 = "^6.0.0"
|
||||
agbenchmark = "^0.0.7"
|
||||
agbenchmark = "^0.0.9"
|
||||
types-requests = "^2.31.0.2"
|
||||
pytest = "^7.4.0"
|
||||
pytest-asyncio = "^0.21.1"
|
||||
@@ -39,3 +39,21 @@ dash-bootstrap-components = "^1.4.2"
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.black]
|
||||
line-length = 88
|
||||
target-version = ['py310']
|
||||
include = '\.pyi?$'
|
||||
packages = ["autogpt"]
|
||||
extend-exclude = '(/dist|/.venv|/venv|/build|/agent|agbenchmark/challenges)/'
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
multi_line_output = 3
|
||||
include_trailing_comma = true
|
||||
force_grid_wrap = 0
|
||||
use_parentheses = true
|
||||
ensure_newline_before_comments = true
|
||||
line_length = 88
|
||||
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
|
||||
skip_glob = [".tox", "__pycache__", "*.pyc", "venv*/*", "reports", "venv", "env", "node_modules", ".env", ".venv", "dist", "agent/*", "agbenchmark/challenges/*"]
|
||||
|
||||
Reference in New Issue
Block a user