Add agbenchmark routes (#10)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
merwanehamadi
2023-08-16 18:56:26 -07:00
committed by GitHub
parent d7dd3b2f0b
commit 3d13f9522e
14 changed files with 653 additions and 458 deletions

15
.flake8 Normal file
View File

@@ -0,0 +1,15 @@
[flake8]
max-line-length = 88
select = E303, W293, W291, W292, E305, E231, E302
exclude =
.tox,
__pycache__,
*.pyc,
.env,
venv*/*,
.venv/*,
reports/*,
dist/*,
agent/*,
code,
agbenchmark/challenges/*

107
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,107 @@
name: CI
on:
workflow_dispatch:
branches: [master]
schedule:
- cron: '0 8 * * *'
push:
branches: [master, ci-test*]
pull_request:
branches: [stable, master, release-*]
jobs:
lint:
runs-on: ubuntu-latest
env:
min-python-version: '3.10'
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
submodules: true
- name: Set up Python ${{ env.min-python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ env.min-python-version }}
- id: get_date
name: Get date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python -
- name: Install dependencies
run: |
export POETRY_VIRTUALENVS_IN_PROJECT=true
poetry install -vvv
- name: Lint with flake8
run: poetry run flake8
- name: Check black formatting
run: poetry run black . --exclude test.py --check
if: success() || failure()
- name: Check isort formatting
run: poetry run isort . --check
if: success() || failure()
tests:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
submodules: true
token: ${{ secrets.GH_TOKEN }}
- name: Setup Chrome and ChromeDriver
run: |
sudo apt-get update
sudo apt-get install -y wget
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
sudo dpkg -i google-chrome-stable_current_amd64.deb
sudo apt-get install -f
- name: Set up Python ${{ env.min-python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ env.min-python-version }}
- id: get_date
name: Get date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python -
- name: Install dependencies
run: |
poetry install
- name: Run regression tests
run: |
poetry run python -m autogpt &
newman run https://raw.githubusercontent.com/Significant-Gravitas/devtool-postman/master/Postman%20Collections/devtool_experience.json --env-var "url=http://127.0.0.1:8000" || echo "The backend is not ready yet, so the tests will fail"
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AGENT_NAME: ${{ matrix.agent-name }}
HELICONE_API_KEY: ${{ secrets.HELICONE_API_KEY }}
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt
HELICONE_CACHE_ENABLED: false
HELICONE_PROPERTY_AGENT: ${{ matrix.agent-name }}
REPORT_LOCATION: ${{ format('../../reports/{0}', matrix.agent-name) }}

View File

@@ -1,11 +0,0 @@
"""Thin launcher that starts the agent server inside Poetry's virtualenv."""
import subprocess

if __name__ == "__main__":
    # Delegate to `poetry run python -m autogpt` so the project's
    # managed virtualenv (and its pinned dependencies) are used.
    subprocess.run(["poetry", "run", "python", "-m", "autogpt"])

View File

@@ -1 +0,0 @@
{"workspace": "agbenchmark/workspace", "entry_path": "agbenchmark.benchmarks", "api_mode": "True", "host": "http://localhost:8000"}

View File

@@ -1 +0,0 @@
{}

View File

@@ -5,10 +5,13 @@ from dotenv import load_dotenv
import autogpt.agent
import autogpt.db
from autogpt.benchmark_integration import add_benchmark_routes
if __name__ == "__main__":
"""Runs the agent server"""
load_dotenv()
router = add_benchmark_routes()
database_name = os.getenv("DATABASE_STRING")
print(database_name)
port = os.getenv("PORT")
@@ -19,4 +22,4 @@ if __name__ == "__main__":
agent = Agent.setup_agent(auto_gpt.task_handler, auto_gpt.step_handler)
agent.db = database
agent.workspace = workspace
agent.start(port=port)
agent.start(port=port, router=router)

View File

@@ -0,0 +1,28 @@
from pathlib import Path
from agbenchmark.app import get_artifact, get_skill_tree
from agent_protocol.agent import base_router
from fastapi import APIRouter
from fastapi import (
HTTPException as FastAPIHTTPException, # Import HTTPException from FastAPI
)
from fastapi.responses import FileResponse
def add_benchmark_routes():
    """Mount the agbenchmark endpoints onto the agent-protocol base router.

    Adds two GET routes — the benchmark skill tree and per-challenge
    artifact downloads — then returns the combined ``base_router`` so the
    caller can hand it to the agent server.
    """
    router = APIRouter()

    @router.get("/skill_tree")
    async def get_skill_tree_endpoint() -> dict:  # distinct name: avoids shadowing the imported helper
        """Return the benchmark skill tree as a JSON-serializable dict."""
        return get_skill_tree()

    @router.get("/agent/challenges/{challenge_id}/artifacts/{artifact_id}")
    async def get_artifact_endpoint(
        challenge_id: str, artifact_id: str
    ) -> FileResponse:
        """Stream the artifact file belonging to the given challenge."""
        return get_artifact(challenge_id, artifact_id)

    # Merge the benchmark routes into the shared agent-protocol router.
    base_router.include_router(router)
    return base_router

View File

@@ -5,11 +5,20 @@ IT IS NOT ADVISED TO USE THIS IN PRODUCTION!
"""
from typing import Dict, List, Optional
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Boolean, LargeBinary
from sqlalchemy.orm import relationship, sessionmaker, DeclarativeBase
from agent_protocol import Artifact, Step, Task, TaskDB
from agent_protocol.models import Status, TaskInput
from sqlalchemy import (
Boolean,
Column,
ForeignKey,
Integer,
LargeBinary,
String,
create_engine,
)
from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker
class Base(DeclarativeBase):
pass
@@ -20,7 +29,7 @@ class DataNotFoundError(Exception):
class TaskModel(Base):
__tablename__ = 'tasks'
__tablename__ = "tasks"
task_id = Column(Integer, primary_key=True, autoincrement=True)
input = Column(String)
@@ -31,10 +40,10 @@ class TaskModel(Base):
class StepModel(Base):
__tablename__ = 'steps'
__tablename__ = "steps"
step_id = Column(Integer, primary_key=True, autoincrement=True)
task_id = Column(Integer, ForeignKey('tasks.task_id'))
task_id = Column(Integer, ForeignKey("tasks.task_id"))
name = Column(String)
status = Column(String)
is_last = Column(Boolean, default=False)
@@ -44,18 +53,19 @@ class StepModel(Base):
class ArtifactModel(Base):
__tablename__ = 'artifacts'
__tablename__ = "artifacts"
artifact_id = Column(Integer, primary_key=True, autoincrement=True)
task_id = Column(Integer, ForeignKey('tasks.task_id'))
step_id = Column(Integer, ForeignKey('steps.step_id'))
task_id = Column(Integer, ForeignKey("tasks.task_id"))
step_id = Column(Integer, ForeignKey("steps.step_id"))
file_name = Column(String)
relative_path = Column(String)
file_data = Column(LargeBinary)
task = relationship("TaskModel", back_populates="artifacts")
#sqlite:///{database_name}
# sqlite:///{database_name}
class AgentDB(TaskDB):
def __init__(self, database_string) -> None:
super().__init__()
@@ -64,30 +74,59 @@ class AgentDB(TaskDB):
self.Session = sessionmaker(bind=self.engine)
print("Databases Created")
async def create_task(self, input: Optional[str], additional_input: Optional[TaskInput] = None,
artifacts: List[Artifact] = None, steps: List[Step] = None) -> Task:
async def create_task(
self,
input: Optional[str],
additional_input: Optional[TaskInput] = None,
artifacts: List[Artifact] = None,
steps: List[Step] = None,
) -> Task:
session = self.Session()
new_task = TaskModel(input=input, additional_input=additional_input.json() if additional_input else None)
new_task = TaskModel(
input=input,
additional_input=additional_input.json() if additional_input else None,
)
session.add(new_task)
session.commit()
session.refresh(new_task)
return await self.get_task(new_task.task_id)
async def create_step(self, task_id: str, name: Optional[str] = None, is_last: bool = False,
additional_properties: Optional[Dict[str, str]] = None) -> Step:
async def create_step(
self,
task_id: str,
name: Optional[str] = None,
is_last: bool = False,
additional_properties: Optional[Dict[str, str]] = None,
) -> Step:
session = self.Session()
new_step = StepModel(task_id=task_id, name=name, status="created", is_last=is_last,
additional_properties=additional_properties)
new_step = StepModel(
task_id=task_id,
name=name,
status="created",
is_last=is_last,
additional_properties=additional_properties,
)
session.add(new_step)
session.commit()
session.refresh(new_step)
return await self.get_step(task_id, new_step.step_id)
async def create_artifact(self, task_id: str, file_name: str, relative_path: Optional[str] = None,
step_id: Optional[str] = None, file_data: bytes | None = None) -> Artifact:
async def create_artifact(
self,
task_id: str,
file_name: str,
relative_path: Optional[str] = None,
step_id: Optional[str] = None,
file_data: bytes | None = None,
) -> Artifact:
session = self.Session()
new_artifact = ArtifactModel(task_id=task_id, step_id=step_id, file_name=file_name,
relative_path=relative_path, file_data=file_data)
new_artifact = ArtifactModel(
task_id=task_id,
step_id=step_id,
file_name=file_name,
relative_path=relative_path,
file_data=file_data,
)
session.add(new_artifact)
session.commit()
session.refresh(new_artifact)
@@ -98,11 +137,18 @@ class AgentDB(TaskDB):
session = self.Session()
task_obj = session.query(TaskModel).filter_by(task_id=task_id).first()
if task_obj:
task = Task(task_id=task_obj.task_id, input=task_obj.input, additional_input=task_obj.additional_input, steps=[])
task = Task(
task_id=task_obj.task_id,
input=task_obj.input,
additional_input=task_obj.additional_input,
steps=[],
)
steps_obj = session.query(StepModel).filter_by(task_id=task_id).all()
if steps_obj:
for step in steps_obj:
status = Status.created if step.status == "created" else Status.completed
status = (
Status.created if step.status == "created" else Status.completed
)
task.steps.append(
Step(
task_id=step.task_id,
@@ -117,7 +163,6 @@ class AgentDB(TaskDB):
else:
raise DataNotFoundError("Task not found")
async def get_step(self, task_id: int, step_id: int) -> Step:
session = self.Session()
if (
@@ -126,13 +171,24 @@ class AgentDB(TaskDB):
.first()
):
status = Status.completed if step.status == "completed" else Status.created
return Step(task_id=task_id, step_id=step_id, name=step.name, status=status, is_last=step.is_last == 1,
additional_properties=step.additional_properties)
return Step(
task_id=task_id,
step_id=step_id,
name=step.name,
status=status,
is_last=step.is_last == 1,
additional_properties=step.additional_properties,
)
else:
raise DataNotFoundError("Step not found")
async def update_step(self, task_id: str, step_id: str, status: str,
additional_properties: Optional[Dict[str, str]] = None) -> Step:
async def update_step(
self,
task_id: str,
step_id: str,
status: str,
additional_properties: Optional[Dict[str, str]] = None,
) -> Step:
session = self.Session()
if (
step := session.query(StepModel)
@@ -153,8 +209,11 @@ class AgentDB(TaskDB):
.filter_by(task_id=task_id, artifact_id=artifact_id)
.first()
):
return Artifact(artifact_id=artifact.artifact_id, file_name=artifact.file_name,
relative_path=artifact.relative_path)
return Artifact(
artifact_id=artifact.artifact_id,
file_name=artifact.file_name,
relative_path=artifact.relative_path,
)
else:
raise DataNotFoundError("Artifact not found")
@@ -172,9 +231,24 @@ class AgentDB(TaskDB):
async def list_tasks(self) -> List[Task]:
session = self.Session()
tasks = session.query(TaskModel).all()
return [Task(task_id=task.task_id, input=task.input, additional_input=task.additional_input) for task in tasks]
return [
Task(
task_id=task.task_id,
input=task.input,
additional_input=task.additional_input,
)
for task in tasks
]
async def list_steps(self, task_id: str) -> List[Step]:
session = self.Session()
steps = session.query(StepModel).filter_by(task_id=task_id).all()
return [Step(task_id=task_id, step_id=step.step_id, name=step.name, status=step.status) for step in steps]
return [
Step(
task_id=task_id,
step_id=step.step_id,
name=step.name,
status=step.status,
)
for step in steps
]

View File

@@ -27,7 +27,7 @@ def test_table_creation():
)
assert cursor.fetchone() is not None
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -39,7 +39,7 @@ async def test_create_task():
task = await agent_db.create_task("task_input")
assert task.input == "task_input"
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -49,7 +49,7 @@ async def test_create_and_get_task():
await agent_db.create_task("task_input")
task = await agent_db.get_task(1)
assert task.input == "task_input"
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -58,7 +58,7 @@ async def test_get_task_not_found():
agent_db = AgentDB(db_name)
with pytest.raises(DataNotFoundError):
await agent_db.get_task(9999)
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -69,7 +69,7 @@ async def test_create_and_get_step():
await agent_db.create_step(1, "step_name")
step = await agent_db.get_step(1, 1)
assert step.name == "step_name"
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -82,7 +82,7 @@ async def test_updating_step():
step = await agent_db.get_step(1, 1)
assert step.status.value == "completed"
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -91,7 +91,7 @@ async def test_get_step_not_found():
agent_db = AgentDB(db_name)
with pytest.raises(DataNotFoundError):
await agent_db.get_step(9999, 9999)
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -113,7 +113,7 @@ async def test_get_artifact():
assert fetched_artifact.artifact_id == artifact.artifact_id
assert fetched_artifact.file_name == "sample_file.txt"
assert fetched_artifact.relative_path == "/path/to/sample_file.txt"
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -137,7 +137,7 @@ async def test_get_artifact_file():
# Then: The fetched artifact matches the original
assert fetched_artifact == sample_data
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -156,7 +156,7 @@ async def test_list_tasks():
task_ids = [task.task_id for task in fetched_tasks]
assert task1.task_id in task_ids
assert task2.task_id in task_ids
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])
@pytest.mark.asyncio
@@ -176,4 +176,4 @@ async def test_list_steps():
step_ids = [step.step_id for step in fetched_steps]
assert step1.step_id in step_ids
assert step2.step_id in step_ids
os.remove(db_name.split('///')[1])
os.remove(db_name.split("///")[1])

View File

@@ -1,10 +1,12 @@
import json
import os
import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import json
import os
from dash import Input, Output, dcc, html
# Sample data
# Function to load data based on the selected folder
@@ -14,45 +16,81 @@ def load_data(folder_name):
# List the available subfolders in the reports directory
available_folders = sorted([f for f in os.listdir("./agbenchmark/reports") if os.path.isdir(os.path.join("./agbenchmark/reports", f))])
available_folders = sorted(
[
f
for f in os.listdir("./agbenchmark/reports")
if os.path.isdir(os.path.join("./agbenchmark/reports", f))
]
)
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
def generate_table(data_frame):
return dbc.Table(
# Header
[html.Thead(html.Tr([html.Th(col) for col in data_frame.columns]))] +
[html.Thead(html.Tr([html.Th(col) for col in data_frame.columns]))]
+
# Body
[html.Tbody([
html.Tr([
html.Td(data_frame.iloc[i][col], style={'backgroundColor': '#77dd77' if data_frame.iloc[i]['Status'] == 'Passed' else '#ff6961'}) for col in data_frame.columns
]) for i in range(len(data_frame))
])]
[
html.Tbody(
[
html.Tr(
[
html.Td(
data_frame.iloc[i][col],
style={
"backgroundColor": "#77dd77"
if data_frame.iloc[i]["Status"] == "Passed"
else "#ff6961"
},
)
for col in data_frame.columns
]
)
for i in range(len(data_frame))
]
)
]
)
app.layout = dbc.Container([
dbc.Row([
dbc.Col(html.H1("AG Benchmark Tests Overview"), width={"size": 6, "offset": 3}),
]),
dbc.Row([
dbc.Col([
dcc.Dropdown(
id="folder-selector",
options=[{"label": folder_name, "value": folder_name} for folder_name in available_folders],
value=None,
placeholder="Select a folder to load data"
),
html.Div(id="folder-data-output")
])
]),
])
app.layout = dbc.Container(
[
dbc.Row(
[
dbc.Col(
html.H1("AG Benchmark Tests Overview"),
width={"size": 6, "offset": 3},
),
]
),
dbc.Row(
[
dbc.Col(
[
dcc.Dropdown(
id="folder-selector",
options=[
{"label": folder_name, "value": folder_name}
for folder_name in available_folders
],
value=None,
placeholder="Select a folder to load data",
),
html.Div(id="folder-data-output"),
]
)
]
),
]
)
@app.callback(
Output("folder-data-output", "children"),
[Input("folder-selector", "value")]
Output("folder-data-output", "children"), [Input("folder-selector", "value")]
)
def display_folder_data(selected_folder):
if not selected_folder:
@@ -61,66 +99,102 @@ def display_folder_data(selected_folder):
data = load_data(selected_folder)
# Extract the necessary data from the report
command = data['command']
benchmark_git_commit_sha = data['benchmark_git_commit_sha'] or "N/A"
benchmark_git_commit_sha = benchmark_git_commit_sha.split('/')[-1][:8] if benchmark_git_commit_sha != "N/A" else "N/A"
agent_git_commit_sha = data['agent_git_commit_sha'] or "N/A"
agent_git_commit_sha = agent_git_commit_sha.split('/')[-1][:8] if agent_git_commit_sha != "N/A" else "N/A"
completion_time = data['completion_time']
benchmark_start_time = data['benchmark_start_time']
run_time = data['metrics']['run_time']
highest_difficulty = data['metrics']['highest_difficulty']
command = data["command"]
benchmark_git_commit_sha = data["benchmark_git_commit_sha"] or "N/A"
benchmark_git_commit_sha = (
benchmark_git_commit_sha.split("/")[-1][:8]
if benchmark_git_commit_sha != "N/A"
else "N/A"
)
agent_git_commit_sha = data["agent_git_commit_sha"] or "N/A"
agent_git_commit_sha = (
agent_git_commit_sha.split("/")[-1][:8]
if agent_git_commit_sha != "N/A"
else "N/A"
)
completion_time = data["completion_time"]
benchmark_start_time = data["benchmark_start_time"]
run_time = data["metrics"]["run_time"]
highest_difficulty = data["metrics"]["highest_difficulty"]
return [
dbc.Row([
dbc.Col(html.Div("Start Time: " + benchmark_start_time), width=3),
dbc.Col(html.Div("Run Time: " + run_time), width=3),
dbc.Col(html.Div("Highest Difficulty Achieved: " + highest_difficulty), width=3),
dbc.Col(html.Div("Benchmark Git Commit: " + benchmark_git_commit_sha), width=3),
], className="mb-3"),
dbc.Row([
dbc.Col(html.Div("Completion Time: " + completion_time), width=3),
dbc.Col(html.Div("Command: " + command), width=3),
dbc.Col(), # Empty column for alignment
dbc.Col(html.Div("Agent Git Commit: " + agent_git_commit_sha), width=3),
], className="mb-3"),
dbc.Row([
dbc.Col([
dcc.Graph(id="category-pass-rate"),
])
]),
dbc.Row([
dbc.Col([
generate_table(pd.DataFrame({
'Test Name': list(data['tests'].keys()),
'Status': ['Passed' if t['metrics'].get('success', False) else 'Failed' for t in data['tests'].values()]
}))
])
])
dbc.Row(
[
dbc.Col(html.Div("Start Time: " + benchmark_start_time), width=3),
dbc.Col(html.Div("Run Time: " + run_time), width=3),
dbc.Col(
html.Div("Highest Difficulty Achieved: " + highest_difficulty),
width=3,
),
dbc.Col(
html.Div("Benchmark Git Commit: " + benchmark_git_commit_sha),
width=3,
),
],
className="mb-3",
),
dbc.Row(
[
dbc.Col(html.Div("Completion Time: " + completion_time), width=3),
dbc.Col(html.Div("Command: " + command), width=3),
dbc.Col(), # Empty column for alignment
dbc.Col(html.Div("Agent Git Commit: " + agent_git_commit_sha), width=3),
],
className="mb-3",
),
dbc.Row(
[
dbc.Col(
[
dcc.Graph(id="category-pass-rate"),
]
)
]
),
dbc.Row(
[
dbc.Col(
[
generate_table(
pd.DataFrame(
{
"Test Name": list(data["tests"].keys()),
"Status": [
"Passed"
if t["metrics"].get("success", False)
else "Failed"
for t in data["tests"].values()
],
}
)
)
]
)
]
),
]
@app.callback(
Output("subtest-output", "children"),
[Input("test-selector", "value")]
)
@app.callback(Output("subtest-output", "children"), [Input("test-selector", "value")])
def display_subtests(selected_test):
if not selected_test:
return "No test selected"
subtests = data['tests'][selected_test]['tests']
df = pd.DataFrame({
'Subtest Name': list(subtests.keys()),
'Status': ['Passed' if st['metrics']['success'] else 'Failed' for st in subtests.values()]
})
subtests = data["tests"][selected_test]["tests"]
df = pd.DataFrame(
{
"Subtest Name": list(subtests.keys()),
"Status": [
"Passed" if st["metrics"]["success"] else "Failed"
for st in subtests.values()
],
}
)
return generate_table(df)
@app.callback(
Output('category-pass-rate', 'figure'),
[Input('folder-selector', 'value')]
Output("category-pass-rate", "figure"), [Input("folder-selector", "value")]
)
def update_radar_chart(selected_folder):
if not selected_folder:
@@ -129,11 +203,11 @@ def update_radar_chart(selected_folder):
data = load_data(selected_folder)
# Extract all categories from the data
categories = set()
for test in data['tests'].keys():
if 'category' not in data['tests'][test]:
for test in data["tests"].keys():
if "category" not in data["tests"][test]:
print(f"Test {test} has no category")
continue
cat = data['tests'][test]['category']
cat = data["tests"][test]["category"]
categories.update(cat)
# Calculate pass rates for each category
@@ -141,31 +215,49 @@ def update_radar_chart(selected_folder):
for cat in categories:
total_tests = 0
passed_tests = 0
for test in data['tests'].keys():
if 'category' not in data['tests'][test] or cat not in data['tests'][test]['category']:
for test in data["tests"].keys():
if (
"category" not in data["tests"][test]
or cat not in data["tests"][test]["category"]
):
continue
total_tests = total_tests + 1 if cat in data['tests'][test]['category'] else total_tests
passed_tests = passed_tests + 1 if cat in data['tests'][test]['category'] and data['tests'][test]['metrics']['success'] else passed_tests
total_tests = (
total_tests + 1
if cat in data["tests"][test]["category"]
else total_tests
)
passed_tests = (
passed_tests + 1
if cat in data["tests"][test]["category"]
and data["tests"][test]["metrics"]["success"]
else passed_tests
)
pass_rate[cat] = (passed_tests / total_tests) * 100
df = pd.DataFrame({
'Category': list(pass_rate.keys()),
'Pass Rate (%)': list(pass_rate.values())
}).sort_values(by=['Category'], ascending=True)
fig = px.line_polar(df, r='Pass Rate (%)', theta='Category', line_close=True, template="plotly", title="Pass Rate by Category")
fig.update_traces(fill='toself')
df = pd.DataFrame(
{"Category": list(pass_rate.keys()), "Pass Rate (%)": list(pass_rate.values())}
).sort_values(by=["Category"], ascending=True)
fig = px.line_polar(
df,
r="Pass Rate (%)",
theta="Category",
line_close=True,
template="plotly",
title="Pass Rate by Category",
)
fig.update_traces(fill="toself")
# Set the radial axis maximum range to 100
fig.update_layout(
polar=dict(
radialaxis=dict(
visible=True,
range=[0, 100] # Setting range from 0 to 100%
visible=True, range=[0, 100] # Setting range from 0 to 100%
)
)
)
return fig
if __name__ == "__main__":
app.run_server(debug=True)

13
mypy.ini Normal file
View File

@@ -0,0 +1,13 @@
[mypy]
namespace_packages = True
follow_imports = skip
check_untyped_defs = True
disallow_untyped_defs = True
exclude = ^(agbenchmark/challenges/|agent/|venv|venv-dev)
ignore_missing_imports = True
[mypy-agbenchmark.utils.data_types.*]
ignore_errors = True
[mypy-numpy.*]
ignore_errors = True

472
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -22,7 +22,7 @@ black = "^23.3.0"
pre-commit = "^3.3.3"
mypy = "^1.4.1"
flake8 = "^6.0.0"
agbenchmark = "^0.0.7"
agbenchmark = "^0.0.9"
types-requests = "^2.31.0.2"
pytest = "^7.4.0"
pytest-asyncio = "^0.21.1"
@@ -39,3 +39,21 @@ dash-bootstrap-components = "^1.4.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.black]
line-length = 88
target-version = ['py310']
include = '\.pyi?$'
packages = ["autogpt"]
extend-exclude = '(/dist|/.venv|/venv|/build|/agent|agbenchmark/challenges)/'
[tool.isort]
profile = "black"
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
line_length = 88
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
skip_glob = [".tox", "__pycache__", "*.pyc", "venv*/*", "reports", "venv", "env", "node_modules", ".env", ".venv", "dist", "agent/*", "agbenchmark/challenges/*"]