mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-27 19:04:25 +01:00
264 lines
7.7 KiB
Python
264 lines
7.7 KiB
Python
import json
|
|
import os
|
|
|
|
import dash
|
|
import dash_bootstrap_components as dbc
|
|
import pandas as pd
|
|
import plotly.express as px
|
|
from dash import Input, Output, dcc, html
|
|
|
|
|
|
# Report data (read on demand from ./agbenchmark/reports/<folder>/report.json)
# Function to load data based on the selected folder
|
|
def load_data(folder_name):
    """Load the benchmark report stored in one run folder.

    Args:
        folder_name: Name of a sub-directory of ``./agbenchmark/reports``
            (the path is resolved relative to the current working directory).

    Returns:
        The parsed contents of that folder's ``report.json``.

    Raises:
        FileNotFoundError: If the folder does not contain ``report.json``.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    report_path = f"./agbenchmark/reports/{folder_name}/report.json"
    # JSON text is UTF-8 by specification. Without an explicit encoding,
    # ``open`` uses the locale default, which mis-decodes non-ASCII
    # characters on platforms whose default is not UTF-8.
    with open(report_path, encoding="utf-8") as report_file:
        return json.load(report_file)
|
|
|
|
|
|
# Names of the sub-directories found directly under the reports directory,
# in sorted order; entries that are not directories are left out.
available_folders = sorted(
    filter(
        lambda entry: os.path.isdir(os.path.join("./agbenchmark/reports", entry)),
        os.listdir("./agbenchmark/reports"),
    )
)

# The Dash application object, loaded with the Bootstrap stylesheet.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)
|
|
|
|
|
|
def generate_table(data_frame):
    """Render a results data frame as a colour-coded Bootstrap table.

    Args:
        data_frame: A pandas ``DataFrame`` that includes a ``"Status"``
            column. Every cell of a row is coloured from that row's status.

    Returns:
        A ``dbc.Table`` containing a header row built from the column labels
        followed by a body with one table row per data-frame row.
    """
    columns = list(data_frame.columns)
    header = html.Thead(html.Tr([html.Th(col) for col in columns]))

    body_rows = []
    # Walk the rows once instead of indexing ``iloc[i]`` again for every
    # cell, and work out the row colour a single time per row.
    for _, row in data_frame.iterrows():
        # Green for "Passed"; every other status value is shown in red.
        colour = "#77dd77" if row["Status"] == "Passed" else "#ff6961"
        body_rows.append(
            html.Tr(
                [
                    html.Td(row[col], style={"backgroundColor": colour})
                    for col in columns
                ]
            )
        )

    return dbc.Table([header, html.Tbody(body_rows)])
|
|
|
|
|
|
# Heading row: the page title in a column of size 6 with an offset of 3.
_title_row = dbc.Row(
    [
        dbc.Col(
            html.H1("AG Benchmark Tests Overview"),
            width={"size": 6, "offset": 3},
        ),
    ]
)

# One dropdown entry per report folder; label and value are both the name.
_folder_options = [dict(label=name, value=name) for name in available_folders]

# Selector row: the folder dropdown (nothing selected initially) and the
# placeholder Div that the "folder-data-output" callback fills in.
_selector_row = dbc.Row(
    [
        dbc.Col(
            [
                dcc.Dropdown(
                    id="folder-selector",
                    options=_folder_options,
                    value=None,
                    placeholder="Select a folder to load data",
                ),
                html.Div(id="folder-data-output"),
            ]
        )
    ]
)

app.layout = dbc.Container([_title_row, _selector_row])
|
|
|
|
|
|
def _short_sha(ref):
    """Shorten a commit reference for display.

    Returns the first eight characters of the last ``/``-separated part of
    *ref*, or ``"N/A"`` when *ref* is missing, empty, or already ``"N/A"``.
    """
    if not ref or ref == "N/A":
        return "N/A"
    return ref.split("/")[-1][:8]


@app.callback(
    Output("folder-data-output", "children"), [Input("folder-selector", "value")]
)
def display_folder_data(selected_folder):
    """Build the summary view for the report in the selected folder.

    Args:
        selected_folder: Value of the ``folder-selector`` dropdown; falsy
            when nothing is selected.

    Returns:
        The string ``"No folder selected"`` when no folder is chosen.
        Otherwise a list of four ``dbc.Row`` components: two rows of run
        metadata, a row holding the ``category-pass-rate`` graph, and a row
        holding the per-test pass/fail table.

    Raises:
        KeyError: If the report lacks ``command``, ``completion_time``,
            ``benchmark_start_time``, ``metrics`` or ``tests``.
    """
    if not selected_folder:
        return "No folder selected"

    data = load_data(selected_folder)
    metrics = data["metrics"]
    tests = data["tests"]

    # The commit references are optional: a missing or null value is shown
    # as "N/A" instead of failing the whole callback.
    benchmark_git_commit_sha = _short_sha(data.get("benchmark_git_commit_sha"))
    agent_git_commit_sha = _short_sha(data.get("agent_git_commit_sha"))

    def labelled(label, value):
        # Formatting with an f-string (rather than ``label + value``) keeps
        # the callback working when a report value is not a string, e.g. a
        # null completion time.
        return dbc.Col(html.Div(f"{label}: {value}"), width=3)

    # One row per test; a test without a "success" metric counts as failed.
    results = pd.DataFrame(
        {
            "Test Name": list(tests),
            "Status": [
                "Passed" if test["metrics"].get("success", False) else "Failed"
                for test in tests.values()
            ],
        }
    )

    return [
        dbc.Row(
            [
                labelled("Start Time", data["benchmark_start_time"]),
                labelled("Run Time", metrics["run_time"]),
                labelled("Highest Difficulty Achieved", metrics["highest_difficulty"]),
                labelled("Benchmark Git Commit", benchmark_git_commit_sha),
            ],
            className="mb-3",
        ),
        dbc.Row(
            [
                labelled("Completion Time", data["completion_time"]),
                labelled("Command", data["command"]),
                dbc.Col(),  # Empty column for alignment
                labelled("Agent Git Commit", agent_git_commit_sha),
            ],
            className="mb-3",
        ),
        # The figure of this graph is supplied by a separate callback that
        # targets the "category-pass-rate" id.
        dbc.Row([dbc.Col([dcc.Graph(id="category-pass-rate")])]),
        dbc.Row([dbc.Col([generate_table(results)])]),
    ]
|
|
|
|
|
|
# NOTE(review): no component with id "test-selector" or "subtest-output" is
# created by the layout or callbacks visible in this file - confirm that they
# are defined elsewhere, otherwise this callback can never fire.
@app.callback(
    Output("subtest-output", "children"),
    [Input("test-selector", "value"), Input("folder-selector", "value")],
)
def display_subtests(selected_test, selected_folder=None):
    """Build the pass/fail table for the subtests of one test.

    The report is loaded from the folder currently chosen in
    ``folder-selector``. Previously the body read a ``data`` variable that
    was never defined in this scope, so every call with a selected test
    raised ``NameError``.

    Args:
        selected_test: Key of the test inside the report's ``tests``
            mapping; falsy when nothing is selected.
        selected_folder: Report folder to read from; falsy when nothing is
            selected.

    Returns:
        ``"No test selected"`` or ``"No folder selected"`` when the
        respective input is missing, otherwise the table produced by
        ``generate_table`` with one row per subtest.

    Raises:
        KeyError: If *selected_test* is not present in the report.
    """
    if not selected_test:
        return "No test selected"
    if not selected_folder:
        return "No folder selected"

    data = load_data(selected_folder)
    # A test entry without nested "tests" simply yields an empty table.
    subtests = data["tests"][selected_test].get("tests", {})

    df = pd.DataFrame(
        {
            "Subtest Name": list(subtests),
            "Status": [
                # Same rule as the main results table: a missing "success"
                # metric counts as a failure.
                "Passed" if subtest["metrics"].get("success", False) else "Failed"
                for subtest in subtests.values()
            ],
        }
    )
    return generate_table(df)
|
|
|
|
|
|
def _category_pass_rates(tests):
    """Return ``{category: pass rate in percent}`` for a ``tests`` mapping.

    Tests without a ``"category"`` entry are reported on stdout and skipped.
    A test counts once for each distinct category it lists.
    """
    totals = {}
    passed = {}
    for name, test in tests.items():
        if "category" not in test:
            print(f"Test {name} has no category")
            continue

        test_categories = test["category"]
        # A bare string is one category, not a sequence of characters.
        if isinstance(test_categories, str):
            test_categories = [test_categories]

        # A missing "success" metric counts as a failure, matching the
        # results table built in display_folder_data.
        succeeded = bool(test["metrics"].get("success", False))
        for cat in set(test_categories):
            totals[cat] = totals.get(cat, 0) + 1
            passed[cat] = passed.get(cat, 0) + succeeded

    # Every key in ``totals`` has a count of at least one, so the division
    # cannot hit zero.
    return {cat: (passed[cat] / totals[cat]) * 100 for cat in totals}


@app.callback(
    Output("category-pass-rate", "figure"), [Input("folder-selector", "value")]
)
def update_radar_chart(selected_folder):
    """Draw the per-category pass rate of a report as a radar chart.

    Args:
        selected_folder: Value of the ``folder-selector`` dropdown; falsy
            when nothing is selected.

    Returns:
        ``dash.no_update`` when no folder is selected, otherwise a closed,
        filled polar line figure with one spoke per category (sorted by
        name) and a radial axis fixed to the range 0-100.
    """
    if not selected_folder:
        # The output is a Graph ``figure``; a plain string is not a figure,
        # so leave the property untouched instead.
        return dash.no_update

    data = load_data(selected_folder)
    pass_rate = _category_pass_rates(data["tests"])

    df = pd.DataFrame(
        {"Category": list(pass_rate.keys()), "Pass Rate (%)": list(pass_rate.values())}
    ).sort_values(by=["Category"], ascending=True)

    fig = px.line_polar(
        df,
        r="Pass Rate (%)",
        theta="Category",
        line_close=True,
        template="plotly",
        title="Pass Rate by Category",
    )
    fig.update_traces(fill="toself")

    # Set the radial axis maximum range to 100
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True, range=[0, 100]  # Setting range from 0 to 100%
            )
        )
    )
    return fig
|
|
|
|
|
|
# Script entry point: when this file is executed directly (not imported),
# start the server for ``app`` with debug mode switched on.
if __name__ == "__main__":
    app.run_server(debug=True)
|