"""Dash dashboard for browsing AG Benchmark report folders.

Each sub-folder of ``REPORTS_DIR`` is expected to contain a ``report.json``.
Selecting a folder shows run metadata, a per-category pass-rate radar chart
and a pass/fail table of the tests in that report.
"""
import json
import os

import dash
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
from dash import Input, Output, State, dcc, html

# Directory holding one sub-folder per benchmark run.
REPORTS_DIR = "./agbenchmark/reports"

# Background colours used for passed / failed table rows.
PASS_COLOUR = "#77dd77"
FAIL_COLOUR = "#ff6961"


def load_data(folder_name):
    """Load and return the parsed ``report.json`` of one report folder.

    Args:
        folder_name: Name of a sub-folder of ``REPORTS_DIR``.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the report file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    report_path = f"{REPORTS_DIR}/{folder_name}/report.json"
    with open(report_path, "r", encoding="utf-8") as report_file:
        return json.load(report_file)


# List the available subfolders in the reports directory
available_folders = sorted(
    entry
    for entry in os.listdir(REPORTS_DIR)
    if os.path.isdir(os.path.join(REPORTS_DIR, entry))
)

# Some callbacks target components that are only created by other callbacks
# (e.g. the "category-pass-rate" graph), so they are absent from the initial
# layout; Dash needs this flag to accept such callbacks.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    suppress_callback_exceptions=True,
)


def _short_sha(ref):
    """Return the first 8 characters of the last ``/`` segment of *ref*.

    Empty/``None`` values and the literal ``"N/A"`` yield ``"N/A"``.
    """
    if not ref or ref == "N/A":
        return "N/A"
    return ref.split("/")[-1][:8]


def _status_label(test):
    """Return ``"Passed"`` if the test's metrics report success, else ``"Failed"``."""
    return "Passed" if test["metrics"].get("success", False) else "Failed"


def _test_categories(test):
    """Return the ``category`` entry of one test as a list."""
    categories = test.get("category", [])
    # A bare string would otherwise be iterated character by character.
    if isinstance(categories, str):
        return [categories]
    return list(categories)


def _category_pass_rates(tests):
    """Compute the pass rate (in percent) of every category found in *tests*.

    Tests without a ``category`` key are reported on stdout and skipped.

    Args:
        tests: Mapping of test name to test entry.

    Returns:
        Dict mapping category name to its pass rate in the range 0-100.
    """
    totals = {}
    passes = {}
    for name, test in tests.items():
        if "category" not in test:
            print(f"Test {name} has no category")
            continue
        # set() so that a category listed twice on one test counts once.
        for category in set(_test_categories(test)):
            succeeded = 1 if test["metrics"].get("success", False) else 0
            totals[category] = totals.get(category, 0) + 1
            passes[category] = passes.get(category, 0) + succeeded
    return {
        category: (passes[category] / total) * 100
        for category, total in totals.items()
    }


def generate_table(data_frame):
    """Render *data_frame* as a Bootstrap table with pass/fail row colouring.

    Args:
        data_frame: DataFrame with a ``"Status"`` column; rows whose status is
            ``"Passed"`` are coloured green, all others red.

    Returns:
        A ``dbc.Table`` with one header row and one body row per frame row.
    """
    columns = list(data_frame.columns)
    header = html.Thead(html.Tr([html.Th(col) for col in columns]))

    body_rows = []
    for _, row in data_frame.iterrows():
        colour = PASS_COLOUR if row["Status"] == "Passed" else FAIL_COLOUR
        body_rows.append(
            html.Tr(
                [
                    html.Td(row[col], style={"backgroundColor": colour})
                    for col in columns
                ]
            )
        )

    return dbc.Table([header, html.Tbody(body_rows)])


app.layout = dbc.Container(
    [
        dbc.Row(
            [
                dbc.Col(
                    html.H1("AG Benchmark Tests Overview"),
                    width={"size": 6, "offset": 3},
                ),
            ]
        ),
        dbc.Row(
            [
                dbc.Col(
                    [
                        dcc.Dropdown(
                            id="folder-selector",
                            options=[
                                {"label": folder_name, "value": folder_name}
                                for folder_name in available_folders
                            ],
                            value=None,
                            placeholder="Select a folder to load data",
                        ),
                        html.Div(id="folder-data-output"),
                    ]
                )
            ]
        ),
    ]
)


@app.callback(
    Output("folder-data-output", "children"), [Input("folder-selector", "value")]
)
def display_folder_data(selected_folder):
    """Build the detail view (metadata, chart placeholder, test table).

    Args:
        selected_folder: Value of the folder dropdown, or a falsy value when
            nothing is selected.

    Returns:
        A placeholder string when no folder is selected, otherwise a list of
        ``dbc.Row`` components.
    """
    if not selected_folder:
        return "No folder selected"

    data = load_data(selected_folder)
    metrics = data["metrics"]
    tests = data["tests"]

    benchmark_sha = _short_sha(data["benchmark_git_commit_sha"])
    agent_sha = _short_sha(data["agent_git_commit_sha"])

    # f-strings rather than "+" so non-string report values (e.g. null)
    # are rendered instead of raising TypeError.
    first_summary_row = dbc.Row(
        [
            dbc.Col(
                html.Div(f"Start Time: {data['benchmark_start_time']}"), width=3
            ),
            dbc.Col(html.Div(f"Run Time: {metrics['run_time']}"), width=3),
            dbc.Col(
                html.Div(
                    f"Highest Difficulty Achieved: {metrics['highest_difficulty']}"
                ),
                width=3,
            ),
            dbc.Col(html.Div(f"Benchmark Git Commit: {benchmark_sha}"), width=3),
        ],
        className="mb-3",
    )
    second_summary_row = dbc.Row(
        [
            dbc.Col(
                html.Div(f"Completion Time: {data['completion_time']}"), width=3
            ),
            dbc.Col(html.Div(f"Command: {data['command']}"), width=3),
            dbc.Col(),  # Empty column for alignment
            dbc.Col(html.Div(f"Agent Git Commit: {agent_sha}"), width=3),
        ],
        className="mb-3",
    )

    # The figure itself is filled in by update_radar_chart.
    chart_row = dbc.Row([dbc.Col([dcc.Graph(id="category-pass-rate")])])

    test_frame = pd.DataFrame(
        {
            "Test Name": list(tests.keys()),
            "Status": [_status_label(test) for test in tests.values()],
        }
    )
    table_row = dbc.Row([dbc.Col([generate_table(test_frame)])])

    return [first_summary_row, second_summary_row, chart_row, table_row]


@app.callback(
    Output("subtest-output", "children"),
    [Input("test-selector", "value")],
    [State("folder-selector", "value")],
)
def display_subtests(selected_test, selected_folder=None):
    """Render the pass/fail table of the sub-tests of one test.

    Args:
        selected_test: Name of the test whose sub-tests should be shown.
        selected_folder: Currently selected report folder (supplied by Dash
            as callback state); needed to know which report to read.

    Returns:
        A placeholder string when the test or folder is missing, otherwise a
        ``dbc.Table``.
    """
    if not selected_test:
        return "No test selected"
    if not selected_folder:
        return "No folder selected"

    # The report is loaded here; there is no module-level ``data`` to rely on.
    data = load_data(selected_folder)
    subtests = data["tests"][selected_test]["tests"]
    df = pd.DataFrame(
        {
            "Subtest Name": list(subtests.keys()),
            "Status": [_status_label(subtest) for subtest in subtests.values()],
        }
    )
    return generate_table(df)


@app.callback(
    Output("category-pass-rate", "figure"), [Input("folder-selector", "value")]
)
def update_radar_chart(selected_folder):
    """Build the per-category pass-rate radar chart for the selected folder.

    Args:
        selected_folder: Value of the folder dropdown, or a falsy value when
            nothing is selected.

    Returns:
        A Plotly figure, or ``dash.no_update`` when no folder is selected
        (a plain string is not a valid ``figure`` value).
    """
    if not selected_folder:
        return dash.no_update

    data = load_data(selected_folder)
    pass_rate = _category_pass_rates(data["tests"])

    df = pd.DataFrame(
        {
            "Category": list(pass_rate.keys()),
            "Pass Rate (%)": list(pass_rate.values()),
        }
    ).sort_values(by=["Category"], ascending=True)

    fig = px.line_polar(
        df,
        r="Pass Rate (%)",
        theta="Category",
        line_close=True,
        template="plotly",
        title="Pass Rate by Category",
    )
    fig.update_traces(fill="toself")

    # Pin the radial axis to 0-100 so charts are comparable between runs.
    fig.update_layout(polar=dict(radialaxis=dict(visible=True, range=[0, 100])))

    return fig


if __name__ == "__main__":
    app.run_server(debug=True)