Auto-GPT/benchmark/notebooks/Visualization.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"import json\n",
"\n",
"def get_last_file_in_directory(directory_path):\n",
" # Get all files in the directory\n",
" files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]\n",
"\n",
" # Sort the files by modification time\n",
" files.sort(key=lambda x: os.path.getmtime(os.path.join(directory_path, x)))\n",
"\n",
" # Return the last file in the list\n",
" return files[-1] if files else None\n",
"\n",
"def get_latest_files_in_subdirectories(directory_path):\n",
" latest_files = []\n",
" for subdir in os.scandir(directory_path):\n",
" if subdir.is_dir():\n",
" latest_file = get_last_file_in_directory(subdir.path)\n",
" if latest_file is not None:\n",
" latest_files.append((subdir.path, latest_file))\n",
" return latest_files"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Optional, Dict, List, Union\n",
"from pydantic import BaseModel, Field\n",
"\n",
"class Metrics(BaseModel):\n",
" difficulty: str\n",
" success: bool\n",
" success_percent: float = Field(..., alias=\"success_%\")\n",
" run_time: Optional[str] = None\n",
" fail_reason: Optional[str] = None\n",
"\n",
"class MetricsOverall(BaseModel):\n",
" run_time: str\n",
" highest_difficulty: str\n",
" percentage: Optional[float] = None\n",
"\n",
"class Test(BaseModel):\n",
" data_path: str\n",
" is_regression: bool\n",
" answer: str\n",
" description: str\n",
" metrics: Metrics\n",
" category: List[str]\n",
" task: Optional[str] = None\n",
" reached_cutoff: Optional[bool] = None\n",
"\n",
"class SuiteTest(BaseModel):\n",
" data_path: str\n",
" metrics: MetricsOverall\n",
" tests: Dict[str, Test]\n",
" category: Optional[List[str]] = None\n",
" task: Optional[str] = None\n",
" reached_cutoff: Optional[bool] = None\n",
"\n",
"class Report(BaseModel):\n",
" command: str\n",
" completion_time: str\n",
" benchmark_start_time: str\n",
" metrics: MetricsOverall\n",
" tests: Dict[str, Union[Test, SuiteTest]]\n",
" config: Dict[str, str]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reports_path = Path.cwd().parent / 'reports'\n",
"\n",
"latest_files = get_latest_files_in_subdirectories(reports_path)\n",
"print(latest_files)\n",
"\n",
"reports_data = {}\n",
"\n",
"# This will print the latest file in each subdirectory and add to the files_data dictionary\n",
"for subdir, file in latest_files:\n",
" subdir_name = os.path.basename(os.path.normpath(subdir))\n",
" print(f\"Subdirectory: {subdir}, Latest file: {file}\")\n",
" if subdir_name not in [\"beebot\", \"mini-agi\"]:\n",
" continue\n",
" with open(Path(subdir) / file, 'r') as f:\n",
" # Load the JSON data from the file\n",
" json_data = json.load(f)\n",
" converted_data = Report.parse_obj(json_data)\n",
" # get the last directory name in the path as key\n",
" reports_data[subdir_name] = converted_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"STRING_DIFFICULTY_MAP = {\n",
" \"interface\": 1,\n",
" \"basic\": 2,\n",
" \"novice\": 3,\n",
" \"intermediate\": 4,\n",
" \"advanced\": 5,\n",
" \"expert\": 6,\n",
" \"human\": 7,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Any\n",
"\n",
"def get_agent_category(report: Report) -> dict[str, Any]:\n",
" categories: dict[str, Any] = {}\n",
" \n",
" def get_highest_category_difficulty(data) -> None:\n",
" for category in data.category:\n",
" if category == \"interface\":\n",
" continue\n",
" num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty]\n",
" if num_dif > categories.setdefault(category, 0):\n",
" categories[category] = num_dif\n",
"\n",
" for _, test_data in report.tests.items():\n",
" suite = False\n",
"\n",
" if isinstance(test_data, SuiteTest):\n",
" for _, test_data in test_data.tests.items():\n",
" get_highest_category_difficulty(test_data)\n",
" else:\n",
" get_highest_category_difficulty(test_data)\n",
" \n",
" return categories\n",
"\n",
"all_categories: dict[str, Any] = {}\n",
" \n",
"for name, report in reports_data.items():\n",
" categories = get_agent_category(report)\n",
" all_categories[name] = categories\n",
" \n",
"print(all_categories)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from matplotlib.colors import Normalize\n",
"import matplotlib.patches as mpatches\n",
"import matplotlib.ticker as ticker\n",
"\n",
"\n",
"def save_combined_radar_chart(categories):\n",
"\n",
" labels=np.array(list(next(iter(categories.values())).keys())) # We use the first category to get the keys\n",
" num_vars = len(labels)\n",
" angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n",
" angles += angles[:1] # Add the first angle to the end of the list to ensure the polygon is closed\n",
"\n",
" # Create radar chart\n",
" fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
" ax.set_theta_offset(np.pi / 2)\n",
" ax.set_theta_direction(-1)\n",
" ax.spines['polar'].set_visible(False) # Remove border\n",
"\n",
" # Define a custom normalization to start the color from the middle\n",
" norm = Normalize(vmin=0, vmax=max([max(val.values()) for val in categories.values()])) # We use the maximum of all categories for normalization\n",
"\n",
" colors = ['#40c463', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] # Define more colors for more categories\n",
"\n",
" for i, (cat_name, cat_values) in enumerate(categories.items()): # Iterating through each category (series)\n",
" values=np.array(list(cat_values.values()))\n",
" values = np.concatenate((values, values[:1])) # Ensure the polygon is closed\n",
"\n",
" ax.fill(angles, values, color=colors[i], alpha=0.25) # Draw the filled polygon\n",
" ax.plot(angles, values, color=colors[i], linewidth=2) # Draw polygon\n",
" ax.plot(angles, values, 'o', color='white', markersize=7, markeredgecolor=colors[i], markeredgewidth=2) # Draw points\n",
"\n",
" # Draw legend\n",
" ax.legend(handles=[mpatches.Patch(color=color, label=cat_name, alpha=0.25) for cat_name, color in zip(categories.keys(), colors)])\n",
"\n",
" lines, labels = plt.thetagrids(np.degrees(angles[:-1]), (list(next(iter(categories.values())).keys()))) # We use the first category to get the keys\n",
"\n",
" # Move labels away from the plot\n",
" for label in labels:\n",
" label.set_position((label.get_position()[0], label.get_position()[1] + -0.05)) # adjust 0.1 as needed\n",
"\n",
" ax.set_rlabel_position(180) # Move radial labels away from the plot\n",
" \n",
" ax.set_yticks([]) # Remove default yticks\n",
"\n",
" # Manually create gridlines\n",
" for y in np.arange(0, norm.vmax + 1, 1):\n",
" if y != norm.vmax:\n",
" ax.plot(angles, [y] * len(angles), color='gray', linewidth=0.5, linestyle=':')\n",
" # Add labels for manually created gridlines\n",
" ax.text(angles[0], y + 0.2, str(int(y)), color='black', size=9, horizontalalignment='center', verticalalignment='center')\n",
"\n",
" plt.show()\n",
"\n",
"# Here is how you can use the function\n",
"categories = {\n",
" 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n",
" 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2}\n",
"}\n",
"save_combined_radar_chart(categories)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import matplotlib.patches as mpatches\n",
"\n",
"def save_single_radar_chart(category_dict: dict[str, int], save_path: str | Path, name: str) -> None:\n",
" labels=np.array(list(category_dict.keys()))\n",
" values=np.array(list(category_dict.values()))\n",
"\n",
" num_vars = len(labels)\n",
"\n",
" angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n",
"\n",
" angles += angles[:1]\n",
" values = np.concatenate((values, values[:1]))\n",
"\n",
" colors = ['#40c463']\n",
"\n",
" fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
" ax.set_theta_offset(np.pi / 2)\n",
" ax.set_theta_direction(-1)\n",
"\n",
" ax.spines['polar'].set_visible(False)\n",
"\n",
" lines, labels = plt.thetagrids(np.degrees(angles[:-1]), (list(category_dict.keys())))\n",
"\n",
" for label in labels:\n",
" label.set_position((label.get_position()[0], label.get_position()[1] + -0.05))\n",
"\n",
" ax.fill(angles, values, color=colors[0], alpha=0.25)\n",
" ax.plot(angles, values, color=colors[0], linewidth=2)\n",
"\n",
" for i, (angle, value) in enumerate(zip(angles, values)):\n",
" ha = 'left'\n",
" if angle in {0, np.pi}:\n",
" ha = 'center'\n",
" elif np.pi < angle < 2*np.pi:\n",
" ha = 'right'\n",
" ax.text(angle, value - 0.5, f'{value}', size=10, horizontalalignment=ha, verticalalignment=\"center\", color='black')\n",
"\n",
" ax.set_yticklabels([])\n",
"\n",
" ax.set_yticks([])\n",
"\n",
" for y in np.arange(0, values.max(), 1):\n",
" ax.plot(angles, [y] * len(angles), color='gray', linewidth=0.5, linestyle=':')\n",
"\n",
" for angle, value in zip(angles, values):\n",
" ax.plot(angle, value, 'o', color='white', markersize=7, markeredgecolor=colors[0], markeredgewidth=2)\n",
"\n",
" green_patch = mpatches.Patch(color='#40c463', label='Mini-AGI', alpha=0.25)\n",
" plt.legend(handles=[green_patch])\n",
" \n",
" plt.savefig(f\"{save_path}/{name}.png\", dpi=300) # Save the figure as a PNG file\n",
" plt.close() # Close the figure to free up memory\n",
"\n",
"# Here's how you can use this function:\n",
"categories = {'content_gen': 2, 'safety': 4, 'memory': 5, 'code': 5, 'iterate': 5, 'retrieval': 4, 'adaptability': 4}\n",
"save_single_radar_chart(categories, Path.cwd(), \"test\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Define data\n",
"categories = {\n",
" 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n",
" 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2},\n",
"}\n",
"\n",
"# Convert dictionary to DataFrame\n",
"df = pd.DataFrame(categories)\n",
"\n",
"# Create heatmap\n",
"plt.figure(figsize=(8, 6))\n",
"sns.heatmap(df, annot=True, cmap=\"YlGnBu\", fmt=\"d\", linewidths=.5)\n",
"plt.title('Heatmap of Categories')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"categories = {\n",
" 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n",
" 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2}\n",
" # include other agents here...\n",
"}\n",
"\n",
"# Convert dictionary to DataFrame\n",
"df = pd.DataFrame(categories)\n",
"\n",
"# Create a grouped bar chart\n",
"df.plot(kind='bar', figsize=(10, 7))\n",
"\n",
"plt.title('Performance by Category for Each Agent')\n",
"plt.xlabel('Category')\n",
"plt.ylabel('Performance')\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}