{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "from pathlib import Path\n", "import json\n", "\n", "def get_last_file_in_directory(directory_path):\n", " # Get all files in the directory\n", " files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]\n", "\n", " # Sort the files by modification time\n", " files.sort(key=lambda x: os.path.getmtime(os.path.join(directory_path, x)))\n", "\n", " # Return the last file in the list\n", " return files[-1] if files else None\n", "\n", "def get_latest_files_in_subdirectories(directory_path):\n", " latest_files = []\n", " for subdir in os.scandir(directory_path):\n", " if subdir.is_dir():\n", " latest_file = get_last_file_in_directory(subdir.path)\n", " if latest_file is not None:\n", " latest_files.append((subdir.path, latest_file))\n", " return latest_files" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from typing import Optional, Dict, List, Union\n", "from pydantic import BaseModel, Field\n", "\n", "class Metrics(BaseModel):\n", " difficulty: str\n", " success: bool\n", " success_percent: float = Field(..., alias=\"success_%\")\n", " run_time: Optional[str] = None\n", " fail_reason: Optional[str] = None\n", "\n", "class MetricsOverall(BaseModel):\n", " run_time: str\n", " highest_difficulty: str\n", " percentage: Optional[float] = None\n", "\n", "class Test(BaseModel):\n", " data_path: str\n", " is_regression: bool\n", " answer: str\n", " description: str\n", " metrics: Metrics\n", " category: List[str]\n", " task: Optional[str] = None\n", " reached_cutoff: Optional[bool] = None\n", "\n", "class SuiteTest(BaseModel):\n", " data_path: str\n", " metrics: MetricsOverall\n", " tests: Dict[str, Test]\n", " category: Optional[List[str]] = None\n", " task: Optional[str] = None\n", " reached_cutoff: Optional[bool] = None\n", "\n", "class Report(BaseModel):\n", " command: str\n", " completion_time: str\n", " benchmark_start_time: str\n", " metrics: MetricsOverall\n", " tests: Dict[str, Union[Test, SuiteTest]]\n", " config: Dict[str, str]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "reports_path = Path.cwd().parent / 'reports'\n", "\n", "latest_files = get_latest_files_in_subdirectories(reports_path)\n", "print(latest_files)\n", "\n", "reports_data = {}\n", "\n", "# This will print the latest file in each subdirectory and add to the files_data dictionary\n", "for subdir, file in latest_files:\n", " subdir_name = os.path.basename(os.path.normpath(subdir))\n", " print(f\"Subdirectory: {subdir}, Latest file: {file}\")\n", " if subdir_name not in [\"beebot\", \"mini-agi\"]:\n", " continue\n", " with open(Path(subdir) / file, 'r') as f:\n", " # Load the JSON data from the file\n", " json_data = json.load(f)\n", " converted_data = Report.parse_obj(json_data)\n", " # get the last directory name in the path as key\n", " reports_data[subdir_name] = converted_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "STRING_DIFFICULTY_MAP = {\n", " \"interface\": 1,\n", " \"basic\": 2,\n", " \"novice\": 3,\n", " \"intermediate\": 4,\n", " \"advanced\": 5,\n", " \"expert\": 6,\n", " \"human\": 7,\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from typing import Any\n", "\n", "def get_agent_category(report: Report) -> dict[str, Any]:\n", " categories: dict[str, Any] = {}\n", " \n", " def get_highest_category_difficulty(data) -> None:\n", " for category in data.category:\n", " if category == \"interface\":\n", " continue\n", " num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty]\n", " if num_dif > categories.setdefault(category, 0):\n", " categories[category] = num_dif\n", "\n", " for _, test_data in report.tests.items():\n", " suite = False\n", "\n", " if isinstance(test_data, SuiteTest):\n", " for _, test_data in test_data.tests.items():\n", " get_highest_category_difficulty(test_data)\n", " else:\n", " get_highest_category_difficulty(test_data)\n", " \n", " return categories\n", "\n", "all_categories: dict[str, Any] = {}\n", " \n", "for name, report in reports_data.items():\n", " categories = get_agent_category(report)\n", " all_categories[name] = categories\n", " \n", "print(all_categories)\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from matplotlib.colors import Normalize\n", "import matplotlib.patches as mpatches\n", "import matplotlib.ticker as ticker\n", "\n", "\n", "def save_combined_radar_chart(categories):\n", "\n", " labels=np.array(list(next(iter(categories.values())).keys())) # We use the first category to get the keys\n", " num_vars = len(labels)\n", " angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n", " angles += angles[:1] # Add the first angle to the end of the list to ensure the polygon is closed\n", "\n", " # Create radar chart\n", " fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n", " ax.set_theta_offset(np.pi / 2)\n", " ax.set_theta_direction(-1)\n", " ax.spines['polar'].set_visible(False) # Remove border\n", "\n", " # Define a custom normalization to start the color from the middle\n", " norm = Normalize(vmin=0, vmax=max([max(val.values()) for val in categories.values()])) # We use the maximum of all categories for normalization\n", "\n", " colors = ['#40c463', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] # Define more colors for more categories\n", "\n", " for i, (cat_name, cat_values) in enumerate(categories.items()): # Iterating through each category (series)\n", " values=np.array(list(cat_values.values()))\n", " values = np.concatenate((values, values[:1])) # Ensure the polygon is closed\n", "\n", " ax.fill(angles, values, color=colors[i], alpha=0.25) # Draw the filled polygon\n", " ax.plot(angles, values, color=colors[i], linewidth=2) # Draw polygon\n", " ax.plot(angles, values, 'o', color='white', markersize=7, markeredgecolor=colors[i], markeredgewidth=2) # Draw points\n", "\n", " # Draw legend\n", " ax.legend(handles=[mpatches.Patch(color=color, label=cat_name, alpha=0.25) for cat_name, color in zip(categories.keys(), colors)])\n", "\n", " lines, labels = plt.thetagrids(np.degrees(angles[:-1]), (list(next(iter(categories.values())).keys()))) # We use the first category to get the keys\n", "\n", " # Move labels away from the plot\n", " for label in labels:\n", " label.set_position((label.get_position()[0], label.get_position()[1] + -0.05)) # adjust 0.1 as needed\n", "\n", " ax.set_rlabel_position(180) # Move radial labels away from the plot\n", " \n", " ax.set_yticks([]) # Remove default yticks\n", "\n", " # Manually create gridlines\n", " for y in np.arange(0, norm.vmax + 1, 1):\n", " if y != norm.vmax:\n", " ax.plot(angles, [y] * len(angles), color='gray', linewidth=0.5, linestyle=':')\n", " # Add labels for manually created gridlines\n", " ax.text(angles[0], y + 0.2, str(int(y)), color='black', size=9, horizontalalignment='center', verticalalignment='center')\n", "\n", " plt.show()\n", "\n", "# Here is how you can use the function\n", "categories = {\n", " 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n", " 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2}\n", "}\n", "save_combined_radar_chart(categories)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import matplotlib.patches as mpatches\n", "\n", "def save_single_radar_chart(category_dict: dict[str, int], save_path: str | Path, name: str) -> None:\n", " labels=np.array(list(category_dict.keys()))\n", " values=np.array(list(category_dict.values()))\n", "\n", " num_vars = len(labels)\n", "\n", " angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n", "\n", " angles += angles[:1]\n", " values = np.concatenate((values, values[:1]))\n", "\n", " colors = ['#40c463']\n", "\n", " fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n", " ax.set_theta_offset(np.pi / 2)\n", " ax.set_theta_direction(-1)\n", "\n", " ax.spines['polar'].set_visible(False)\n", "\n", " lines, labels = plt.thetagrids(np.degrees(angles[:-1]), (list(category_dict.keys())))\n", "\n", " for label in labels:\n", " label.set_position((label.get_position()[0], label.get_position()[1] + -0.05))\n", "\n", " ax.fill(angles, values, color=colors[0], alpha=0.25)\n", " ax.plot(angles, values, color=colors[0], linewidth=2)\n", "\n", " for i, (angle, value) in enumerate(zip(angles, values)):\n", " ha = 'left'\n", " if angle in {0, np.pi}:\n", " ha = 'center'\n", " elif np.pi < angle < 2*np.pi:\n", " ha = 'right'\n", " ax.text(angle, value - 0.5, f'{value}', size=10, horizontalalignment=ha, verticalalignment=\"center\", color='black')\n", "\n", " ax.set_yticklabels([])\n", "\n", " ax.set_yticks([])\n", "\n", " for y in np.arange(0, values.max(), 1):\n", " ax.plot(angles, [y] * len(angles), color='gray', linewidth=0.5, linestyle=':')\n", "\n", " for angle, value in zip(angles, values):\n", " ax.plot(angle, value, 'o', color='white', markersize=7, markeredgecolor=colors[0], markeredgewidth=2)\n", "\n", " green_patch = mpatches.Patch(color='#40c463', label='Mini-AGI', alpha=0.25)\n", " plt.legend(handles=[green_patch])\n", " \n", " plt.savefig(f\"{save_path}/{name}.png\", dpi=300) # Save the figure as a PNG file\n", " plt.close() # Close the figure to free up memory\n", "\n", "# Here's how you can use this function:\n", "categories = {'content_gen': 2, 'safety': 4, 'memory': 5, 'code': 5, 'iterate': 5, 'retrieval': 4, 'adaptability': 4}\n", "save_single_radar_chart(categories, Path.cwd(), \"test\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "# Define data\n", "categories = {\n", " 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n", " 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2},\n", "}\n", "\n", "# Convert dictionary to DataFrame\n", "df = pd.DataFrame(categories)\n", "\n", "# Create heatmap\n", "plt.figure(figsize=(8, 6))\n", "sns.heatmap(df, annot=True, cmap=\"YlGnBu\", fmt=\"d\", linewidths=.5)\n", "plt.title('Heatmap of Categories')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "categories = {\n", " 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n", " 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2}\n", " # include other agents here...\n", "}\n", "\n", "# Convert dictionary to DataFrame\n", "df = pd.DataFrame(categories)\n", "\n", "# Create a grouped bar chart\n", "df.plot(kind='bar', figsize=(10, 7))\n", "\n", "plt.title('Performance by Category for Each Agent')\n", "plt.xlabel('Category')\n", "plt.ylabel('Performance')\n", "plt.show()\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 2 }