Auto-GPT/benchmark/notebooks/Visualization.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"import json\n",
"\n",
"def get_last_file_in_directory(directory_path):\n",
" # Get all files in the directory\n",
" files = [f for f in os.listdir(directory_path) if os.path.isfile(os.path.join(directory_path, f))]\n",
"\n",
" # Sort the files by modification time\n",
" files.sort(key=lambda x: os.path.getmtime(os.path.join(directory_path, x)))\n",
"\n",
" # Return the last file in the list\n",
" return files[-1] if files else None\n",
"\n",
"def get_latest_files_in_subdirectories(directory_path):\n",
" latest_files = []\n",
" for subdir in os.scandir(directory_path):\n",
" if subdir.is_dir():\n",
" latest_file = get_last_file_in_directory(subdir.path)\n",
" if latest_file is not None:\n",
" latest_files.append((subdir.path, latest_file))\n",
" return latest_files"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Optional, Dict, List, Union\n",
"from pydantic import BaseModel, Field\n",
"\n",
"class Metrics(BaseModel):\n",
" difficulty: str\n",
" success: bool\n",
" success_percent: float = Field(..., alias=\"success_%\")\n",
" run_time: Optional[str] = None\n",
" fail_reason: Optional[str] = None\n",
"\n",
"class MetricsOverall(BaseModel):\n",
" run_time: str\n",
" highest_difficulty: str\n",
" percentage: Optional[float] = None\n",
"\n",
"class Test(BaseModel):\n",
" data_path: str\n",
" is_regression: bool\n",
" answer: str\n",
" description: str\n",
" metrics: Metrics\n",
" category: List[str]\n",
" task: Optional[str] = None\n",
" reached_cutoff: Optional[bool] = None\n",
"\n",
"class SuiteTest(BaseModel):\n",
" data_path: str\n",
" metrics: MetricsOverall\n",
" tests: Dict[str, Test]\n",
" category: Optional[List[str]] = None\n",
" task: Optional[str] = None\n",
" reached_cutoff: Optional[bool] = None\n",
"\n",
"class Report(BaseModel):\n",
" command: str\n",
" completion_time: str\n",
" benchmark_start_time: str\n",
" metrics: MetricsOverall\n",
" tests: Dict[str, Union[Test, SuiteTest]]\n",
" config: Dict[str, str]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reports_path = Path.cwd().parent / 'reports'\n",
"\n",
"latest_files = get_latest_files_in_subdirectories(reports_path)\n",
"print(latest_files)\n",
"\n",
"reports_data = {}\n",
"\n",
"# This will print the latest file in each subdirectory and add to the files_data dictionary\n",
"for subdir, file in latest_files:\n",
" subdir_name = os.path.basename(os.path.normpath(subdir))\n",
" print(f\"Subdirectory: {subdir}, Latest file: {file}\")\n",
" if subdir_name not in [\"beebot\", \"mini-agi\"]:\n",
" continue\n",
" with open(Path(subdir) / file, 'r') as f:\n",
" # Load the JSON data from the file\n",
" json_data = json.load(f)\n",
" converted_data = Report.parse_obj(json_data)\n",
" # get the last directory name in the path as key\n",
" reports_data[subdir_name] = converted_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"STRING_DIFFICULTY_MAP = {\n",
" \"interface\": 1,\n",
" \"basic\": 2,\n",
" \"novice\": 3,\n",
" \"intermediate\": 4,\n",
" \"advanced\": 5,\n",
" \"expert\": 6,\n",
" \"human\": 7,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Any\n",
"\n",
"def get_agent_category(report: Report) -> dict[str, Any]:\n",
" categories: dict[str, Any] = {}\n",
" \n",
" def get_highest_category_difficulty(data) -> None:\n",
" for category in data.category:\n",
" if category == \"interface\":\n",
" continue\n",
" num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty]\n",
" if num_dif > categories.setdefault(category, 0):\n",
" categories[category] = num_dif\n",
"\n",
" for _, test_data in report.tests.items():\n",
" suite = False\n",
"\n",
" if isinstance(test_data, SuiteTest):\n",
" for _, test_data in test_data.tests.items():\n",
" get_highest_category_difficulty(test_data)\n",
" else:\n",
" get_highest_category_difficulty(test_data)\n",
" \n",
" return categories\n",
"\n",
"all_categories: dict[str, Any] = {}\n",
" \n",
"for name, report in reports_data.items():\n",
" categories = get_agent_category(report)\n",
" all_categories[name] = categories\n",
" \n",
"print(all_categories)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from matplotlib.colors import Normalize\n",
"import matplotlib.patches as mpatches\n",
"import matplotlib.ticker as ticker\n",
"\n",
"\n",
"def save_combined_radar_chart(categories):\n",
"\n",
" labels=np.array(list(next(iter(categories.values())).keys())) # We use the first category to get the keys\n",
" num_vars = len(labels)\n",
" angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n",
" angles += angles[:1] # Add the first angle to the end of the list to ensure the polygon is closed\n",
"\n",
" # Create radar chart\n",
" fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
" ax.set_theta_offset(np.pi / 2)\n",
" ax.set_theta_direction(-1)\n",
" ax.spines['polar'].set_visible(False) # Remove border\n",
"\n",
" # Define a custom normalization to start the color from the middle\n",
" norm = Normalize(vmin=0, vmax=max([max(val.values()) for val in categories.values()])) # We use the maximum of all categories for normalization\n",
"\n",
" colors = ['#40c463', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] # Define more colors for more categories\n",
"\n",
" for i, (cat_name, cat_values) in enumerate(categories.items()): # Iterating through each category (series)\n",
" values=np.array(list(cat_values.values()))\n",
" values = np.concatenate((values, values[:1])) # Ensure the polygon is closed\n",
"\n",
" ax.fill(angles, values, color=colors[i], alpha=0.25) # Draw the filled polygon\n",
" ax.plot(angles, values, color=colors[i], linewidth=2) # Draw polygon\n",
" ax.plot(angles, values, 'o', color='white', markersize=7, markeredgecolor=colors[i], markeredgewidth=2) # Draw points\n",
"\n",
" # Draw legend\n",
" ax.legend(handles=[mpatches.Patch(color=color, label=cat_name, alpha=0.25) for cat_name, color in zip(categories.keys(), colors)])\n",
"\n",
" lines, labels = plt.thetagrids(np.degrees(angles[:-1]), (list(next(iter(categories.values())).keys()))) # We use the first category to get the keys\n",
"\n",
" # Move labels away from the plot\n",
" for label in labels:\n",
" label.set_position((label.get_position()[0], label.get_position()[1] + -0.05)) # adjust 0.1 as needed\n",
"\n",
" ax.set_rlabel_position(180) # Move radial labels away from the plot\n",
" \n",
" ax.set_yticks([]) # Remove default yticks\n",
"\n",
" # Manually create gridlines\n",
" for y in np.arange(0, norm.vmax + 1, 1):\n",
" if y != norm.vmax:\n",
" ax.plot(angles, [y] * len(angles), color='gray', linewidth=0.5, linestyle=':')\n",
" # Add labels for manually created gridlines\n",
" ax.text(angles[0], y + 0.2, str(int(y)), color='black', size=9, horizontalalignment='center', verticalalignment='center')\n",
"\n",
" plt.show()\n",
"\n",
"# Here is how you can use the function\n",
"categories = {\n",
" 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n",
" 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2}\n",
"}\n",
"save_combined_radar_chart(categories)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import matplotlib.patches as mpatches\n",
"\n",
"def save_single_radar_chart(category_dict: dict[str, int], save_path: str | Path, name: str) -> None:\n",
" labels=np.array(list(category_dict.keys()))\n",
" values=np.array(list(category_dict.values()))\n",
"\n",
" num_vars = len(labels)\n",
"\n",
" angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n",
"\n",
" angles += angles[:1]\n",
" values = np.concatenate((values, values[:1]))\n",
"\n",
" colors = ['#40c463']\n",
"\n",
" fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))\n",
" ax.set_theta_offset(np.pi / 2)\n",
" ax.set_theta_direction(-1)\n",
"\n",
" ax.spines['polar'].set_visible(False)\n",
"\n",
" lines, labels = plt.thetagrids(np.degrees(angles[:-1]), (list(category_dict.keys())))\n",
"\n",
" for label in labels:\n",
" label.set_position((label.get_position()[0], label.get_position()[1] + -0.05))\n",
"\n",
" ax.fill(angles, values, color=colors[0], alpha=0.25)\n",
" ax.plot(angles, values, color=colors[0], linewidth=2)\n",
"\n",
" for i, (angle, value) in enumerate(zip(angles, values)):\n",
" ha = 'left'\n",
" if angle in {0, np.pi}:\n",
" ha = 'center'\n",
" elif np.pi < angle < 2*np.pi:\n",
" ha = 'right'\n",
" ax.text(angle, value - 0.5, f'{value}', size=10, horizontalalignment=ha, verticalalignment=\"center\", color='black')\n",
"\n",
" ax.set_yticklabels([])\n",
"\n",
" ax.set_yticks([])\n",
"\n",
" for y in np.arange(0, values.max(), 1):\n",
" ax.plot(angles, [y] * len(angles), color='gray', linewidth=0.5, linestyle=':')\n",
"\n",
" for angle, value in zip(angles, values):\n",
" ax.plot(angle, value, 'o', color='white', markersize=7, markeredgecolor=colors[0], markeredgewidth=2)\n",
"\n",
" green_patch = mpatches.Patch(color='#40c463', label='Mini-AGI', alpha=0.25)\n",
" plt.legend(handles=[green_patch])\n",
" \n",
" plt.savefig(f\"{save_path}/{name}.png\", dpi=300) # Save the figure as a PNG file\n",
" plt.close() # Close the figure to free up memory\n",
"\n",
"# Here's how you can use this function:\n",
"categories = {'content_gen': 2, 'safety': 4, 'memory': 5, 'code': 5, 'iterate': 5, 'retrieval': 4, 'adaptability': 4}\n",
"save_single_radar_chart(categories, Path.cwd(), \"test\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Define data\n",
"categories = {\n",
" 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n",
" 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2},\n",
"}\n",
"\n",
"# Convert dictionary to DataFrame\n",
"df = pd.DataFrame(categories)\n",
"\n",
"# Create heatmap\n",
"plt.figure(figsize=(8, 6))\n",
"sns.heatmap(df, annot=True, cmap=\"YlGnBu\", fmt=\"d\", linewidths=.5)\n",
"plt.title('Heatmap of Categories')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"categories = {\n",
" 'beebot': {'content_gen': 2, 'safety': 4, 'memory': 1, 'code': 2, 'iterate': 3, 'retrieval': 4, 'adaptability': 3}, \n",
" 'mini-agi': {'content_gen': 4, 'safety': 1, 'memory': 5, 'code': 4, 'iterate': 5, 'retrieval': 4, 'adaptability': 2}\n",
" # include other agents here...\n",
"}\n",
"\n",
"# Convert dictionary to DataFrame\n",
"df = pd.DataFrame(categories)\n",
"\n",
"# Create a grouped bar chart\n",
"df.plot(kind='bar', figsize=(10, 7))\n",
"\n",
"plt.title('Performance by Category for Each Agent')\n",
"plt.xlabel('Category')\n",
"plt.ylabel('Performance')\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}