diff --git a/agbenchmark/reports/processing/gen_combined_chart.py b/agbenchmark/reports/processing/gen_combined_chart.py index 784e8ff9..ca6ede7d 100644 --- a/agbenchmark/reports/processing/gen_combined_chart.py +++ b/agbenchmark/reports/processing/gen_combined_chart.py @@ -19,7 +19,7 @@ def generate_combined_chart() -> None: reports_data = get_reports_data(str(all_agents_path)) - categories = all_agent_categories(reports_data) + categories = all_agent_categories(reports_data, combined=True) # Count the number of directories in this directory num_dirs = len([f for f in combined_charts_folder.iterdir() if f.is_dir()]) diff --git a/agbenchmark/reports/processing/graphs.py b/agbenchmark/reports/processing/graphs.py index af910bac..e84544ef 100644 --- a/agbenchmark/reports/processing/graphs.py +++ b/agbenchmark/reports/processing/graphs.py @@ -72,6 +72,11 @@ def save_combined_radar_chart( np.degrees(angles[:-1]), (list(next(iter(categories.values())).keys())) ) # We use the first category to get the keys + highest_score = 7 + + # Set y-axis limit to 7 + ax.set_ylim(top=highest_score) + # Move labels away from the plot for label in labels: label.set_position( @@ -84,8 +89,8 @@ def save_combined_radar_chart( ax.set_yticks([]) # Remove default yticks # Manually create gridlines - for y in np.arange(0, norm.vmax + 1, 1): - if y != norm.vmax: + for y in np.arange(0, highest_score + 1, 1): + if y != highest_score: ax.plot( angles, [y] * len(angles), color="gray", linewidth=0.5, linestyle=":" ) diff --git a/agbenchmark/reports/processing/process_report.py b/agbenchmark/reports/processing/process_report.py index 7c0a2e9b..2c5f3f59 100644 --- a/agbenchmark/reports/processing/process_report.py +++ b/agbenchmark/reports/processing/process_report.py @@ -1,7 +1,7 @@ import json import os from pathlib import Path -from typing import Any +from typing import Any, Optional from agbenchmark.reports.processing.get_files import ( get_latest_report_from_agent_directories, @@ -31,13 +31,17 @@ def get_reports_data(report_path: str) -> dict[str, Any]: return reports_data -def get_agent_category(report: Report) -> dict[str, Any]: +def get_agent_category( + report: Report, combined: Optional[bool] = None +) -> dict[str, Any]: categories: dict[str, Any] = {} def get_highest_category_difficulty(data: Test) -> None: for category in data.category: if category == "interface" or category == "iterate": continue + if combined: + categories.setdefault(category, 0) if data.metrics.success: num_dif = STRING_DIFFICULTY_MAP[data.metrics.difficulty] if num_dif > categories.setdefault(category, 0): @@ -53,11 +57,13 @@ def get_agent_category(report: Report) -> dict[str, Any]: return categories -def all_agent_categories(reports_data: dict[str, Any]) -> dict[str, Any]: +def all_agent_categories( + reports_data: dict[str, Any], combined: Optional[bool] = None +) -> dict[str, Any]: all_categories: dict[str, Any] = {} for name, report in reports_data.items(): - categories = get_agent_category(report) + categories = get_agent_category(report, combined) all_categories[name] = categories return all_categories diff --git a/agent/gpt-engineer b/agent/gpt-engineer index 9bb81041..47bc50b7 160000 --- a/agent/gpt-engineer +++ b/agent/gpt-engineer @@ -1 +1 @@ -Subproject commit 9bb81041ace9f09e8ea0e34e29f2e46bb9d46a36 +Subproject commit 47bc50b71c0465349a6489e0170792c0018472f3 diff --git a/reports/combined_charts/run2/bar_chart.png b/reports/combined_charts/run2/bar_chart.png new file mode 100644 index 00000000..3afc6b77 Binary files /dev/null and b/reports/combined_charts/run2/bar_chart.png differ diff --git a/reports/combined_charts/run2/radar_chart.png b/reports/combined_charts/run2/radar_chart.png new file mode 100644 index 00000000..8bb5d11c Binary files /dev/null and b/reports/combined_charts/run2/radar_chart.png differ diff --git a/reports/combined_charts/run2/run_info.json b/reports/combined_charts/run2/run_info.json new file mode 100644 index 00000000..d96d0c9d --- /dev/null +++ b/reports/combined_charts/run2/run_info.json @@ -0,0 +1 @@ +{"Auto-GPT": "2023-08-01-12:47", "BabyAGI": "2023-08-01-12:50", "beebot": "2023-08-01-12:48", "gpt-engineer": "2023-08-01-12:47", "mini-agi": "2023-08-01-12:47", "smol-developer": "2023-08-01-12:46"} \ No newline at end of file diff --git a/reports/gpt-engineer/folder10_08-01-12-47/radar_chart.png b/reports/gpt-engineer/folder10_08-01-12-47/radar_chart.png deleted file mode 100644 index a6f7e0b4..00000000 Binary files a/reports/gpt-engineer/folder10_08-01-12-47/radar_chart.png and /dev/null differ diff --git a/reports/gpt-engineer/folder11_08-01-03-20/radar_chart.png b/reports/gpt-engineer/folder11_08-01-03-20/radar_chart.png deleted file mode 100644 index bcc9ef66..00000000 Binary files a/reports/gpt-engineer/folder11_08-01-03-20/radar_chart.png and /dev/null differ