perf/throughput: Improve reproducibility

Improve reproducibility by documenting the steps needed to run the benchmarks and generate the plots. Also simplify plot generation a bit.
2026-02-23 17:05:36 +01:00 · 2025-10-27 10:51:02 +02:00
parent 1fb1fbf210
commit f10431d24f
3 changed files with 41 additions and 18 deletions
--- a/perf/throughput/plot/plot-compute-impact.py
+++ b/perf/throughput/plot/plot-compute-impact.py
@@ -0,0 +1,83 @@
+import sys
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import scienceplots  # noqa: F401
+
+plt.style.use(["science"])
+plt.rcParams.update({
+  "text.usetex": True,
+  "font.family": "serif",
+  "font.serif": ["Times"],
+})
+
+# Get CSV filenames from command line arguments
+if len(sys.argv) < 2:
+    print("Usage: python script.py <csv_filename> [<csv_filename> ...]")
+    sys.exit(1)
+
+csv_filenames = sys.argv[1:]
+
+# Output filename
+output_filename = "compute-impact.pdf"
+
+# Read data from all CSV files and concatenate
+dfs = [pd.read_csv(filename) for filename in csv_filenames]
+df = pd.concat(dfs, ignore_index=True)
+
+# Create figure and axis
+fig, ax = plt.subplots(figsize=(10, 6))
+
+# Get unique systems and thread counts
+systems = df["system"].unique()
+thread_counts = sorted(df["threads"].unique())
+
+# Get colors from the current color cycle
+prop_cycle = plt.rcParams["axes.prop_cycle"]
+colors_list = prop_cycle.by_key()["color"]
+
+# Plot a line for each system-thread combination
+markers = ["o", "s", "^", "D"]
+linestyles = ["-", "--", "-.", ":"]
+
+plot_idx = 0
+for sys_idx, system in enumerate(systems):
+    df_system = df[df["system"] == system]
+    for thread_idx, threads in enumerate(thread_counts):
+        df_thread = df_system[df_system["threads"] == threads].sort_values("compute")
+        if len(df_thread) > 0:
+            ax.plot(df_thread["compute"], df_thread["throughput"],
+                    marker=markers[thread_idx % len(markers)],
+                    color=colors_list[plot_idx % len(colors_list)],
+                    linestyle=linestyles[sys_idx % len(linestyles)],
+                    linewidth=2, markersize=8,
+                    label=f'{system} ({threads} thread{"s" if threads > 1 else ""})')
+            plot_idx += 1
+
+# Customize the plot
+ax.set_xlabel(r"Compute Time (microseconds)", fontsize=14, fontweight="bold")
+ax.set_ylabel("Throughput (rows/second)", fontsize=14, fontweight="bold")
+
+# Set y-axis to start from 0 with dynamic upper limit
+max_throughput = df["throughput"].max()
+ax.set_ylim(0, max_throughput * 1.15)  # Add 15% tolerance for legend space
+
+# Format y-axis labels
+ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f"{int(x/1000)}k"))
+
+# Add legend
+ax.legend(loc="lower left", frameon=True, fontsize=11)
+
+# Add grid for better readability
+ax.grid(axis="both", alpha=0.3, linestyle="--")
+ax.set_axisbelow(True)
+
+# Adjust layout
+plt.tight_layout()
+
+# Save the figure
+plt.savefig(output_filename, dpi=300, bbox_inches="tight")
+print(f"Saved plot to {output_filename}")
+
+# Display the plot
+plt.show()