perf/throughput: Add plotting scripts

This adds a few helper scripts to plot throughput results.
This commit is contained in:
Pekka Enberg
2025-10-01 10:11:24 +03:00
parent 3fcb0581ec
commit 51f4f1fb8b
2 changed files with 168 additions and 0 deletions

View File

@@ -0,0 +1,84 @@
import os
import sys
import matplotlib.pyplot as plt
import pandas as pd
import scienceplots # noqa: F401
def format_thousands(value, _pos=None):
    """Format a y-axis tick value in thousands, e.g. ``2500`` -> ``"2.5k"``.

    The value is not truncated to an integer, so ticks that are not exact
    multiples of 1000 keep their fractional part instead of being
    mislabeled (2500 used to be rendered as "2k"). ``_pos`` is the tick
    position argument that ``FuncFormatter`` passes; it is ignored.
    """
    # 12 significant digits hides float noise while avoiding the switch to
    # exponent notation that plain ``:g`` makes for large values.
    return f"{value / 1000:.12g}k"


def series_label(system, threads):
    """Return the legend label for one system/thread-count line."""
    plural = "s" if threads > 1 else ""
    return f"{system} ({threads} thread{plural})"


def main():
    """Plot throughput against compute time for each system/thread pair.

    Reads the CSV file named by the first command-line argument, using its
    ``system``, ``threads``, ``compute`` and ``throughput`` columns, saves
    the figure as ``<csv basename>-compute.png`` and then displays it.
    Exits with status 1 when no filename is given or the CSV has no rows.
    """
    plt.style.use(["science"])
    plt.rcParams.update({
        "text.usetex": True,
        "font.family": "serif",
        "font.serif": ["Times"],
    })

    # Get CSV filename from command line argument
    if len(sys.argv) < 2:
        print("Usage: python script.py <csv_filename>")
        sys.exit(1)
    csv_filename = sys.argv[1]

    # Get basename without extension for output filename
    basename = os.path.splitext(csv_filename)[0]
    output_filename = f"{basename}-compute.png"

    # Read data from CSV file
    df = pd.read_csv(csv_filename)
    if df.empty:
        # Without rows the maximum throughput is NaN, which set_ylim rejects
        # with an opaque error; fail early with a clear message instead.
        print(f"No data rows found in {csv_filename}")
        sys.exit(1)

    # Create figure and axis
    _fig, ax = plt.subplots(figsize=(10, 6))

    # Get unique systems and thread counts
    systems = df["system"].unique()
    thread_counts = sorted(df["threads"].unique())

    # Get colors from the current color cycle
    colors_list = plt.rcParams["axes.prop_cycle"].by_key()["color"]

    # Plot a line for each system-thread combination: the marker encodes the
    # thread count, the line style encodes the system, and the color advances
    # with every line actually drawn.
    markers = ["o", "s", "^", "D"]
    linestyles = ["-", "--", "-.", ":"]
    plot_idx = 0
    for sys_idx, system in enumerate(systems):
        df_system = df[df["system"] == system]
        for thread_idx, threads in enumerate(thread_counts):
            df_thread = df_system[df_system["threads"] == threads].sort_values("compute")
            if df_thread.empty:
                continue
            ax.plot(
                df_thread["compute"],
                df_thread["throughput"],
                marker=markers[thread_idx % len(markers)],
                color=colors_list[plot_idx % len(colors_list)],
                linestyle=linestyles[sys_idx % len(linestyles)],
                linewidth=2,
                markersize=8,
                label=series_label(system, threads),
            )
            plot_idx += 1

    # Customize the plot
    ax.set_xlabel(r"Compute Time (microseconds)", fontsize=14, fontweight="bold")
    ax.set_ylabel("Throughput (rows/second)", fontsize=14, fontweight="bold")

    # Set y-axis to start from 0 with dynamic upper limit
    max_throughput = df["throughput"].max()
    ax.set_ylim(0, max_throughput * 1.15)  # Add 15% tolerance for legend space

    # Format y-axis labels
    ax.yaxis.set_major_formatter(plt.FuncFormatter(format_thousands))

    # Add legend
    ax.legend(loc="lower left", frameon=True, fontsize=11)

    # Add grid for better readability
    ax.grid(axis="both", alpha=0.3, linestyle="--")
    ax.set_axisbelow(True)

    # Adjust layout
    plt.tight_layout()

    # Save the figure
    plt.savefig(output_filename, dpi=300, bbox_inches="tight")
    print(f"Saved plot to {output_filename}")

    # Display the plot
    plt.show()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,84 @@
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scienceplots # noqa: F401
def format_thousands(value, _pos=None):
    """Format a y-axis tick value in thousands, e.g. ``2500`` -> ``"2.5k"``.

    The value is not truncated to an integer, so ticks that are not exact
    multiples of 1000 keep their fractional part instead of being
    mislabeled (2500 used to be rendered as "2k"). ``_pos`` is the tick
    position argument that ``FuncFormatter`` passes; it is ignored.
    """
    # 12 significant digits hides float noise while avoiding the switch to
    # exponent notation that plain ``:g`` makes for large values.
    return f"{value / 1000:.12g}k"


def bar_width_for(system_count):
    """Return a bar width that keeps one group of bars within its tick slot.

    Up to two systems keep the 0.35 width; with more systems the bars
    shrink so a whole group spans at most 0.8 of the distance between
    neighbouring ticks and does not overlap the next group.
    """
    return min(0.35, 0.8 / system_count)


def aligned_throughput(system_data, threads):
    """Return one throughput value per entry of ``threads`` for one system.

    ``system_data`` holds the rows of a single system. Repeated measurements
    for the same thread count are averaged, and thread counts the system has
    no row for yield 0, so the result always lines up with the bar positions.
    """
    per_thread = system_data.groupby("threads")["throughput"].mean()
    return per_thread.reindex(threads, fill_value=0).tolist()


def main():
    """Plot a grouped bar chart of throughput per thread count and system.

    Reads the CSV file named by the first command-line argument, keeps only
    the rows whose ``compute`` column is 0, saves the figure as
    ``<csv basename>-write.png`` and then displays it. Exits with status 1
    when no filename is given or no row has ``compute == 0``.
    """
    plt.style.use(["science"])

    # Get CSV filename from command line argument
    if len(sys.argv) < 2:
        print("Usage: python script.py <csv_filename>")
        sys.exit(1)
    csv_filename = sys.argv[1]

    # Get basename without extension for output filename
    basename = os.path.splitext(csv_filename)[0]
    output_filename = f"{basename}-write.png"

    # Read data from CSV file
    df = pd.read_csv(csv_filename)

    # Filter for compute time = 0
    df_filtered = df[df["compute"] == 0].sort_values("threads")
    if df_filtered.empty:
        # Without rows the maximum throughput is NaN, which set_ylim rejects
        # with an opaque error; fail early with a clear message instead.
        print(f"No rows with compute == 0 found in {csv_filename}")
        sys.exit(1)

    # Get unique systems and threads
    systems = df_filtered["system"].unique()
    threads = sorted(df_filtered["threads"].unique())

    # Create figure and axis
    _fig, ax = plt.subplots(figsize=(10, 6))

    # Set up bar positions
    x_pos = np.arange(len(threads))
    bar_width = bar_width_for(len(systems))

    # Get colors from the current color cycle
    colors_list = plt.rcParams["axes.prop_cycle"].by_key()["color"]

    # Plot bars for each system, centering each group of bars on its tick
    for i, system in enumerate(systems):
        system_data = df_filtered[df_filtered["system"] == system]
        offset = (i - len(systems) / 2 + 0.5) * bar_width
        ax.bar(
            x_pos + offset,
            aligned_throughput(system_data, threads),
            bar_width,
            label=system,
            color=colors_list[i % len(colors_list)],
            edgecolor="black",
            linewidth=1.2,
        )

    # Customize the plot
    ax.set_xlabel("Number of Threads", fontsize=14, fontweight="bold")
    ax.set_ylabel("Throughput (rows/sec)", fontsize=14, fontweight="bold")

    # Set y-axis to start from 0 with dynamic upper limit
    max_throughput = df_filtered["throughput"].max()
    ax.set_ylim(0, max_throughput * 1.15)  # Add 15% tolerance for legend space

    # Format y-axis labels
    ax.yaxis.set_major_formatter(plt.FuncFormatter(format_thousands))

    # Set x-axis ticks to show thread values
    ax.set_xticks(x_pos)
    ax.set_xticklabels(threads)

    # Add legend
    ax.legend(loc="upper left", frameon=True, fontsize=12)

    # Add grid for better readability
    ax.grid(axis="y", alpha=0.3, linestyle="--")
    ax.set_axisbelow(True)

    # Adjust layout
    plt.tight_layout()

    # Save the figure
    plt.savefig(output_filename, dpi=300, bbox_inches="tight")
    print(f"Saved plot to {output_filename}")

    # Display the plot
    plt.show()


if __name__ == "__main__":
    main()