perf/throughput: Improve reproducibility

Improve reproducibility by documenting the steps needed to run the
benchmarks and generate the plots. Also simplify plot generation a bit.
This commit is contained in:
Pekka Enberg
2025-10-27 10:51:02 +02:00
parent 1fb1fbf210
commit f10431d24f
3 changed files with 41 additions and 18 deletions

25
perf/throughput/README.md Normal file
View File

@@ -0,0 +1,25 @@
# Turso throughput benchmark
This directory contains the Turso throughput benchmark.
First, run the benchmarks:
```console
cd rusqlite
./scripts/bench.sh > ../plot/sqlite.csv
cd ../turso
./scripts/bench.sh > ../plot/turso.csv
```
Then, generate the plots:
```console
cd ../plot
uv run plot-thread-scaling.py turso.csv sqlite.csv
uv run plot-compute-impact.py turso.csv sqlite.csv
```
This will generate:
- `thread-scaling.pdf`: Write throughput vs. number of threads (scalability test)
- `compute-impact.pdf`: How CPU-bound work affects write throughput

View File

@@ -1,4 +1,3 @@
import os
import sys import sys
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
@@ -12,19 +11,19 @@ plt.rcParams.update({
"font.serif": ["Times"], "font.serif": ["Times"],
}) })
# Get CSV filename from command line argument # Get CSV filenames from command line arguments
if len(sys.argv) < 2: if len(sys.argv) < 2:
print("Usage: python script.py <csv_filename>") print("Usage: python script.py <csv_filename> [<csv_filename> ...]")
sys.exit(1) sys.exit(1)
csv_filename = sys.argv[1] csv_filenames = sys.argv[1:]
# Get basename without extension for output filename # Output filename
basename = os.path.splitext(csv_filename)[0] output_filename = "compute-impact.pdf"
output_filename = f"{basename}-compute.png"
# Read data from CSV file # Read data from all CSV files and concatenate
df = pd.read_csv(csv_filename) dfs = [pd.read_csv(filename) for filename in csv_filenames]
df = pd.concat(dfs, ignore_index=True)
# Create figure and axis # Create figure and axis
fig, ax = plt.subplots(figsize=(10, 6)) fig, ax = plt.subplots(figsize=(10, 6))

View File

@@ -1,4 +1,3 @@
import os
import sys import sys
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
@@ -8,19 +7,19 @@ import scienceplots # noqa: F401
plt.style.use(["science"]) plt.style.use(["science"])
# Get CSV filename from command line argument # Get CSV filenames from command line arguments
if len(sys.argv) < 2: if len(sys.argv) < 2:
print("Usage: python script.py <csv_filename>") print("Usage: python script.py <csv_filename> [<csv_filename> ...]")
sys.exit(1) sys.exit(1)
csv_filename = sys.argv[1] csv_filenames = sys.argv[1:]
# Get basename without extension for output filename # Output filename
basename = os.path.splitext(csv_filename)[0] output_filename = "thread-scaling.pdf"
output_filename = f"{basename}-write.png"
# Read data from CSV file # Read data from all CSV files and concatenate
df = pd.read_csv(csv_filename) dfs = [pd.read_csv(filename) for filename in csv_filenames]
df = pd.concat(dfs, ignore_index=True)
# Filter for compute time = 0 # Filter for compute time = 0
df_filtered = df[df["compute"] == 0].sort_values("threads") df_filtered = df[df["compute"] == 0].sort_values("threads")