diff --git a/perf/throughput/README.md b/perf/throughput/README.md new file mode 100644 index 000000000..f70132497 --- /dev/null +++ b/perf/throughput/README.md @@ -0,0 +1,25 @@ +# Turso throughput benchmark + +This directory contains the Turso throughput benchmark. + +First, run the benchmarks: + +```console +cd rusqlite +./scripts/bench.sh > ../plot/sqlite.csv + +cd ../turso +./scripts/bench.sh > ../plot/turso.csv +``` + +Then, generate the plots: + +```console +cd ../plot +uv run plot-thread-scaling.py turso.csv sqlite.csv +uv run plot-compute-impact.py turso.csv sqlite.csv +``` + +This will generate: +- `thread-scaling.pdf`: Write throughput vs. number of threads (scalability test) +- `compute-impact.pdf`: How CPU-bound work affects write throughput diff --git a/perf/throughput/plot/plot-compute-write.py b/perf/throughput/plot/plot-compute-impact.py similarity index 86% rename from perf/throughput/plot/plot-compute-write.py rename to perf/throughput/plot/plot-compute-impact.py index 1807641ad..a84d7bbbf 100644 --- a/perf/throughput/plot/plot-compute-write.py +++ b/perf/throughput/plot/plot-compute-impact.py @@ -1,4 +1,3 @@ -import os import sys import matplotlib.pyplot as plt @@ -12,19 +11,19 @@ plt.rcParams.update({ "font.serif": ["Times"], }) -# Get CSV filename from command line argument +# Get CSV filenames from command line arguments if len(sys.argv) < 2: - print("Usage: python script.py ") + print("Usage: python script.py [ ...]") sys.exit(1) -csv_filename = sys.argv[1] +csv_filenames = sys.argv[1:] -# Get basename without extension for output filename -basename = os.path.splitext(csv_filename)[0] -output_filename = f"{basename}-compute.png" +# Output filename +output_filename = "compute-impact.pdf" -# Read data from CSV file -df = pd.read_csv(csv_filename) +# Read data from all CSV files and concatenate +dfs = [pd.read_csv(filename) for filename in csv_filenames] +df = pd.concat(dfs, ignore_index=True) # Create figure and axis fig, ax = plt.subplots(figsize=(10, 
6)) diff --git a/perf/throughput/plot/plot-write.py b/perf/throughput/plot/plot-thread-scaling.py similarity index 85% rename from perf/throughput/plot/plot-write.py rename to perf/throughput/plot/plot-thread-scaling.py index f3fc6ae86..a7bb384a5 100644 --- a/perf/throughput/plot/plot-write.py +++ b/perf/throughput/plot/plot-thread-scaling.py @@ -1,4 +1,3 @@ -import os import sys import matplotlib.pyplot as plt @@ -8,19 +7,19 @@ import scienceplots # noqa: F401 plt.style.use(["science"]) -# Get CSV filename from command line argument +# Get CSV filenames from command line arguments if len(sys.argv) < 2: - print("Usage: python script.py ") + print("Usage: python script.py [ ...]") sys.exit(1) -csv_filename = sys.argv[1] +csv_filenames = sys.argv[1:] -# Get basename without extension for output filename -basename = os.path.splitext(csv_filename)[0] -output_filename = f"{basename}-write.png" +# Output filename +output_filename = "thread-scaling.pdf" -# Read data from CSV file -df = pd.read_csv(csv_filename) +# Read data from all CSV files and concatenate +dfs = [pd.read_csv(filename) for filename in csv_filenames] +df = pd.concat(dfs, ignore_index=True) # Filter for compute time = 0 df_filtered = df[df["compute"] == 0].sort_values("threads")