mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-17 08:34:19 +01:00
Merge 'perf/throughput: Improve reproducibility' from Pekka Enberg
Improve reproducibility by documenting the steps needed to run the benchmarks and generate the plots. Also simplify plot generation a bit. Closes #3843
This commit is contained in:
25
perf/throughput/README.md
Normal file
25
perf/throughput/README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Turso throughput benchmark
|
||||||
|
|
||||||
|
This directory contains the Turso throughput benchmark.
|
||||||
|
|
||||||
|
First, run the benchmarks:
|
||||||
|
|
||||||
|
```console
|
||||||
|
cd rusqlite
|
||||||
|
./scripts/bench.sh > ../plot/sqlite.csv
|
||||||
|
|
||||||
|
cd turso
|
||||||
|
./scripts/bench.sh > ../plot/turso.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, generate the plots:
|
||||||
|
|
||||||
|
```console
|
||||||
|
cd plot
|
||||||
|
uv run plot-thread-scaling.py turso.csv sqlite.csv
|
||||||
|
uv run plot-compute-impact.py turso.csv sqlite.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
This will generate:
|
||||||
|
- `thread-scaling.pdf`: Write throughput vs. number of threads (scalability test)
|
||||||
|
- `compute-impact.pdf`: How CPU-bound work affects write throughput
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
@@ -12,19 +11,19 @@ plt.rcParams.update({
|
|||||||
"font.serif": ["Times"],
|
"font.serif": ["Times"],
|
||||||
})
|
})
|
||||||
|
|
||||||
# Get CSV filename from command line argument
|
# Get CSV filenames from command line arguments
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print("Usage: python script.py <csv_filename>")
|
print("Usage: python script.py <csv_filename> [<csv_filename> ...]")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
csv_filename = sys.argv[1]
|
csv_filenames = sys.argv[1:]
|
||||||
|
|
||||||
# Get basename without extension for output filename
|
# Output filename
|
||||||
basename = os.path.splitext(csv_filename)[0]
|
output_filename = "compute-impact.pdf"
|
||||||
output_filename = f"{basename}-compute.png"
|
|
||||||
|
|
||||||
# Read data from CSV file
|
# Read data from all CSV files and concatenate
|
||||||
df = pd.read_csv(csv_filename)
|
dfs = [pd.read_csv(filename) for filename in csv_filenames]
|
||||||
|
df = pd.concat(dfs, ignore_index=True)
|
||||||
|
|
||||||
# Create figure and axis
|
# Create figure and axis
|
||||||
fig, ax = plt.subplots(figsize=(10, 6))
|
fig, ax = plt.subplots(figsize=(10, 6))
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
@@ -8,19 +7,19 @@ import scienceplots # noqa: F401
|
|||||||
|
|
||||||
plt.style.use(["science"])
|
plt.style.use(["science"])
|
||||||
|
|
||||||
# Get CSV filename from command line argument
|
# Get CSV filenames from command line arguments
|
||||||
if len(sys.argv) < 2:
|
if len(sys.argv) < 2:
|
||||||
print("Usage: python script.py <csv_filename>")
|
print("Usage: python script.py <csv_filename> [<csv_filename> ...]")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
csv_filename = sys.argv[1]
|
csv_filenames = sys.argv[1:]
|
||||||
|
|
||||||
# Get basename without extension for output filename
|
# Output filename
|
||||||
basename = os.path.splitext(csv_filename)[0]
|
output_filename = "thread-scaling.pdf"
|
||||||
output_filename = f"{basename}-write.png"
|
|
||||||
|
|
||||||
# Read data from CSV file
|
# Read data from all CSV files and concatenate
|
||||||
df = pd.read_csv(csv_filename)
|
dfs = [pd.read_csv(filename) for filename in csv_filenames]
|
||||||
|
df = pd.concat(dfs, ignore_index=True)
|
||||||
|
|
||||||
# Filter for compute time = 0
|
# Filter for compute time = 0
|
||||||
df_filtered = df[df["compute"] == 0].sort_values("threads")
|
df_filtered = df[df["compute"] == 0].sort_values("threads")
|
||||||
Reference in New Issue
Block a user