Merge 'TPC-H with criterion and nyrkio' from Pedro Muniz

Added a Criterion benchmark that runs the TPC-H queries and uploads the
results to Nyrkiö. I did not delete the existing tpc-h job because someone
may still be relying on it without my knowing.

Closes #1560
This commit is contained in:
Jussi Saurio
2025-05-24 21:54:48 +03:00
4 changed files with 214 additions and 3 deletions

View File

@@ -2,9 +2,9 @@ name: Rust Benchmarks+Nyrkiö
on:
push:
branches: [ "main", "master", "notmain" ]
branches: ["main", "master", "notmain"]
pull_request:
branches: [ "main", "notmain", "master" ]
branches: ["main", "notmain", "master"]
env:
CARGO_TERM_COLOR: never
@@ -22,7 +22,7 @@ jobs:
# run: npm install && npm run build
- name: Bench
run: cargo bench 2>&1 | tee output.txt
run: make bench-exclude-tpc-h 2>&1 | tee output.txt
- name: Analyze benchmark result with Nyrkiö
uses: nyrkio/change-detection@HEAD
with:
@@ -100,6 +100,60 @@ jobs:
never-fail: true
nyrkio-public: true
# Runs the Criterion-based TPC-H benchmark and sends its output to Nyrkiö for change detection.
tpc-h-criterion:
runs-on: ubuntu-latest
env:
# Where the TPC-H database lives in the checkout; used both as the cache path and as the download target.
DB_FILE: "perf/tpc-h/TPC-H.db"
steps:
- uses: actions/checkout@v3
- uses: Swatinem/rust-cache@v2
with:
prefix-key: "v1-rust" # can be updated if we need to reset caches due to non-trivial change in the dependencies (for example, custom env var were set for single workspace project)
# Restore the database from the Actions cache. The step id is referenced by the `if` of the download step below.
- name: Cache TPC-H
id: cache-primes
uses: actions/cache@v4
with:
path: ${{ env.DB_FILE }}
key: tpc-h
# Only download the database when the cache step did not report a hit.
- name: Download TPC-H
if: steps.cache-primes.outputs.cache-hit != 'true'
env:
DB_URL: "https://github.com/lovasoa/TPCH-sqlite/releases/download/v1.0/TPC-H.db"
run: wget -O $DB_FILE --no-verbose $DB_URL
# Run only the TPC-H bench target; stdout and stderr are both captured in output.txt for the analysis step.
- name: Bench
run: cargo bench --bench tpc_h_benchmark 2>&1 | tee output.txt
- name: Analyze benchmark result with Nyrkiö
uses: nyrkio/change-detection@HEAD
with:
name: tpc-h
tool: criterion
output-file-path: output.txt
# What to do if a change is immediately detected by Nyrkiö.
# Note that smaller changes are only detected with delay, usually after a change
# persisted over 2-7 commits. Go to nyrkiö.com to view those or configure alerts.
# Note that Nyrkiö will find all changes, also improvements. This means fail-on-alert
# on pull events isn't compatible with this workflow being required to pass branch protection.
fail-on-alert: false
comment-on-alert: true
comment-always: false
# Nyrkiö configuration
# Get yours from https://nyrkio.com/docs/getting-started
nyrkio-token: ${{ secrets.NYRKIO_JWT_TOKEN }}
# HTTP requests will fail for all non-core contributors that don't have their own token.
# Don't want that to spoil the build, so:
never-fail: true
# Make results and change points public, so that any oss contributor can see them
nyrkio-public: true
# parameters of the algorithm. Note: These are global, so we only set them once and for all.
# Smaller p-value = less change points found. Larger p-value = more, but also more false positives.
nyrkio-settings-pvalue: 0.0001
# Ignore changes smaller than this.
nyrkio-settings-threshold: 0%
tpc-h:
runs-on: ubuntu-latest
steps:

View File

@@ -115,3 +115,14 @@ bench-vfs: uv-sync
clickbench:
./perf/clickbench/benchmark.sh
.PHONY: clickbench
# Runs every cargo bench target except tpc_h_benchmark.
# `cargo bench --bench` is invoked without a target name; the pipeline relies on
# its output containing an "Available bench targets:" header, keeps the lines
# after that header, drops blank lines and tpc_h_benchmark, and turns each
# remaining name into a `--bench <name>` argument for the real `cargo bench` run.
# Exits with status 1 when no bench target is left after filtering.
bench-exclude-tpc-h:
@benchmarks=$$(cargo bench --bench 2>&1 | grep -A 1000 '^Available bench targets:' | grep -v '^Available bench targets:' | grep -v '^ *$$' | grep -v 'tpc_h_benchmark' | xargs -I {} printf -- "--bench %s " {}); \
if [ -z "$$benchmarks" ]; then \
echo "No benchmarks found (excluding tpc_h_benchmark)."; \
exit 1; \
else \
cargo bench $$benchmarks; \
fi
.PHONY: bench-exclude-tpc-h

View File

@@ -116,3 +116,7 @@ harness = false
[[bench]]
name = "json_benchmark"
harness = false
# TPC-H query benchmark. `harness = false` turns off the built-in libtest
# harness so the bench file can provide its own `main`.
[[bench]]
name = "tpc_h_benchmark"
harness = false

View File

@@ -0,0 +1,142 @@
use std::sync::Arc;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode};
use limbo_core::{Database, PlatformIO, IO as _};
use pprof::criterion::{Output, PProfProfiler};
/// Relative path to the TPC-H SQLite database opened by both the Limbo and the
/// rusqlite benchmarks. Being relative, it is resolved against the working
/// directory of the benchmark process.
const TPC_H_PATH: &str = "../perf/tpc-h/TPC-H.db";
/// Expands to a `(number, sql)` tuple for the TPC-H query with the given
/// literal number. The SQL text is embedded at compile time from
/// `../../perf/tpc-h/queries/<number>.sql`, relative to this source file.
macro_rules! tpc_query {
    ($id:literal) => {
        ($id, include_str!(concat!("../../perf/tpc-h/queries/", $id, ".sql")))
    };
}
/// Opens the TPC-H database at `TPC_H_PATH` through rusqlite and switches the
/// connection to `locking_mode = EXCLUSIVE` before handing it back.
///
/// # Panics
///
/// Panics if the connection cannot be opened or the pragma cannot be applied.
fn rusqlite_open_tpc_h() -> rusqlite::Connection {
    let conn = rusqlite::Connection::open(TPC_H_PATH).unwrap();
    let pragma_outcome = conn.pragma_update(None, "locking_mode", "EXCLUSIVE");
    pragma_outcome.unwrap();
    conn
}
fn bench_tpc_h_queries(criterion: &mut Criterion) {
// https://github.com/tursodatabase/limbo/issues/174
// The rusqlite benchmark crashes on Mac M1 when using the flamegraph features
let enable_rusqlite = std::env::var("DISABLE_RUSQLITE_BENCHMARK").is_err();
#[allow(clippy::arc_with_non_send_sync)]
let io = Arc::new(PlatformIO::new().unwrap());
let db = Database::open_file(io.clone(), TPC_H_PATH, false).unwrap();
let limbo_conn = db.connect().unwrap();
let queries = [
tpc_query!(1),
// tpc_query!(2), // Skipped as subquery in bind_column references is todo!
tpc_query!(3),
// thread 'main' panicked at core/translate/planner.rs:256:28:
// not yet implemented
// tpc_query!(4),
tpc_query!(5),
tpc_query!(6),
tpc_query!(7),
tpc_query!(8),
tpc_query!(9),
tpc_query!(10),
// tpc_query!(11), // Skipped not implemented
tpc_query!(12),
// thread 'main' panicked at core/storage/btree.rs:3233:26:
// overflow cell with divider cell was not found
// tpc_query!(13),
tpc_query!(14),
// thread 'main' panicked at core/benches/tpc_h_benchmark.rs:71:62:
// called `Result::unwrap()` on an `Err` value: ParseError("CREATE VIEW not supported yet")
// tpc_query!(15),
// thread 'main' panicked at core/translate/planner.rs:267:34:
// not yet implemented
// tpc_query!(16),
// thread 'main' panicked at core/translate/planner.rs:291:30:
// not yet implemented
// tpc_query!(17),
// thread 'main' panicked at core/translate/planner.rs:267:34:
// not yet implemented
// tpc_query!(18),
tpc_query!(19),
// thread 'main' panicked at core/translate/planner.rs:267:34:
// not yet implemented
// tpc_query!(20),
// thread 'main' panicked at core/translate/planner.rs:256:28:
// not yet implemented
// tpc_query!(21),
// thread 'main' panicked at core/translate/planner.rs:291:30:
// not yet implemented
// tpc_query!(22),
];
for (idx, query) in queries.iter() {
let mut group = criterion.benchmark_group(format!("Query `{}` ", idx));
group.sampling_mode(SamplingMode::Flat);
group.sample_size(10);
group.bench_with_input(
BenchmarkId::new("limbo_tpc_h_query", idx),
query,
|b, query| {
b.iter(|| {
let mut stmt = limbo_conn.prepare(query).unwrap();
loop {
match stmt.step().unwrap() {
limbo_core::StepResult::Row => {
black_box(stmt.row());
}
limbo_core::StepResult::IO => {
let _ = io.run_once();
}
limbo_core::StepResult::Done => {
break;
}
limbo_core::StepResult::Interrupt | limbo_core::StepResult::Busy => {
unreachable!();
}
}
}
stmt.reset();
});
},
);
if enable_rusqlite {
let sqlite_conn = rusqlite_open_tpc_h();
group.bench_with_input(
BenchmarkId::new("sqlite_tpc_h_query", idx),
query,
|b, query| {
let mut stmt = sqlite_conn.prepare(query).unwrap();
b.iter(|| {
let mut rows = stmt.raw_query();
while let Some(row) = rows.next().unwrap() {
black_box(row);
}
});
},
);
}
group.finish();
}
}
// Declares the Criterion group `benches` with `bench_tpc_h_queries` as its only target.
// The default Criterion configuration is extended with a pprof profiler whose output
// is a flamegraph.
// NOTE(review): the `100` is presumably the profiler's sampling frequency — confirm
// against the pprof documentation.
criterion_group! {
    name = benches;
    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
    targets = bench_tpc_h_queries
}
// Entry point of the benchmark binary for the `benches` group.
criterion_main!(benches);