diff --git a/.github/workflows/rust_perf.yml b/.github/workflows/rust_perf.yml
index b67620784..940f60f76 100644
--- a/.github/workflows/rust_perf.yml
+++ b/.github/workflows/rust_perf.yml
@@ -2,9 +2,9 @@ name: Rust Benchmarks+Nyrkiö
 
 on:
   push:
-    branches: [ "main", "master", "notmain" ]
+    branches: ["main", "master", "notmain"]
   pull_request:
-    branches: [ "main", "notmain", "master" ]
+    branches: ["main", "notmain", "master"]
 
 env:
   CARGO_TERM_COLOR: never
@@ -22,7 +22,7 @@ jobs:
       #   run: npm install && npm run build
 
       - name: Bench
-        run: cargo bench 2>&1 | tee output.txt
+        run: set -o pipefail; make bench-exclude-tpc-h 2>&1 | tee output.txt
       - name: Analyze benchmark result with Nyrkiö
         uses: nyrkio/change-detection@HEAD
         with:
@@ -100,6 +100,61 @@ jobs:
           never-fail: true
           nyrkio-public: true
 
+  tpc-h-criterion:
+    runs-on: ubuntu-latest
+    env:
+      DB_FILE: "perf/tpc-h/TPC-H.db"
+    steps:
+      - uses: actions/checkout@v3
+      - uses: Swatinem/rust-cache@v2
+        with:
+          prefix-key: "v1-rust" # can be updated if we need to reset caches due to non-trivial change in the dependencies (for example, custom env var were set for single workspace project)
+
+      - name: Cache TPC-H
+        id: cache-tpc-h
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.DB_FILE }}
+          key: tpc-h
+      - name: Download TPC-H
+        if: steps.cache-tpc-h.outputs.cache-hit != 'true'
+        env:
+          DB_URL: "https://github.com/lovasoa/TPCH-sqlite/releases/download/v1.0/TPC-H.db"
+        run: wget -O "$DB_FILE" --no-verbose "$DB_URL"
+
+      - name: Bench
+        # pipefail: otherwise the exit status of `tee` hides a failing `cargo bench`.
+        run: set -o pipefail; cargo bench --bench tpc_h_benchmark 2>&1 | tee output.txt
+      - name: Analyze benchmark result with Nyrkiö
+        uses: nyrkio/change-detection@HEAD
+        with:
+          name: tpc-h
+          tool: criterion
+          output-file-path: output.txt
+
+          # What to do if a change is immediately detected by Nyrkiö.
+          # Note that smaller changes are only detected with delay, usually after a change
+          # persisted over 2-7 commits. Go to nyrkiö.com to view those or configure alerts.
+          # Note that Nyrkiö will find all changes, also improvements. This means fail-on-alert
+          # on pull events isn't compatible with this workflow being required to pass branch protection.
+          fail-on-alert: false
+          comment-on-alert: true
+          comment-always: false
+          # Nyrkiö configuration
+          # Get yours from https://nyrkio.com/docs/getting-started
+          nyrkio-token: ${{ secrets.NYRKIO_JWT_TOKEN }}
+          # HTTP requests will fail for all non-core contributors that don't have their own token.
+          # Don't want that to spoil the build, so:
+          never-fail: true
+          # Make results and change points public, so that any oss contributor can see them
+          nyrkio-public: true
+
+          # parameters of the algorithm. Note: These are global, so we only set them once and for all.
+          # Smaller p-value = less change points found. Larger p-value = more, but also more false positives.
+          nyrkio-settings-pvalue: 0.0001
+          # Ignore changes smaller than this.
+          nyrkio-settings-threshold: 0%
+
   tpc-h:
     runs-on: ubuntu-latest
     steps:
diff --git a/Makefile b/Makefile
index db12b90ff..f13c23c4d 100644
--- a/Makefile
+++ b/Makefile
@@ -115,3 +115,16 @@ bench-vfs: uv-sync
 clickbench:
 	./perf/clickbench/benchmark.sh
 .PHONY: clickbench
+
+
+# Run every bench target except tpc_h_benchmark, which CI runs in its own job.
+# Target names are scraped from the list cargo prints when `--bench` gets no value.
+bench-exclude-tpc-h:
+	@benchmarks=$$(cargo bench --bench 2>&1 | grep -A 1000 '^Available bench targets:' | grep -v '^Available bench targets:' | grep -v '^ *$$' | grep -v 'tpc_h_benchmark' | xargs -I {} printf -- "--bench %s " {}); \
+	if [ -z "$$benchmarks" ]; then \
+		echo "No benchmarks found (excluding tpc_h_benchmark)."; \
+		exit 1; \
+	else \
+		cargo bench $$benchmarks; \
+	fi
+.PHONY: bench-exclude-tpc-h
diff --git a/core/Cargo.toml b/core/Cargo.toml
index d32fc4745..393c70cfe 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -116,3 +116,7 @@ harness = false
 [[bench]]
 name = "json_benchmark"
 harness = false
+
+[[bench]]
+name = "tpc_h_benchmark"
+harness = false
diff --git a/core/benches/tpc_h_benchmark.rs b/core/benches/tpc_h_benchmark.rs
new file mode 100644
index 000000000..fe29f8ced
--- /dev/null
+++ b/core/benches/tpc_h_benchmark.rs
@@ -0,0 +1,159 @@
+//! Criterion benchmark that runs a subset of the TPC-H queries against Limbo and,
+//! unless disabled through the environment, against SQLite (via rusqlite).
+
+use std::sync::Arc;
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode};
+use limbo_core::{Database, PlatformIO, IO as _};
+use pprof::criterion::{Output, PProfProfiler};
+
+/// Path of the TPC-H database file; both engines open this same file.
+const TPC_H_PATH: &str = "../perf/tpc-h/TPC-H.db";
+
+/// Expands to `(number, sql)` for TPC-H query `$num`, embedding the text of
+/// `perf/tpc-h/queries/<num>.sql` at compile time.
+macro_rules! tpc_query {
+    ($num:literal) => {
+        (
+            $num,
+            include_str!(concat!("../../perf/tpc-h/queries/", $num, ".sql")),
+        )
+    };
+}
+
+/// Opens the TPC-H database with rusqlite and sets `locking_mode` to `EXCLUSIVE`.
+fn rusqlite_open_tpc_h() -> rusqlite::Connection {
+    let sqlite_conn = rusqlite::Connection::open(TPC_H_PATH).unwrap();
+    sqlite_conn
+        .pragma_update(None, "locking_mode", "EXCLUSIVE")
+        .unwrap();
+    sqlite_conn
+}
+
+/// Registers one Criterion group per enabled TPC-H query and times it on Limbo
+/// and, unless `DISABLE_RUSQLITE_BENCHMARK` is set, on SQLite.
+///
+/// Both engines prepare the statement inside the timed closure, so each sample
+/// covers prepare plus iteration over the full result set.
+fn bench_tpc_h_queries(criterion: &mut Criterion) {
+    // https://github.com/tursodatabase/limbo/issues/174
+    // The rusqlite benchmark crashes on Mac M1 when using the flamegraph features
+    let enable_rusqlite = std::env::var("DISABLE_RUSQLITE_BENCHMARK").is_err();
+
+    #[allow(clippy::arc_with_non_send_sync)]
+    let io = Arc::new(PlatformIO::new().unwrap());
+    let db = Database::open_file(io.clone(), TPC_H_PATH, false).unwrap();
+    let limbo_conn = db.connect().unwrap();
+
+    // Commented-out entries are queries that currently fail; the failure is noted beside each.
+    let queries = [
+        tpc_query!(1),
+        // tpc_query!(2), // Skipped as subquery in bind_column references is todo!
+        tpc_query!(3),
+        // thread 'main' panicked at core/translate/planner.rs:256:28:
+        // not yet implemented
+        // tpc_query!(4),
+        tpc_query!(5),
+        tpc_query!(6),
+        tpc_query!(7),
+        tpc_query!(8),
+        tpc_query!(9),
+        tpc_query!(10),
+        // tpc_query!(11), // Skipped not implemented
+        tpc_query!(12),
+        // thread 'main' panicked at core/storage/btree.rs:3233:26:
+        // overflow cell with divider cell was not found
+        // tpc_query!(13),
+        tpc_query!(14),
+        // thread 'main' panicked at core/benches/tpc_h_benchmark.rs:71:62:
+        // called `Result::unwrap()` on an `Err` value: ParseError("CREATE VIEW not supported yet")
+        // tpc_query!(15),
+
+        // thread 'main' panicked at core/translate/planner.rs:267:34:
+        // not yet implemented
+        // tpc_query!(16),
+
+        // thread 'main' panicked at core/translate/planner.rs:291:30:
+        // not yet implemented
+        // tpc_query!(17),
+
+        // thread 'main' panicked at core/translate/planner.rs:267:34:
+        // not yet implemented
+        // tpc_query!(18),
+        tpc_query!(19),
+        // thread 'main' panicked at core/translate/planner.rs:267:34:
+        // not yet implemented
+        // tpc_query!(20),
+
+        // thread 'main' panicked at core/translate/planner.rs:256:28:
+        // not yet implemented
+        // tpc_query!(21),
+        // thread 'main' panicked at core/translate/planner.rs:291:30:
+        // not yet implemented
+        // tpc_query!(22),
+    ];
+
+    for (idx, query) in queries.iter() {
+        let mut group = criterion.benchmark_group(format!("Query `{}` ", idx));
+        group.sampling_mode(SamplingMode::Flat);
+        group.sample_size(10);
+
+        group.bench_with_input(
+            BenchmarkId::new("limbo_tpc_h_query", idx),
+            query,
+            |b, query| {
+                b.iter(|| {
+                    let mut stmt = limbo_conn.prepare(query).unwrap();
+                    loop {
+                        match stmt.step().unwrap() {
+                            limbo_core::StepResult::Row => {
+                                black_box(stmt.row());
+                            }
+                            limbo_core::StepResult::IO => {
+                                // Surface I/O errors instead of discarding them, as every other
+                                // fallible call in this benchmark does.
+                                io.run_once().unwrap();
+                            }
+                            limbo_core::StepResult::Done => {
+                                break;
+                            }
+                            limbo_core::StepResult::Interrupt | limbo_core::StepResult::Busy => {
+                                unreachable!();
+                            }
+                        }
+                    }
+                    stmt.reset();
+                });
+            },
+        );
+
+        if enable_rusqlite {
+            let sqlite_conn = rusqlite_open_tpc_h();
+
+            group.bench_with_input(
+                BenchmarkId::new("sqlite_tpc_h_query", idx),
+                query,
+                |b, query| {
+                    b.iter(|| {
+                        // Prepared per iteration, like the Limbo side above, so both
+                        // engines are timed on the same amount of work.
+                        let mut stmt = sqlite_conn.prepare(query).unwrap();
+                        let mut rows = stmt.raw_query();
+                        while let Some(row) = rows.next().unwrap() {
+                            black_box(row);
+                        }
+                    });
+                },
+            );
+        }
+
+        group.finish();
+    }
+}
+
+criterion_group! {
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = bench_tpc_h_queries
+}
+criterion_main!(benches);