From 51b097fa3d98d3142fc38e35e9be2c3473e6764e Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Thu, 15 May 2025 17:09:49 +0300 Subject: [PATCH] perf/ci: add basic tpc-h benchmark --- .github/workflows/rust_perf.yml | 6 ++ perf/tpc-h/benchmark.sh | 39 +++++++++++++ perf/tpc-h/queries/1.sql | 21 +++++++ perf/tpc-h/queries/10.sql | 32 +++++++++++ perf/tpc-h/queries/11.sql | 30 ++++++++++ perf/tpc-h/queries/12.sql | 28 ++++++++++ perf/tpc-h/queries/13.sql | 20 +++++++ perf/tpc-h/queries/14.sql | 13 +++++ perf/tpc-h/queries/15.sql | 36 ++++++++++++ perf/tpc-h/queries/16.sql | 33 +++++++++++ perf/tpc-h/queries/17.sql | 20 +++++++ perf/tpc-h/queries/18.sql | 36 ++++++++++++ perf/tpc-h/queries/19.sql | 38 +++++++++++++ perf/tpc-h/queries/2.sql | 47 ++++++++++++++++ perf/tpc-h/queries/20.sql | 40 ++++++++++++++ perf/tpc-h/queries/21.sql | 43 +++++++++++++++ perf/tpc-h/queries/22.sql | 40 ++++++++++++++ perf/tpc-h/queries/3.sql | 23 ++++++++ perf/tpc-h/queries/4.sql | 24 ++++++++ perf/tpc-h/queries/5.sql | 24 ++++++++ perf/tpc-h/queries/6.sql | 9 +++ perf/tpc-h/queries/7.sql | 40 ++++++++++++++ perf/tpc-h/queries/8.sql | 38 +++++++++++++ perf/tpc-h/queries/9.sql | 32 +++++++++++ perf/tpc-h/run.sh | 98 +++++++++++++++++++++++++++++++++ 25 files changed, 810 insertions(+) create mode 100755 perf/tpc-h/benchmark.sh create mode 100644 perf/tpc-h/queries/1.sql create mode 100644 perf/tpc-h/queries/10.sql create mode 100644 perf/tpc-h/queries/11.sql create mode 100644 perf/tpc-h/queries/12.sql create mode 100644 perf/tpc-h/queries/13.sql create mode 100644 perf/tpc-h/queries/14.sql create mode 100644 perf/tpc-h/queries/15.sql create mode 100644 perf/tpc-h/queries/16.sql create mode 100644 perf/tpc-h/queries/17.sql create mode 100644 perf/tpc-h/queries/18.sql create mode 100644 perf/tpc-h/queries/19.sql create mode 100644 perf/tpc-h/queries/2.sql create mode 100644 perf/tpc-h/queries/20.sql create mode 100644 perf/tpc-h/queries/21.sql create mode 100644 perf/tpc-h/queries/22.sql create mode 100644 perf/tpc-h/queries/3.sql create mode 100644 perf/tpc-h/queries/4.sql create mode 100644 perf/tpc-h/queries/5.sql create mode 100644 perf/tpc-h/queries/6.sql create mode 100644 perf/tpc-h/queries/7.sql create mode 100644 perf/tpc-h/queries/8.sql create mode 100644 perf/tpc-h/queries/9.sql create mode 100755 perf/tpc-h/run.sh diff --git a/.github/workflows/rust_perf.yml b/.github/workflows/rust_perf.yml index 1d6cd6f25..5b052c2fb 100644 --- a/.github/workflows/rust_perf.yml +++ b/.github/workflows/rust_perf.yml @@ -126,3 +126,9 @@ jobs: external-data-json-path: null gh-repository: null + tpc-h: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: TPC-H + run: ./perf/tpc-h/benchmark.sh \ No newline at end of file diff --git a/perf/tpc-h/benchmark.sh b/perf/tpc-h/benchmark.sh new file mode 100755 index 000000000..c39ec1759 --- /dev/null +++ b/perf/tpc-h/benchmark.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# TPC-H benchmark script + +REPO_ROOT=$(git rev-parse --show-toplevel) +RELEASE_BUILD_DIR="$REPO_ROOT/target/release" +TPCH_DIR="$REPO_ROOT/perf/tpc-h" +DB_FILE="$TPCH_DIR/TPC-H.db" + +# If sqlite3 doesn't exist, bail +if ! command -v sqlite3 >/dev/null 2>&1; then + echo "Error: sqlite3 is not installed" + exit 1 +fi + +# Build Limbo in release mode if it's not already built +if [ ! -f "$RELEASE_BUILD_DIR/limbo" ]; then + echo "Building Limbo..." + cargo build --bin limbo --release +fi + +# Download the TPC-H database if it doesn't exist +DB_URL="https://github.com/lovasoa/TPCH-sqlite/releases/download/v1.0/TPC-H.db" +if [ ! -f "$DB_FILE" ]; then + echo "Downloading TPC-H database..." + if command -v wget >/dev/null 2>&1; then + wget -O "$DB_FILE" --no-verbose "$DB_URL" + elif command -v curl >/dev/null 2>&1; then + curl -sL -o "$DB_FILE" "$DB_URL" + else + echo "Error: Neither wget nor curl is available" + exit 1 + fi +else + echo "Using existing TPC-H.db file" +fi + +# Run the benchmark +echo "Running TPC-H benchmark..." +"$TPCH_DIR/run.sh" \ No newline at end of file diff --git a/perf/tpc-h/queries/1.sql b/perf/tpc-h/queries/1.sql new file mode 100644 index 000000000..9a7fbc6c4 --- /dev/null +++ b/perf/tpc-h/queries/1.sql @@ -0,0 +1,21 @@ +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= cast('1998-12-01' as datetime) -- modified not to include cast({'day': 71} as interval) +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; diff --git a/perf/tpc-h/queries/10.sql b/perf/tpc-h/queries/10.sql new file mode 100644 index 000000000..a9e16acee --- /dev/null +++ b/perf/tpc-h/queries/10.sql @@ -0,0 +1,32 @@ +select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= cast('1994-01-01' as datetime) + and o_orderdate < cast('1994-04-01' as datetime) -- modified not to include cast({'month': 3} as interval) + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc +limit 20; diff --git a/perf/tpc-h/queries/11.sql b/perf/tpc-h/queries/11.sql new file mode 100644 index 000000000..f64407357 --- /dev/null +++ b/perf/tpc-h/queries/11.sql @@ -0,0 +1,30 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'ARGENTINA' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'ARGENTINA' + ) +order by + value desc; diff --git a/perf/tpc-h/queries/12.sql b/perf/tpc-h/queries/12.sql new file mode 100644 index 000000000..038af1d66 --- /dev/null +++ b/perf/tpc-h/queries/12.sql @@ -0,0 +1,28 @@ +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('FOB', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= cast('1994-01-01' as datetime) + and l_receiptdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval) +group by + l_shipmode +order by + l_shipmode; diff --git a/perf/tpc-h/queries/13.sql b/perf/tpc-h/queries/13.sql new file mode 100644 index 000000000..fc6a41bda --- /dev/null +++ b/perf/tpc-h/queries/13.sql @@ -0,0 +1,20 @@ +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) as c_count + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%express%packages%' + group by + c_custkey + ) as c_orders +group by + c_count +order by + custdist desc, + c_count desc; diff --git a/perf/tpc-h/queries/14.sql b/perf/tpc-h/queries/14.sql new file mode 100644 index 000000000..439042916 --- /dev/null +++ b/perf/tpc-h/queries/14.sql @@ -0,0 +1,13 @@ +select + 100.00 * sum(cast(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end as number)) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= cast('1994-03-01' as datetime) + and l_shipdate < cast('1994-04-01' as datetime); -- modified not to include cast({'month': 1} as interval) diff --git a/perf/tpc-h/queries/15.sql b/perf/tpc-h/queries/15.sql new file mode 100644 index 000000000..c4d750c97 --- /dev/null +++ b/perf/tpc-h/queries/15.sql @@ -0,0 +1,36 @@ +-- LIMBO_SKIP: views not supported + +create view revenue0 (supplier_no, total_revenue) as + select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) + from + lineitem + where + l_shipdate >= cast('1993-01-01' as datetime) + and l_shipdate < cast('1993-04-01' as datetime) -- modified not to include cast({'month': 3} as interval) + group by + l_suppkey; + + +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey; + +drop view revenue0; diff --git a/perf/tpc-h/queries/16.sql b/perf/tpc-h/queries/16.sql new file mode 100644 index 000000000..10095d24d --- /dev/null +++ b/perf/tpc-h/queries/16.sql @@ -0,0 +1,33 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#45' + and p_type not like 'SMALL PLATED%' + and p_size in (19, 17, 16, 23, 10, 4, 38, 11) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; diff --git a/perf/tpc-h/queries/17.sql b/perf/tpc-h/queries/17.sql new file mode 100644 index 000000000..2a9a24c5a --- /dev/null +++ b/perf/tpc-h/queries/17.sql @@ -0,0 +1,20 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#52' + and p_container = 'LG CAN' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); diff --git a/perf/tpc-h/queries/18.sql b/perf/tpc-h/queries/18.sql new file mode 100644 index 000000000..158ac6b9b --- /dev/null +++ b/perf/tpc-h/queries/18.sql @@ -0,0 +1,36 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 313 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate +limit 100; diff --git a/perf/tpc-h/queries/19.sql b/perf/tpc-h/queries/19.sql new file mode 100644 index 000000000..dfd0b09e0 --- /dev/null +++ b/perf/tpc-h/queries/19.sql @@ -0,0 +1,38 @@ +-- LIMBO_SKIP: bad-query-plan-takes-forever + + +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#22' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 8 and l_quantity <= 8 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#23' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 10 and l_quantity <= 10 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#12' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 24 and l_quantity <= 24 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ); diff --git a/perf/tpc-h/queries/2.sql b/perf/tpc-h/queries/2.sql new file mode 100644 index 000000000..2eb53b209 --- /dev/null +++ b/perf/tpc-h/queries/2.sql @@ -0,0 +1,47 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 38 + and p_type like '%TIN' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'MIDDLE EAST' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'MIDDLE EAST' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey +limit 100; diff --git a/perf/tpc-h/queries/20.sql b/perf/tpc-h/queries/20.sql new file mode 100644 index 000000000..cfcf520a6 --- /dev/null +++ b/perf/tpc-h/queries/20.sql @@ -0,0 +1,40 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'frosted%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= cast('1994-01-01' as datetime) + and l_shipdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval) + ) + ) + and s_nationkey = n_nationkey + and n_name = 'IRAN' +order by + s_name; diff --git a/perf/tpc-h/queries/21.sql b/perf/tpc-h/queries/21.sql new file mode 100644 index 000000000..39e94a308 --- /dev/null +++ b/perf/tpc-h/queries/21.sql @@ -0,0 +1,43 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'GERMANY' +group by + s_name +order by + numwait desc, + s_name +limit 100; diff --git a/perf/tpc-h/queries/22.sql b/perf/tpc-h/queries/22.sql new file mode 100644 index 000000000..dc40c8b77 --- /dev/null +++ b/perf/tpc-h/queries/22.sql @@ -0,0 +1,40 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substr(c_phone, 1, 2) as cntrycode, + c_acctbal + from + customer + where + substr(c_phone, 1, 2) in + ('20', '14', '21', '28', '15', '24', '27') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substr(c_phone, 1, 2) in + ('20', '14', '21', '28', '15', '24', '27') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; diff --git a/perf/tpc-h/queries/3.sql b/perf/tpc-h/queries/3.sql new file mode 100644 index 000000000..cbea08906 --- /dev/null +++ b/perf/tpc-h/queries/3.sql @@ -0,0 +1,23 @@ +select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'FURNITURE' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < cast('1995-03-29' as datetime) + and l_shipdate > cast('1995-03-29' as datetime) +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate +limit 10; diff --git a/perf/tpc-h/queries/4.sql b/perf/tpc-h/queries/4.sql new file mode 100644 index 000000000..649d55eef --- /dev/null +++ b/perf/tpc-h/queries/4.sql @@ -0,0 +1,24 @@ +-- LIMBO_SKIP: subquery in where not supported + + +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= cast('1997-06-01' as datetime) + and o_orderdate < cast('1997-09-01' as datetime) -- modified not to include cast({'month': 3} as interval) + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; diff --git a/perf/tpc-h/queries/5.sql b/perf/tpc-h/queries/5.sql new file mode 100644 index 000000000..01bcc19a5 --- /dev/null +++ b/perf/tpc-h/queries/5.sql @@ -0,0 +1,24 @@ +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'MIDDLE EAST' + and o_orderdate >= cast('1994-01-01' as datetime) + and o_orderdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval) +group by + n_name +order by + revenue desc; diff --git a/perf/tpc-h/queries/6.sql b/perf/tpc-h/queries/6.sql new file mode 100644 index 000000000..9114d1f40 --- /dev/null +++ b/perf/tpc-h/queries/6.sql @@ -0,0 +1,9 @@ +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= cast('1994-01-01' as datetime) + and l_shipdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval) + and l_discount between 0.08 - 0.01 and 0.08 + 0.01 + and l_quantity < 24; diff --git a/perf/tpc-h/queries/7.sql b/perf/tpc-h/queries/7.sql new file mode 100644 index 000000000..509548d03 --- /dev/null +++ b/perf/tpc-h/queries/7.sql @@ -0,0 +1,40 @@ +select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + substr(l_shipdate, 1, 4) as l_year, -- modified not to include date_part('year', l_shipdate) + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'ROMANIA' and n2.n_name = 'INDIA') + or (n1.n_name = 'INDIA' and n2.n_name = 'ROMANIA') + ) + and l_shipdate between + cast('1995-01-01' as datetime) and cast('1996-12-31' as datetime) + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year; diff --git a/perf/tpc-h/queries/8.sql b/perf/tpc-h/queries/8.sql new file mode 100644 index 000000000..ba7fa2f73 --- /dev/null +++ b/perf/tpc-h/queries/8.sql @@ -0,0 +1,38 @@ +select + o_year, + sum(cast(case + when nation = 'INDIA' then volume + else 0 + end as number)) / sum(volume) as mkt_share +from + ( + select + substr(o_orderdate, 1, 4) as o_year, -- modified not to include date_part('year', o_orderdate) + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'ASIA' + and s_nationkey = n2.n_nationkey + and o_orderdate between + cast('1995-01-01' as datetime) and cast('1996-12-31' as datetime) + and p_type = 'PROMO BRUSHED COPPER' + ) as all_nations +group by + o_year +order by + o_year; diff --git a/perf/tpc-h/queries/9.sql b/perf/tpc-h/queries/9.sql new file mode 100644 index 000000000..40ccddd08 --- /dev/null +++ b/perf/tpc-h/queries/9.sql @@ -0,0 +1,32 @@ +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + substr(o_orderdate, 1, 4) as o_year, -- modified not to include date_part('year', o_orderdate) + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%yellow%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; diff --git a/perf/tpc-h/run.sh b/perf/tpc-h/run.sh new file mode 100755 index 000000000..cafe0d523 --- /dev/null +++ b/perf/tpc-h/run.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# This script will run the TPC-H queries and compare timings. + +REPO_ROOT=$(git rev-parse --show-toplevel) +RELEASE_BUILD_DIR="$REPO_ROOT/target/release" +TPCH_DIR="$REPO_ROOT/perf/tpc-h" +DB_FILE="$TPCH_DIR/TPC-H.db" +QUERIES_DIR="$TPCH_DIR/queries" +LIMBO_BIN="$RELEASE_BUILD_DIR/limbo" +SQLITE_BIN="sqlite3" # Assuming sqlite3 is in PATH + +# Function to clear system caches based on OS +clear_caches() { + if [[ "$OSTYPE" == "darwin"* ]]; then + # macOS + sync + sudo purge + elif [[ "$OSTYPE" == "linux-gnu"* ]] || [[ "$OSTYPE" == "linux"* ]]; then + # Linux + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null + else + echo "Warning: Cache clearing not supported on this OS ($OSTYPE)." >&2 + fi +} + +# Ensure the Limbo binary exists +if [ ! -f "$LIMBO_BIN" ]; then + echo "Error: Limbo binary not found at $LIMBO_BIN" + echo "Please build Limbo first (e.g., by running benchmark.sh or 'cargo build --bin limbo --release')" + exit 1 +fi + +# Ensure the SQLite binary exists +if ! command -v $SQLITE_BIN >/dev/null 2>&1; then + echo "Error: sqlite3 command not found. Please install sqlite3." + exit 1 +fi + +# Ensure the database file exists +if [ ! -f "$DB_FILE" ]; then + echo "Error: TPC-H database not found at $DB_FILE" + echo "Please ensure the database is downloaded (e.g., by running benchmark.sh)" + exit 1 +fi + +echo "Starting TPC-H query timing comparison..." + +# Initial cache clear +echo "The script might ask you to enter the password for sudo, in order to clear system caches." +clear_caches + +for query_file in $(ls "$QUERIES_DIR"/*.sql | sort -V); do + if [ -f "$query_file" ]; then + query_name=$(basename "$query_file") + + # If the query file starts with "-- LIMBO_SKIP: ...", skip it and print the reason + if head -n1 "$query_file" | grep -q "^-- LIMBO_SKIP: "; then + skip_reason=$(head -n1 "$query_file" | sed 's/^-- LIMBO_SKIP: //') + echo "Skipping $query_name, reason: $skip_reason" + echo "-----------------------------------------------------------" + continue + fi + + echo "Running $query_name with Limbo..." >&2 + # Clear caches before Limbo run + clear_caches + # Run Limbo + limbo_output=$(/usr/bin/time "$LIMBO_BIN" "$DB_FILE" --quiet --output-mode list "$(cat "$query_file")" 2>&1) + limbo_non_time_lines=$(echo "$limbo_output" | grep -v -e "real" -e "user" -e "sys") + limbo_real_time=$(echo "$limbo_output" | grep "real" | awk '{print $1}') + echo "Running $query_name with SQLite3..." >&2 + # Clear caches before SQLite execution + clear_caches + sqlite_output=$(/usr/bin/time $SQLITE_BIN "$DB_FILE" "$(cat "$query_file")" 2>&1) + sqlite_non_time_lines=$(echo "$sqlite_output" | grep -v -e "real" -e "user" -e "sys") + sqlite_real_time=$(echo "$sqlite_output" | grep "real" | awk '{print $1}') + echo "Limbo real time: $limbo_real_time" + echo "SQLite3 real time: $sqlite_real_time" + echo "Limbo output:" + echo "$limbo_non_time_lines" + echo "SQLite3 output:" + echo "$sqlite_non_time_lines" + output_diff=$(diff <(echo "$limbo_non_time_lines") <(echo "$sqlite_non_time_lines")) + if [ -n "$output_diff" ]; then + echo "Output difference:" + echo "$output_diff" + else + echo "No output difference" + fi + else + echo "Warning: Skipping non-file item $query_file" + fi + echo "-----------------------------------------------------------" +done + +echo "-----------------------------------------------------------" +echo "TPC-H query timing comparison completed." \ No newline at end of file