perf/ci: add basic tpc-h benchmark

This commit is contained in:
Jussi Saurio
2025-05-15 17:09:49 +03:00
parent 4553adf317
commit 51b097fa3d
25 changed files with 810 additions and 0 deletions

View File

@@ -126,3 +126,9 @@ jobs:
external-data-json-path: null
gh-repository: null
tpc-h:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: TPC-H
run: ./perf/tpc-h/benchmark.sh

39
perf/tpc-h/benchmark.sh Executable file
View File

@@ -0,0 +1,39 @@
#!/bin/bash
# TPC-H benchmark script
REPO_ROOT=$(git rev-parse --show-toplevel)
RELEASE_BUILD_DIR="$REPO_ROOT/target/release"
TPCH_DIR="$REPO_ROOT/perf/tpc-h"
DB_FILE="$TPCH_DIR/TPC-H.db"
# If sqlite3 doesn't exist, bail
if ! command -v sqlite3 >/dev/null 2>&1; then
echo "Error: sqlite3 is not installed"
exit 1
fi
# Build Limbo in release mode if it's not already built
if [ ! -f "$RELEASE_BUILD_DIR/limbo" ]; then
echo "Building Limbo..."
cargo build --bin limbo --release
fi
# Download the TPC-H database if it doesn't exist
DB_URL="https://github.com/lovasoa/TPCH-sqlite/releases/download/v1.0/TPC-H.db"
if [ ! -f "$DB_FILE" ]; then
echo "Downloading TPC-H database..."
if command -v wget >/dev/null 2>&1; then
wget -O "$DB_FILE" --no-verbose "$DB_URL"
elif command -v curl >/dev/null 2>&1; then
curl -sL -o "$DB_FILE" "$DB_URL"
else
echo "Error: Neither wget nor curl is available"
exit 1
fi
else
echo "Using existing TPC-H.db file"
fi
# Run the benchmark
echo "Running TPC-H benchmark..."
"$TPCH_DIR/run.sh"

21
perf/tpc-h/queries/1.sql Normal file
View File

@@ -0,0 +1,21 @@
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= cast('1998-12-01' as datetime) -- modified not to include cast({'day': 71} as interval)
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;

32
perf/tpc-h/queries/10.sql Normal file
View File

@@ -0,0 +1,32 @@
select
c_custkey,
c_name,
sum(l_extendedprice * (1 - l_discount)) as revenue,
c_acctbal,
n_name,
c_address,
c_phone,
c_comment
from
customer,
orders,
lineitem,
nation
where
c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate >= cast('1994-01-01' as datetime)
and o_orderdate < cast('1994-04-01' as datetime) -- modified not to include cast({'month': 3} as interval)
and l_returnflag = 'R'
and c_nationkey = n_nationkey
group by
c_custkey,
c_name,
c_acctbal,
c_phone,
n_name,
c_address,
c_comment
order by
revenue desc
limit 20;

30
perf/tpc-h/queries/11.sql Normal file
View File

@@ -0,0 +1,30 @@
-- LIMBO_SKIP: subquery in where not supported
select
ps_partkey,
sum(ps_supplycost * ps_availqty) as value
from
partsupp,
supplier,
nation
where
ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'ARGENTINA'
group by
ps_partkey having
sum(ps_supplycost * ps_availqty) > (
select
sum(ps_supplycost * ps_availqty) * 0.0001000000
from
partsupp,
supplier,
nation
where
ps_suppkey = s_suppkey
and s_nationkey = n_nationkey
and n_name = 'ARGENTINA'
)
order by
value desc;

28
perf/tpc-h/queries/12.sql Normal file
View File

@@ -0,0 +1,28 @@
select
l_shipmode,
sum(case
when o_orderpriority = '1-URGENT'
or o_orderpriority = '2-HIGH'
then 1
else 0
end) as high_line_count,
sum(case
when o_orderpriority <> '1-URGENT'
and o_orderpriority <> '2-HIGH'
then 1
else 0
end) as low_line_count
from
orders,
lineitem
where
o_orderkey = l_orderkey
and l_shipmode in ('FOB', 'SHIP')
and l_commitdate < l_receiptdate
and l_shipdate < l_commitdate
and l_receiptdate >= cast('1994-01-01' as datetime)
and l_receiptdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval)
group by
l_shipmode
order by
l_shipmode;

20
perf/tpc-h/queries/13.sql Normal file
View File

@@ -0,0 +1,20 @@
select
c_count,
count(*) as custdist
from
(
select
c_custkey,
count(o_orderkey) as c_count
from
customer left outer join orders on
c_custkey = o_custkey
and o_comment not like '%express%packages%'
group by
c_custkey
) as c_orders
group by
c_count
order by
custdist desc,
c_count desc;

13
perf/tpc-h/queries/14.sql Normal file
View File

@@ -0,0 +1,13 @@
select
100.00 * sum(cast(case
when p_type like 'PROMO%'
then l_extendedprice * (1 - l_discount)
else 0
end as number)) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from
lineitem,
part
where
l_partkey = p_partkey
and l_shipdate >= cast('1994-03-01' as datetime)
and l_shipdate < cast('1994-04-01' as datetime); -- modified not to include cast({'month': 1} as interval)

36
perf/tpc-h/queries/15.sql Normal file
View File

@@ -0,0 +1,36 @@
-- LIMBO_SKIP: views not supported
create view revenue0 (supplier_no, total_revenue) as
select
l_suppkey,
sum(l_extendedprice * (1 - l_discount))
from
lineitem
where
l_shipdate >= cast('1993-01-01' as datetime)
and l_shipdate < cast('1993-04-01' as datetime) -- modified not to include cast({'month': 3} as interval)
group by
l_suppkey;
select
s_suppkey,
s_name,
s_address,
s_phone,
total_revenue
from
supplier,
revenue0
where
s_suppkey = supplier_no
and total_revenue = (
select
max(total_revenue)
from
revenue0
)
order by
s_suppkey;
drop view revenue0;

33
perf/tpc-h/queries/16.sql Normal file
View File

@@ -0,0 +1,33 @@
-- LIMBO_SKIP: subquery in where not supported
select
p_brand,
p_type,
p_size,
count(distinct ps_suppkey) as supplier_cnt
from
partsupp,
part
where
p_partkey = ps_partkey
and p_brand <> 'Brand#45'
and p_type not like 'SMALL PLATED%'
and p_size in (19, 17, 16, 23, 10, 4, 38, 11)
and ps_suppkey not in (
select
s_suppkey
from
supplier
where
s_comment like '%Customer%Complaints%'
)
group by
p_brand,
p_type,
p_size
order by
supplier_cnt desc,
p_brand,
p_type,
p_size;

20
perf/tpc-h/queries/17.sql Normal file
View File

@@ -0,0 +1,20 @@
-- LIMBO_SKIP: subquery in where not supported
select
sum(l_extendedprice) / 7.0 as avg_yearly
from
lineitem,
part
where
p_partkey = l_partkey
and p_brand = 'Brand#52'
and p_container = 'LG CAN'
and l_quantity < (
select
0.2 * avg(l_quantity)
from
lineitem
where
l_partkey = p_partkey
);

36
perf/tpc-h/queries/18.sql Normal file
View File

@@ -0,0 +1,36 @@
-- LIMBO_SKIP: subquery in where not supported
select
c_name,
c_custkey,
o_orderkey,
o_orderdate,
o_totalprice,
sum(l_quantity)
from
customer,
orders,
lineitem
where
o_orderkey in (
select
l_orderkey
from
lineitem
group by
l_orderkey having
sum(l_quantity) > 313
)
and c_custkey = o_custkey
and o_orderkey = l_orderkey
group by
c_name,
c_custkey,
o_orderkey,
o_orderdate,
o_totalprice
order by
o_totalprice desc,
o_orderdate
limit 100;

38
perf/tpc-h/queries/19.sql Normal file
View File

@@ -0,0 +1,38 @@
-- LIMBO_SKIP: bad-query-plan-takes-forever
select
sum(l_extendedprice* (1 - l_discount)) as revenue
from
lineitem,
part
where
(
p_partkey = l_partkey
and p_brand = 'Brand#22'
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 8 and l_quantity <= 8 + 10
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 10 and l_quantity <= 10 + 10
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_partkey = l_partkey
and p_brand = 'Brand#12'
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 24 and l_quantity <= 24 + 10
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
);

47
perf/tpc-h/queries/2.sql Normal file
View File

@@ -0,0 +1,47 @@
-- LIMBO_SKIP: subquery in where not supported
select
s_acctbal,
s_name,
n_name,
p_partkey,
p_mfgr,
s_address,
s_phone,
s_comment
from
part,
supplier,
partsupp,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and p_size = 38
and p_type like '%TIN'
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'MIDDLE EAST'
and ps_supplycost = (
select
min(ps_supplycost)
from
partsupp,
supplier,
nation,
region
where
p_partkey = ps_partkey
and s_suppkey = ps_suppkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'MIDDLE EAST'
)
order by
s_acctbal desc,
n_name,
s_name,
p_partkey
limit 100;

40
perf/tpc-h/queries/20.sql Normal file
View File

@@ -0,0 +1,40 @@
-- LIMBO_SKIP: subquery in where not supported
select
s_name,
s_address
from
supplier,
nation
where
s_suppkey in (
select
ps_suppkey
from
partsupp
where
ps_partkey in (
select
p_partkey
from
part
where
p_name like 'frosted%'
)
and ps_availqty > (
select
0.5 * sum(l_quantity)
from
lineitem
where
l_partkey = ps_partkey
and l_suppkey = ps_suppkey
and l_shipdate >= cast('1994-01-01' as datetime)
and l_shipdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval)
)
)
and s_nationkey = n_nationkey
and n_name = 'IRAN'
order by
s_name;

43
perf/tpc-h/queries/21.sql Normal file
View File

@@ -0,0 +1,43 @@
-- LIMBO_SKIP: subquery in where not supported
select
s_name,
count(*) as numwait
from
supplier,
lineitem l1,
orders,
nation
where
s_suppkey = l1.l_suppkey
and o_orderkey = l1.l_orderkey
and o_orderstatus = 'F'
and l1.l_receiptdate > l1.l_commitdate
and exists (
select
*
from
lineitem l2
where
l2.l_orderkey = l1.l_orderkey
and l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
*
from
lineitem l3
where
l3.l_orderkey = l1.l_orderkey
and l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate
)
and s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by
s_name
order by
numwait desc,
s_name
limit 100;

40
perf/tpc-h/queries/22.sql Normal file
View File

@@ -0,0 +1,40 @@
-- LIMBO_SKIP: subquery in where not supported
select
cntrycode,
count(*) as numcust,
sum(c_acctbal) as totacctbal
from
(
select
substr(c_phone, 1, 2) as cntrycode,
c_acctbal
from
customer
where
substr(c_phone, 1, 2) in
('20', '14', '21', '28', '15', '24', '27')
and c_acctbal > (
select
avg(c_acctbal)
from
customer
where
c_acctbal > 0.00
and substr(c_phone, 1, 2) in
('20', '14', '21', '28', '15', '24', '27')
)
and not exists (
select
*
from
orders
where
o_custkey = c_custkey
)
) as custsale
group by
cntrycode
order by
cntrycode;

23
perf/tpc-h/queries/3.sql Normal file
View File

@@ -0,0 +1,23 @@
select
l_orderkey,
sum(l_extendedprice * (1 - l_discount)) as revenue,
o_orderdate,
o_shippriority
from
customer,
orders,
lineitem
where
c_mktsegment = 'FURNITURE'
and c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate < cast('1995-03-29' as datetime)
and l_shipdate > cast('1995-03-29' as datetime)
group by
l_orderkey,
o_orderdate,
o_shippriority
order by
revenue desc,
o_orderdate
limit 10;

24
perf/tpc-h/queries/4.sql Normal file
View File

@@ -0,0 +1,24 @@
-- LIMBO_SKIP: subquery in where not supported
select
o_orderpriority,
count(*) as order_count
from
orders
where
o_orderdate >= cast('1997-06-01' as datetime)
and o_orderdate < cast('1997-09-01' as datetime) -- modified not to include cast({'month': 3} as interval)
and exists (
select
*
from
lineitem
where
l_orderkey = o_orderkey
and l_commitdate < l_receiptdate
)
group by
o_orderpriority
order by
o_orderpriority;

24
perf/tpc-h/queries/5.sql Normal file
View File

@@ -0,0 +1,24 @@
select
n_name,
sum(l_extendedprice * (1 - l_discount)) as revenue
from
customer,
orders,
lineitem,
supplier,
nation,
region
where
c_custkey = o_custkey
and l_orderkey = o_orderkey
and l_suppkey = s_suppkey
and c_nationkey = s_nationkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'MIDDLE EAST'
and o_orderdate >= cast('1994-01-01' as datetime)
and o_orderdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval)
group by
n_name
order by
revenue desc;

9
perf/tpc-h/queries/6.sql Normal file
View File

@@ -0,0 +1,9 @@
select
sum(l_extendedprice * l_discount) as revenue
from
lineitem
where
l_shipdate >= cast('1994-01-01' as datetime)
and l_shipdate < cast('1995-01-01' as datetime) -- modified not to include cast({'year': 1} as interval)
and l_discount between 0.08 - 0.01 and 0.08 + 0.01
and l_quantity < 24;

40
perf/tpc-h/queries/7.sql Normal file
View File

@@ -0,0 +1,40 @@
select
supp_nation,
cust_nation,
l_year,
sum(volume) as revenue
from
(
select
n1.n_name as supp_nation,
n2.n_name as cust_nation,
substr(l_shipdate, 1, 4) as l_year, -- modified not to include date_part('year', l_shipdate)
l_extendedprice * (1 - l_discount) as volume
from
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2
where
s_suppkey = l_suppkey
and o_orderkey = l_orderkey
and c_custkey = o_custkey
and s_nationkey = n1.n_nationkey
and c_nationkey = n2.n_nationkey
and (
(n1.n_name = 'ROMANIA' and n2.n_name = 'INDIA')
or (n1.n_name = 'INDIA' and n2.n_name = 'ROMANIA')
)
and l_shipdate between
cast('1995-01-01' as datetime) and cast('1996-12-31' as datetime)
) as shipping
group by
supp_nation,
cust_nation,
l_year
order by
supp_nation,
cust_nation,
l_year;

38
perf/tpc-h/queries/8.sql Normal file
View File

@@ -0,0 +1,38 @@
select
o_year,
sum(cast(case
when nation = 'INDIA' then volume
else 0
end as number)) / sum(volume) as mkt_share
from
(
select
substr(o_orderdate, 1, 4) as o_year, -- modified not to include date_part('year', o_orderdate)
l_extendedprice * (1 - l_discount) as volume,
n2.n_name as nation
from
part,
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2,
region
where
p_partkey = l_partkey
and s_suppkey = l_suppkey
and l_orderkey = o_orderkey
and o_custkey = c_custkey
and c_nationkey = n1.n_nationkey
and n1.n_regionkey = r_regionkey
and r_name = 'ASIA'
and s_nationkey = n2.n_nationkey
and o_orderdate between
cast('1995-01-01' as datetime) and cast('1996-12-31' as datetime)
and p_type = 'PROMO BRUSHED COPPER'
) as all_nations
group by
o_year
order by
o_year;

32
perf/tpc-h/queries/9.sql Normal file
View File

@@ -0,0 +1,32 @@
select
nation,
o_year,
sum(amount) as sum_profit
from
(
select
n_name as nation,
substr(o_orderdate, 1, 4) as o_year, -- modified not to include date_part('year', o_orderdate)
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
from
part,
supplier,
lineitem,
partsupp,
orders,
nation
where
s_suppkey = l_suppkey
and ps_suppkey = l_suppkey
and ps_partkey = l_partkey
and p_partkey = l_partkey
and o_orderkey = l_orderkey
and s_nationkey = n_nationkey
and p_name like '%yellow%'
) as profit
group by
nation,
o_year
order by
nation,
o_year desc;

98
perf/tpc-h/run.sh Executable file
View File

@@ -0,0 +1,98 @@
#!/bin/bash
# This script will run the TPC-H queries and compare timings.
REPO_ROOT=$(git rev-parse --show-toplevel)
RELEASE_BUILD_DIR="$REPO_ROOT/target/release"
TPCH_DIR="$REPO_ROOT/perf/tpc-h"
DB_FILE="$TPCH_DIR/TPC-H.db"
QUERIES_DIR="$TPCH_DIR/queries"
LIMBO_BIN="$RELEASE_BUILD_DIR/limbo"
SQLITE_BIN="sqlite3" # Assuming sqlite3 is in PATH
# Function to clear system caches based on OS
clear_caches() {
if [[ "$OSTYPE" == "darwin"* ]]; then
# macOS
sync
sudo purge
elif [[ "$OSTYPE" == "linux-gnu"* ]] || [[ "$OSTYPE" == "linux"* ]]; then
# Linux
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null
else
echo "Warning: Cache clearing not supported on this OS ($OSTYPE)." >&2
fi
}
# Ensure the Limbo binary exists
if [ ! -f "$LIMBO_BIN" ]; then
echo "Error: Limbo binary not found at $LIMBO_BIN"
echo "Please build Limbo first (e.g., by running benchmark.sh or 'cargo build --bin limbo --release')"
exit 1
fi
# Ensure the SQLite binary exists
if ! command -v $SQLITE_BIN >/dev/null 2>&1; then
echo "Error: sqlite3 command not found. Please install sqlite3."
exit 1
fi
# Ensure the database file exists
if [ ! -f "$DB_FILE" ]; then
echo "Error: TPC-H database not found at $DB_FILE"
echo "Please ensure the database is downloaded (e.g., by running benchmark.sh)"
exit 1
fi
echo "Starting TPC-H query timing comparison..."
# Initial cache clear
echo "The script might ask you to enter the password for sudo, in order to clear system caches."
clear_caches
for query_file in $(ls "$QUERIES_DIR"/*.sql | sort -V); do
if [ -f "$query_file" ]; then
query_name=$(basename "$query_file")
# If the query file starts with "-- LIMBO_SKIP: ...", skip it and print the reason
if head -n1 "$query_file" | grep -q "^-- LIMBO_SKIP: "; then
skip_reason=$(head -n1 "$query_file" | sed 's/^-- LIMBO_SKIP: //')
echo "Skipping $query_name, reason: $skip_reason"
echo "-----------------------------------------------------------"
continue
fi
echo "Running $query_name with Limbo..." >&2
# Clear caches before Limbo run
clear_caches
# Run Limbo
limbo_output=$(/usr/bin/time "$LIMBO_BIN" "$DB_FILE" --quiet --output-mode list "$(cat "$query_file")" 2>&1)
limbo_non_time_lines=$(echo "$limbo_output" | grep -v -e "real" -e "user" -e "sys")
limbo_real_time=$(echo "$limbo_output" | grep "real" | awk '{print $1}')
echo "Running $query_name with SQLite3..." >&2
# Clear caches before SQLite execution
clear_caches
sqlite_output=$(/usr/bin/time $SQLITE_BIN "$DB_FILE" "$(cat "$query_file")" 2>&1)
sqlite_non_time_lines=$(echo "$sqlite_output" | grep -v -e "real" -e "user" -e "sys")
sqlite_real_time=$(echo "$sqlite_output" | grep "real" | awk '{print $1}')
echo "Limbo real time: $limbo_real_time"
echo "SQLite3 real time: $sqlite_real_time"
echo "Limbo output:"
echo "$limbo_non_time_lines"
echo "SQLite3 output:"
echo "$sqlite_non_time_lines"
output_diff=$(diff <(echo "$limbo_non_time_lines") <(echo "$sqlite_non_time_lines"))
if [ -n "$output_diff" ]; then
echo "Output difference:"
echo "$output_diff"
else
echo "No output difference"
fi
else
echo "Warning: Skipping non-file item $query_file"
fi
echo "-----------------------------------------------------------"
done
echo "-----------------------------------------------------------"
echo "TPC-H query timing comparison completed."