Merge 'Sqlite3 parser perf improvements' from Jussi Saurio

Manually vendored in some changes from
[lemon-rs](https://github.com/gwenn/lemon-rs), including a merged change
from @krishvishal and
[an unmerged PR](https://github.com/gwenn/lemon-rs/pull/81) from user
ignatz that boxes `Limit`. I also boxed `OneSelect`
because it also improved perf in the benchmarks. These changes give
40-50% more throughput on our existing, admittedly simple, benchmarks.
Added a new, more complex prepare benchmark that includes GROUP BY and
HAVING as well, which is also 42% faster with the new code.
**Runs on my local machine:**
```
main:

limbo/Prepare statement: 'SELECT 1'
                        time:   [1.2902 µs 1.2927 µs 1.2958 µs]
                        thrpt:  [771.73 Kelem/s 773.56 Kelem/s 775.07 Kelem/s]
                 change:
                        time:   [+0.2770% +0.6013% +0.9243%] (p = 0.00 < 0.05)
                        thrpt:  [-0.9158% -0.5977% -0.2762%]
limbo/Prepare statement: 'SELECT * FROM users LIMIT 1'
                        time:   [2.4885 µs 2.4927 µs 2.4971 µs]
                        thrpt:  [400.47 Kelem/s 401.18 Kelem/s 401.84 Kelem/s]
                 change:
                        time:   [+1.2859% +1.6970% +2.0993%] (p = 0.00 < 0.05)
                        thrpt:  [-2.0561% -1.6687% -1.2696%]
limbo/Prepare statement: 'SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1)...
                        time:   [5.6867 µs 5.6994 µs 5.7164 µs]
                        thrpt:  [174.93 Kelem/s 175.46 Kelem/s 175.85 Kelem/s]
                 change:
                        time:   [+16.921% +17.332% +17.765%] (p = 0.00 < 0.05)
                        thrpt:  [-15.085% -14.772% -14.472%]

this branch:

limbo/Prepare statement: 'SELECT 1'
                        time:   [861.48 ns 862.60 ns 863.79 ns]
                        thrpt:  [1.1577 Melem/s 1.1593 Melem/s 1.1608 Melem/s]
                 change:
                        time:   [-33.293% -33.042% -32.754%] (p = 0.00 < 0.05)
                        thrpt:  [+48.709% +49.347% +49.909%]
                        Performance has improved.
limbo/Prepare statement: 'SELECT * FROM users LIMIT 1'
                        time:   [1.6080 µs 1.6106 µs 1.6140 µs]
                        thrpt:  [619.58 Kelem/s 620.87 Kelem/s 621.88 Kelem/s]
                 change:
                        time:   [-35.838% -35.611% -35.380%] (p = 0.00 < 0.05)
                        thrpt:  [+54.750% +55.305% +55.857%]
                        Performance has improved.
Benchmarking limbo/Prepare statement: 'SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1)...: Collecting 100 samples in estimated 5.0125 s (1.
limbo/Prepare statement: 'SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1)...
                        time:   [4.0161 µs 4.0301 µs 4.0473 µs]
                        thrpt:  [247.08 Kelem/s 248.13 Kelem/s 249.00 Kelem/s]
                 change:
                        time:   [-29.791% -29.596% -29.399%] (p = 0.00 < 0.05)
                        thrpt:  [+41.642% +42.038% +42.431%]
                        Performance has improved.
```
**Runs in CI:**
```
most recent commit on main:

limbo/Prepare statement: 'SELECT 1'
                        time:   [2.7085 µs 2.7113 µs 2.7153 µs]
                        thrpt:  [368.28 Kelem/s 368.83 Kelem/s 369.21 Kelem/s]
limbo/Prepare statement: 'SELECT * FROM users LIMIT 1'
                        time:   [4.8688 µs 4.8713 µs 4.8741 µs]
                        thrpt:  [205.17 Kelem/s 205.29 Kelem/s 205.39 Kelem/s]

this branch:

limbo/Prepare statement: 'SELECT 1'
                        time:   [1.9278 µs 1.9329 µs 1.9405 µs]
                        thrpt:  [515.33 Kelem/s 517.35 Kelem/s 518.73 Kelem/s]
limbo/Prepare statement: 'SELECT * FROM users LIMIT 1'
                        time:   [3.5708 µs 3.5746 µs 3.5794 µs]
                        thrpt:  [279.38 Kelem/s 279.75 Kelem/s 280.05 Kelem/s]
```
**Discussion:**
Generally I think we should probably just, philosophically, hard fork
this vendored code and start making whatever modifications we want to
it... thoughts?
Also, I guess there's a way to add a `Co-authored-by: XXX` trailer to
these commits so that they don't show up under my name only, because I
didn't write most of it.

Closes #620
This commit is contained in:
Pekka Enberg
2025-01-05 18:03:58 +02:00
11 changed files with 47 additions and 39 deletions

View File

@@ -34,6 +34,12 @@ fn limbo_bench(criterion: &mut Criterion) {
});
});
group.bench_function("Prepare statement: 'SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1) > 1 ORDER BY count(1) LIMIT 1'", |b| {
b.iter(|| {
conn.prepare("SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1) > 1 ORDER BY count(1) LIMIT 1").unwrap();
});
});
let mut stmt = conn.prepare("SELECT 1").unwrap();
group.bench_function("Execute prepared statement: 'SELECT 1'", |b| {
let io = io.clone();

View File

@@ -304,7 +304,7 @@ impl Connection {
Cmd::ExplainQueryPlan(stmt) => {
match stmt {
ast::Stmt::Select(select) => {
let mut plan = prepare_select_plan(&self.schema.borrow(), select)?;
let mut plan = prepare_select_plan(&self.schema.borrow(), *select)?;
optimize_plan(&mut plan)?;
println!("{}", plan);
}

View File

@@ -15,7 +15,7 @@ pub fn translate_delete(
schema: &Schema,
tbl_name: &QualifiedName,
where_clause: Option<Expr>,
limit: Option<Limit>,
limit: Option<Box<Limit>>,
database_header: Rc<RefCell<DatabaseHeader>>,
connection: Weak<Connection>,
syms: &SymbolTable,
@@ -29,7 +29,7 @@ pub fn prepare_delete_plan(
schema: &Schema,
tbl_name: &QualifiedName,
where_clause: Option<Expr>,
limit: Option<Limit>,
limit: Option<Box<Limit>>,
) -> Result<Plan> {
let table = match schema.get_table(tbl_name.name.0.as_str()) {
Some(table) => table,
@@ -48,7 +48,7 @@ pub fn prepare_delete_plan(
let resolved_where_clauses = parse_where(where_clause, &referenced_tables)?;
// Parse the LIMIT clause
let resolved_limit = limit.and_then(parse_limit);
let resolved_limit = limit.and_then(|l| parse_limit(*l));
let plan = DeletePlan {
source: SourceOperator::Scan {

View File

@@ -70,7 +70,7 @@ pub fn translate_insert(
Table::Pseudo(_) => todo!(),
};
let values = match body {
InsertBody::Select(select, None) => match &select.body.select {
InsertBody::Select(select, None) => match &select.body.select.deref() {
sqlite3_parser::ast::OneSelect::Values(values) => values,
_ => todo!(),
},

View File

@@ -104,7 +104,7 @@ pub fn translate(
ast::Stmt::Rollback { .. } => bail_parse_error!("ROLLBACK not supported yet"),
ast::Stmt::Savepoint(_) => bail_parse_error!("SAVEPOINT not supported yet"),
ast::Stmt::Select(select) => {
translate_select(schema, select, database_header, connection, syms)
translate_select(schema, *select, database_header, connection, syms)
}
ast::Stmt::Update { .. } => bail_parse_error!("UPDATE not supported yet"),
ast::Stmt::Vacuum(_, _) => bail_parse_error!("VACUUM not supported yet"),

View File

@@ -289,7 +289,7 @@ fn parse_from_clause_table(
))
}
ast::SelectTable::Select(subselect, maybe_alias) => {
let Plan::Select(mut subplan) = prepare_select_plan(schema, subselect)? else {
let Plan::Select(mut subplan) = prepare_select_plan(schema, *subselect)? else {
unreachable!();
};
subplan.query_type = SelectQueryType::Subquery {

View File

@@ -31,7 +31,7 @@ pub fn translate_select(
}
pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result<Plan> {
match select.body.select {
match *select.body.select {
ast::OneSelect::Select {
mut columns,
from,
@@ -273,7 +273,7 @@ pub fn prepare_select_plan(schema: &Schema, select: ast::Select) -> Result<Plan>
}
// Parse the LIMIT clause
plan.limit = select.limit.and_then(parse_limit);
plan.limit = select.limit.and_then(|l| parse_limit(*l));
// Return the unoptimized query plan
Ok(Plan::Select(plan))

View File

@@ -144,7 +144,7 @@ pub enum Stmt {
/// columns
columns: Option<Vec<IndexedColumn>>,
/// query
select: Select,
select: Box<Select>,
},
/// `CREATE VIRTUAL TABLE`
CreateVirtualTable {
@@ -172,7 +172,7 @@ pub enum Stmt {
/// `ORDER BY`
order_by: Option<Vec<SortedColumn>>,
/// `LIMIT`
limit: Option<Limit>,
limit: Option<Box<Limit>>,
},
/// `DETACH DATABASE`: db name
Detach(Expr), // TODO distinction between DETACH and DETACH DATABASE
@@ -238,7 +238,7 @@ pub enum Stmt {
/// `SAVEPOINT`: savepoint name
Savepoint(Name),
/// `SELECT`
Select(Select),
Select(Box<Select>),
/// `UPDATE`
Update {
/// CTE
@@ -260,7 +260,7 @@ pub enum Stmt {
/// `ORDER BY`
order_by: Option<Vec<SortedColumn>>,
/// `LIMIT`
limit: Option<Limit>,
limit: Option<Box<Limit>>,
},
/// `VACUUM`: database name, into expr
Vacuum(Option<Name>, Option<Expr>),
@@ -700,14 +700,14 @@ pub struct Select {
/// `ORDER BY`
pub order_by: Option<Vec<SortedColumn>>, // ORDER BY term does not match any column in the result set
/// `LIMIT`
pub limit: Option<Limit>,
pub limit: Option<Box<Limit>>,
}
/// `SELECT` body
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SelectBody {
/// first select
pub select: OneSelect,
pub select: Box<OneSelect>,
/// compounds
pub compounds: Option<Vec<CompoundSelect>>,
}
@@ -740,7 +740,7 @@ pub struct CompoundSelect {
/// operator
pub operator: CompoundOperator,
/// select
pub select: OneSelect,
pub select: Box<OneSelect>,
}
/// Compound operators
@@ -888,7 +888,7 @@ pub enum SelectTable {
/// table function call
TableCall(QualifiedName, Option<Vec<Expr>>, Option<As>),
/// `SELECT` subquery
Select(Select, Option<As>),
Select(Box<Select>, Option<As>),
/// subquery
Sub(FromClause, Option<As>),
}
@@ -1222,7 +1222,7 @@ pub enum CreateTableBody {
options: TableOptions,
},
/// `AS` select
AsSelect(Select),
AsSelect(Box<Select>),
}
impl CreateTableBody {
@@ -1265,10 +1265,10 @@ impl ColumnDefinition {
let mut split = col_type.name.split_ascii_whitespace();
let truncate = if split
.next_back()
.map_or(false, |s| s.eq_ignore_ascii_case("ALWAYS"))
.is_some_and(|s| s.eq_ignore_ascii_case("ALWAYS"))
&& split
.next_back()
.map_or(false, |s| s.eq_ignore_ascii_case("GENERATED"))
.is_some_and(|s| s.eq_ignore_ascii_case("GENERATED"))
{
let mut generated = false;
for constraint in &cd.constraints {
@@ -1549,7 +1549,7 @@ pub struct Limit {
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum InsertBody {
/// `SELECT` or `VALUES`
Select(Select, Option<Upsert>),
Select(Box<Select>, Option<Upsert>),
/// `DEFAULT VALUES`
DefaultValues,
}
@@ -1649,7 +1649,7 @@ pub enum TriggerCmd {
/// `COLUMNS`
col_names: Option<DistinctNames>,
/// `SELECT` or `VALUES`
select: Select,
select: Box<Select>,
/// `ON CONFLICT` clause
upsert: Option<Upsert>,
/// `RETURNING`
@@ -1663,7 +1663,7 @@ pub enum TriggerCmd {
where_clause: Option<Expr>,
},
/// `SELECT`
Select(Select),
Select(Box<Select>),
}
/// Conflict resolution types
@@ -1714,7 +1714,7 @@ pub struct CommonTableExpr {
/// `MATERIALIZED`
pub materialized: Materialized,
/// query
pub select: Select,
pub select: Box<Select>,
}
impl CommonTableExpr {

View File

@@ -127,7 +127,7 @@ create_table_args(A) ::= LP columnlist(C) conslist_opt(X) RP table_option_set(F)
A = CreateTableBody::columns_and_constraints(C, X, F)?;
}
create_table_args(A) ::= AS select(S). {
A = CreateTableBody::AsSelect(S);
A = CreateTableBody::AsSelect(Box::new(S));
}
%type table_option_set {TableOptions}
%type table_option {TableOptions}
@@ -476,7 +476,7 @@ ifexists(A) ::= . {A = false;}
cmd ::= createkw temp(T) VIEW ifnotexists(E) fullname(Y) eidlist_opt(C)
AS select(S). {
self.ctx.stmt = Some(Stmt::CreateView{ temporary: T, if_not_exists: E, view_name: Y, columns: C,
select: S });
select: Box::new(S) });
}
cmd ::= DROP VIEW ifexists(E) fullname(X). {
self.ctx.stmt = Some(Stmt::DropView{ if_exists: E, view_name: X });
@@ -486,7 +486,7 @@ cmd ::= DROP VIEW ifexists(E) fullname(X). {
//////////////////////// The SELECT statement /////////////////////////////////
//
cmd ::= select(X). {
self.ctx.stmt = Some(Stmt::Select(X));
self.ctx.stmt = Some(Stmt::Select(Box::new(X)));
}
%type select {Select}
@@ -509,11 +509,11 @@ select(A) ::= selectnowith(X) orderby_opt(Z) limit_opt(L). {
}
selectnowith(A) ::= oneselect(X). {
A = SelectBody{ select: X, compounds: None };
A = SelectBody{ select: Box::new(X), compounds: None };
}
%ifndef SQLITE_OMIT_COMPOUND_SELECT
selectnowith(A) ::= selectnowith(A) multiselect_op(Y) oneselect(Z). {
let cs = CompoundSelect{ operator: Y, select: Z };
let cs = CompoundSelect{ operator: Y, select: Box::new(Z) };
A.push(cs)?;
}
%type multiselect_op {CompoundOperator}
@@ -621,7 +621,7 @@ seltablist(A) ::= stl_prefix(A) fullname(Y) LP exprlist(E) RP as(Z)
%ifndef SQLITE_OMIT_SUBQUERY
seltablist(A) ::= stl_prefix(A) LP select(S) RP
as(Z) on_using(N). {
let st = SelectTable::Select(S, Z);
let st = SelectTable::Select(Box::new(S), Z);
let jc = N;
A.push(st, jc)?;
}
@@ -737,7 +737,7 @@ groupby_opt(A) ::= GROUP BY nexprlist(X) having_opt(Y). {A = Some(GroupBy{ exprs
having_opt(A) ::= . {A = None;}
having_opt(A) ::= HAVING expr(X). {A = Some(X);}
%type limit_opt {Option<Limit>}
%type limit_opt {Option<Box<Limit>>}
// The destructor for limit_opt will never fire in the current grammar.
// The limit_opt non-terminal only occurs at the end of a single production
@@ -749,11 +749,11 @@ having_opt(A) ::= HAVING expr(X). {A = Some(X);}
//%destructor limit_opt {sqlite3ExprDelete(pParse->db, $$);}
limit_opt(A) ::= . {A = None;}
limit_opt(A) ::= LIMIT expr(X).
{A = Some(Limit{ expr: X, offset: None });}
{A = Some(Box::new(Limit{ expr: X, offset: None }));}
limit_opt(A) ::= LIMIT expr(X) OFFSET expr(Y).
{A = Some(Limit{ expr: X, offset: Some(Y) });}
{A = Some(Box::new(Limit{ expr: X, offset: Some(Y) }));}
limit_opt(A) ::= LIMIT expr(X) COMMA expr(Y).
{A = Some(Limit{ expr: X, offset: Some(Y) });}
{A = Some(Box::new(Limit{ expr: X, offset: Some(Y) }));}
/////////////////////////// The DELETE statement /////////////////////////////
//
@@ -826,7 +826,7 @@ setlist(A) ::= LP idlist(X) RP EQ expr(Y). {
cmd ::= with(W) insert_cmd(R) INTO xfullname(X) idlist_opt(F) select(S)
upsert(U). {
let (upsert, returning) = U;
let body = InsertBody::Select(S, upsert);
let body = InsertBody::Select(Box::new(S), upsert);
self.ctx.stmt = Some(Stmt::Insert{ with: W, or_conflict: R, tbl_name: X, columns: F,
body, returning });
}
@@ -1241,7 +1241,7 @@ trigger_cmd(A) ::=
trigger_cmd(A) ::= insert_cmd(R) INTO
trnm(X) idlist_opt(F) select(S) upsert(U). {
let (upsert, returning) = U;
A = TriggerCmd::Insert{ or_conflict: R, tbl_name: X, col_names: F, select: S, upsert, returning };/*A-overwrites-R*/
A = TriggerCmd::Insert{ or_conflict: R, tbl_name: X, col_names: F, select: Box::new(S), upsert, returning };/*A-overwrites-R*/
}
// DELETE
trigger_cmd(A) ::= DELETE FROM trnm(X) tridxby where_opt(Y).
@@ -1249,7 +1249,7 @@ trigger_cmd(A) ::= DELETE FROM trnm(X) tridxby where_opt(Y).
// SELECT
trigger_cmd(A) ::= select(X).
{A = TriggerCmd::Select(X); /*A-overwrites-X*/}
{A = TriggerCmd::Select(Box::new(X)); /*A-overwrites-X*/}
// The special RAISE expression that may occur in trigger programs
expr(A) ::= RAISE LP IGNORE RP. {
@@ -1368,7 +1368,7 @@ wqas(A) ::= AS. {A = Materialized::Any;}
wqas(A) ::= AS MATERIALIZED. {A = Materialized::Yes;}
wqas(A) ::= AS NOT MATERIALIZED. {A = Materialized::No;}
wqitem(A) ::= nm(X) eidlist_opt(Y) wqas(M) LP select(Z) RP. {
A = CommonTableExpr{ tbl_name: X, columns: Y, materialized: M, select: Z }; /*A-overwrites-X*/
A = CommonTableExpr{ tbl_name: X, columns: Y, materialized: M, select: Box::new(Z) }; /*A-overwrites-X*/
}
wqlist(A) ::= wqitem(X). {
A = vec![X]; /*A-overwrites-X*/

View File

@@ -4518,6 +4518,8 @@ void ReportTable(
print_stack_union(out,lemp,&lineno);
if( lemp->stacksize ){
fprintf(out,"const YYSTACKDEPTH: usize = %s;\n",lemp->stacksize); lineno++;
} else {
fprintf(out, "const YYSTACKDEPTH: usize = 128;\n"); lineno++;
}
if( lemp->errsym && lemp->errsym->useCnt ){
fprintf(out,"const YYERRORSYMBOL: YYCODETYPE = %d;\n",lemp->errsym->index); lineno++;

View File

@@ -285,7 +285,7 @@ impl yyParser<'_> {
yyidx: 0,
#[cfg(feature = "YYTRACKMAXSTACKDEPTH")]
yyhwm: 0,
yystack: Vec::new(),
yystack: Vec::with_capacity(YYSTACKDEPTH),
//#[cfg(not(feature = "YYNOERRORRECOVERY"))]
yyerrcnt: -1,
%% /* Optional %extra_context store */