Merge 'prepare perf: make ProgramBuilder aware of plan to count/estimate required memory' from Jussi Saurio

Use knowledge of the query plan to decide how much memory to allocate
up front for the `ProgramBuilder` vectors.
Some of the sizes are exact counts; others are rough, semi-random estimates.
```text
Prepare `SELECT 1`/Limbo/SELECT 1
                        time:   [756.93 ns 758.11 ns 759.59 ns]
                        change: [-4.5974% -4.3153% -4.0393%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
  2 (2.00%) low severe
  1 (1.00%) low mild
  3 (3.00%) high mild
  1 (1.00%) high severe

Prepare `SELECT * FROM users LIMIT 1`/Limbo/SELECT * FROM users LIMIT 1
                        time:   [1.4739 µs 1.4769 µs 1.4800 µs]
                        change: [-7.9364% -7.7171% -7.4979%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high mild

Prepare `SELECT first_name, count(1) FROM users GROUP BY first_name HAVING count(1) > 1 ORDER BY cou...`
                        time:   [3.7440 µs 3.7520 µs 3.7596 µs]
                        change: [-5.4627% -5.1578% -4.8445%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high severe
```

Closes #899
This commit is contained in:
Pekka Enberg
2025-02-05 18:24:16 +02:00
7 changed files with 175 additions and 57 deletions

2
Cargo.lock generated
View File

@@ -1538,7 +1538,7 @@ dependencies = [
[[package]]
name = "limbo"
version = "0.0.13"
version = "0.0.14"
dependencies = [
"anyhow",
"clap",

View File

@@ -3,23 +3,33 @@ use crate::translate::emitter::emit_program;
use crate::translate::optimizer::optimize_plan;
use crate::translate::plan::{DeletePlan, Operation, Plan};
use crate::translate::planner::{parse_limit, parse_where};
use crate::vdbe::builder::ProgramBuilder;
use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode};
use crate::{schema::Schema, Result, SymbolTable};
use sqlite3_parser::ast::{Expr, Limit, QualifiedName};
use super::plan::TableReference;
/// Translates a `DELETE` statement into a VDBE program builder.
///
/// The statement is first turned into a plan by `prepare_delete_plan`, then
/// optimized with `optimize_plan`, and finally emitted with `emit_program`.
/// Any error from those steps is propagated with `?`.
///
/// NOTE(review): this span interleaves the pre-change and post-change lines
/// of the function (two `program`/`query_mode` parameters, two return types,
/// two `emit_program` calls). The description follows the variant that
/// creates its own `ProgramBuilder` and returns it.
///
/// # Panics
///
/// Panics if the plan produced by `prepare_delete_plan` is not `Plan::Delete`.
pub fn translate_delete(
program: &mut ProgramBuilder,
query_mode: QueryMode,
schema: &Schema,
tbl_name: &QualifiedName,
where_clause: Option<Expr>,
limit: Option<Box<Limit>>,
syms: &SymbolTable,
) -> Result<()> {
) -> Result<ProgramBuilder> {
let mut delete_plan = prepare_delete_plan(schema, tbl_name, where_clause, limit)?;
optimize_plan(&mut delete_plan, schema)?;
emit_program(program, delete_plan, syms)
// Borrow the concrete delete plan so its shape can be used to size the builder.
let Plan::Delete(ref delete) = delete_plan else {
panic!("delete_plan is not a DeletePlan");
};
// The builder is created only after planning, so the capacity hints below
// can be derived from the optimized plan.
let mut program = ProgramBuilder::new(ProgramBuilderOpts {
query_mode,
// presumably a single cursor for the target table — TODO confirm
num_cursors: 1,
approx_num_insns: estimate_num_instructions(&delete),
approx_num_labels: 0,
});
emit_program(&mut program, delete_plan, syms)?;
Ok(program)
}
pub fn prepare_delete_plan(
@@ -60,3 +70,11 @@ pub fn prepare_delete_plan(
Ok(Plan::Delete(plan))
}
/// Returns a rough guess of how many instructions a `DELETE` program needs.
///
/// The result is a heuristic, not an exact count: a fixed overhead plus a
/// fixed allowance for every table referenced by the plan.
fn estimate_num_instructions(plan: &DeletePlan) -> usize {
    // Allowance that applies regardless of how many tables are involved.
    const FIXED_OVERHEAD: usize = 20;
    // Allowance added once per referenced table.
    const PER_TABLE: usize = 10;

    FIXED_OVERHEAD + PER_TABLE * plan.table_references.len()
}

View File

@@ -7,6 +7,7 @@ use sqlite3_parser::ast::{
use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY;
use crate::schema::BTreeTable;
use crate::util::normalize_ident;
use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
use crate::vdbe::BranchOffset;
use crate::Result;
use crate::{
@@ -23,7 +24,7 @@ use super::emitter::Resolver;
#[allow(clippy::too_many_arguments)]
pub fn translate_insert(
program: &mut ProgramBuilder,
query_mode: QueryMode,
schema: &Schema,
with: &Option<With>,
on_conflict: &Option<ResolveType>,
@@ -32,7 +33,13 @@ pub fn translate_insert(
body: &InsertBody,
_returning: &Option<Vec<ResultColumn>>,
syms: &SymbolTable,
) -> Result<()> {
) -> Result<ProgramBuilder> {
let mut program = ProgramBuilder::new(ProgramBuilderOpts {
query_mode,
num_cursors: 1,
approx_num_insns: 30,
approx_num_labels: 5,
});
if with.is_some() {
crate::bail_parse_error!("WITH clause is not supported");
}
@@ -113,7 +120,7 @@ pub fn translate_insert(
for value in values {
populate_column_registers(
program,
&mut program,
value,
&column_mappings,
column_registers_start,
@@ -152,7 +159,7 @@ pub fn translate_insert(
program.emit_insn(Insn::OpenWriteAwait {});
populate_column_registers(
program,
&mut program,
&values[0],
&column_mappings,
column_registers_start,
@@ -264,7 +271,7 @@ pub fn translate_insert(
target_pc: start_offset,
});
Ok(())
Ok(program)
}
#[derive(Debug)]

View File

@@ -28,7 +28,7 @@ use crate::storage::pager::Pager;
use crate::storage::sqlite3_ondisk::DatabaseHeader;
use crate::translate::delete::translate_delete;
use crate::util::PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX;
use crate::vdbe::builder::{CursorType, QueryMode};
use crate::vdbe::builder::{CursorType, ProgramBuilderOpts, QueryMode};
use crate::vdbe::{builder::ProgramBuilder, insn::Insn, Program};
use crate::{bail_parse_error, Connection, LimboError, Result, SymbolTable};
use insert::translate_insert;
@@ -48,10 +48,9 @@ pub fn translate(
syms: &SymbolTable,
query_mode: QueryMode,
) -> Result<Program> {
let mut program = ProgramBuilder::new(query_mode);
let mut change_cnt_on = false;
match stmt {
let program = match stmt {
ast::Stmt::AlterTable(_, _) => bail_parse_error!("ALTER TABLE not supported yet"),
ast::Stmt::Analyze(_) => bail_parse_error!("ANALYZE not supported yet"),
ast::Stmt::Attach { .. } => bail_parse_error!("ATTACH not supported yet"),
@@ -68,7 +67,7 @@ pub fn translate(
bail_parse_error!("TEMPORARY table not supported yet");
}
translate_create_table(&mut program, tbl_name, body, if_not_exists, schema)?;
translate_create_table(query_mode, tbl_name, body, if_not_exists, schema)?
}
ast::Stmt::CreateTrigger { .. } => bail_parse_error!("CREATE TRIGGER not supported yet"),
ast::Stmt::CreateView { .. } => bail_parse_error!("CREATE VIEW not supported yet"),
@@ -82,30 +81,26 @@ pub fn translate(
..
} => {
change_cnt_on = true;
translate_delete(&mut program, schema, &tbl_name, where_clause, limit, syms)?;
translate_delete(query_mode, schema, &tbl_name, where_clause, limit, syms)?
}
ast::Stmt::Detach(_) => bail_parse_error!("DETACH not supported yet"),
ast::Stmt::DropIndex { .. } => bail_parse_error!("DROP INDEX not supported yet"),
ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"),
ast::Stmt::DropTrigger { .. } => bail_parse_error!("DROP TRIGGER not supported yet"),
ast::Stmt::DropView { .. } => bail_parse_error!("DROP VIEW not supported yet"),
ast::Stmt::Pragma(name, body) => {
pragma::translate_pragma(
&mut program,
&schema,
&name,
body,
database_header.clone(),
pager,
)?;
}
ast::Stmt::Pragma(name, body) => pragma::translate_pragma(
query_mode,
&schema,
&name,
body,
database_header.clone(),
pager,
)?,
ast::Stmt::Reindex { .. } => bail_parse_error!("REINDEX not supported yet"),
ast::Stmt::Release(_) => bail_parse_error!("RELEASE not supported yet"),
ast::Stmt::Rollback { .. } => bail_parse_error!("ROLLBACK not supported yet"),
ast::Stmt::Savepoint(_) => bail_parse_error!("SAVEPOINT not supported yet"),
ast::Stmt::Select(select) => {
translate_select(&mut program, schema, *select, syms)?;
}
ast::Stmt::Select(select) => translate_select(query_mode, schema, *select, syms)?,
ast::Stmt::Update { .. } => bail_parse_error!("UPDATE not supported yet"),
ast::Stmt::Vacuum(_, _) => bail_parse_error!("VACUUM not supported yet"),
ast::Stmt::Insert {
@@ -118,7 +113,7 @@ pub fn translate(
} => {
change_cnt_on = true;
translate_insert(
&mut program,
query_mode,
schema,
&with,
&or_conflict,
@@ -127,9 +122,9 @@ pub fn translate(
&body,
&returning,
syms,
)?;
)?
}
}
};
Ok(program.build(database_header, connection, change_cnt_on))
}
@@ -377,12 +372,18 @@ fn check_automatic_pk_index_required(
}
fn translate_create_table(
program: &mut ProgramBuilder,
query_mode: QueryMode,
tbl_name: ast::QualifiedName,
body: ast::CreateTableBody,
if_not_exists: bool,
schema: &Schema,
) -> Result<()> {
) -> Result<ProgramBuilder> {
let mut program = ProgramBuilder::new(ProgramBuilderOpts {
query_mode,
num_cursors: 1,
approx_num_insns: 30,
approx_num_labels: 1,
});
if schema.get_table(tbl_name.name.0.as_str()).is_some() {
if if_not_exists {
let init_label = program.emit_init();
@@ -393,7 +394,7 @@ fn translate_create_table(
program.emit_constant_insns();
program.emit_goto(start_offset);
return Ok(());
return Ok(program);
}
bail_parse_error!("Table {} already exists", tbl_name);
}
@@ -440,7 +441,7 @@ fn translate_create_table(
// https://github.com/sqlite/sqlite/blob/95f6df5b8d55e67d1e34d2bff217305a2f21b1fb/src/build.c#L2856-L2871
// https://github.com/sqlite/sqlite/blob/95f6df5b8d55e67d1e34d2bff217305a2f21b1fb/src/build.c#L1334C5-L1336C65
let index_root_reg = check_automatic_pk_index_required(&body, program, &tbl_name.name.0)?;
let index_root_reg = check_automatic_pk_index_required(&body, &mut program, &tbl_name.name.0)?;
if let Some(index_root_reg) = index_root_reg {
program.emit_insn(Insn::CreateBtree {
db: 0,
@@ -463,7 +464,7 @@ fn translate_create_table(
// Add the table entry to sqlite_schema
emit_schema_entry(
program,
&mut program,
sqlite_schema_cursor_id,
SchemaEntryType::Table,
&tbl_name.name.0,
@@ -479,7 +480,7 @@ fn translate_create_table(
PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX, tbl_name.name.0
);
emit_schema_entry(
program,
&mut program,
sqlite_schema_cursor_id,
SchemaEntryType::Index,
&index_name,
@@ -506,7 +507,7 @@ fn translate_create_table(
program.emit_constant_insns();
program.emit_goto(start_offset);
Ok(())
Ok(program)
}
enum PrimaryKeyDefinitionType<'a> {

View File

@@ -10,7 +10,7 @@ use crate::schema::Schema;
use crate::storage::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE};
use crate::storage::wal::CheckpointMode;
use crate::util::normalize_ident;
use crate::vdbe::builder::ProgramBuilder;
use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode};
use crate::vdbe::insn::Insn;
use crate::vdbe::BranchOffset;
use crate::{bail_parse_error, Pager};
@@ -34,20 +34,26 @@ fn list_pragmas(
}
pub fn translate_pragma(
program: &mut ProgramBuilder,
query_mode: QueryMode,
schema: &Schema,
name: &ast::QualifiedName,
body: Option<ast::PragmaBody>,
database_header: Rc<RefCell<DatabaseHeader>>,
pager: Rc<Pager>,
) -> crate::Result<()> {
) -> crate::Result<ProgramBuilder> {
let mut program = ProgramBuilder::new(ProgramBuilderOpts {
query_mode,
num_cursors: 0,
approx_num_insns: 20,
approx_num_labels: 0,
});
let init_label = program.emit_init();
let start_offset = program.offset();
let mut write = false;
if name.name.0.to_lowercase() == "pragma_list" {
list_pragmas(program, init_label, start_offset);
return Ok(());
list_pragmas(&mut program, init_label, start_offset);
return Ok(program);
}
let pragma = match PragmaName::from_str(&name.name.0) {
@@ -57,7 +63,7 @@ pub fn translate_pragma(
match body {
None => {
query_pragma(pragma, schema, None, database_header.clone(), program)?;
query_pragma(pragma, schema, None, database_header.clone(), &mut program)?;
}
Some(ast::PragmaBody::Equals(value)) => match pragma {
PragmaName::TableInfo => {
@@ -66,7 +72,7 @@ pub fn translate_pragma(
schema,
Some(value),
database_header.clone(),
program,
&mut program,
)?;
}
_ => {
@@ -77,7 +83,7 @@ pub fn translate_pragma(
value,
database_header.clone(),
pager,
program,
&mut program,
)?;
}
},
@@ -88,7 +94,7 @@ pub fn translate_pragma(
schema,
Some(value),
database_header.clone(),
program,
&mut program,
)?;
}
_ => {
@@ -102,7 +108,7 @@ pub fn translate_pragma(
program.emit_constant_insns();
program.emit_goto(start_offset);
Ok(())
Ok(program)
}
fn update_pragma(

View File

@@ -1,5 +1,5 @@
use super::emitter::emit_program;
use super::plan::{select_star, SelectQueryType};
use super::plan::{select_star, Operation, Search, SelectQueryType};
use crate::function::{AggFunc, ExtFunc, Func};
use crate::translate::optimizer::optimize_plan;
use crate::translate::plan::{Aggregate, Direction, GroupBy, Plan, ResultSetColumn, SelectPlan};
@@ -8,20 +8,32 @@ use crate::translate::planner::{
parse_where, resolve_aggregates,
};
use crate::util::normalize_ident;
use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
use crate::SymbolTable;
use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result};
use sqlite3_parser::ast::ResultColumn;
use sqlite3_parser::ast::{self};
/// Translates a `SELECT` statement into a VDBE program builder.
///
/// The statement is planned by `prepare_select_plan`, optimized with
/// `optimize_plan`, and emitted with `emit_program`. Any error from those
/// steps is propagated with `?`.
///
/// NOTE(review): this span interleaves the pre-change and post-change lines
/// of the function (two `program`/`query_mode` parameters, two return types,
/// two `emit_program` calls). The description follows the variant that
/// creates its own `ProgramBuilder` and returns it.
///
/// # Panics
///
/// Panics if the plan produced by `prepare_select_plan` is not `Plan::Select`.
pub fn translate_select(
program: &mut ProgramBuilder,
query_mode: QueryMode,
schema: &Schema,
select: ast::Select,
syms: &SymbolTable,
) -> Result<()> {
) -> Result<ProgramBuilder> {
let mut select_plan = prepare_select_plan(schema, select, syms)?;
optimize_plan(&mut select_plan, schema)?;
emit_program(program, select_plan, syms)
// Borrow the concrete select plan so its shape can be used to size the builder.
let Plan::Select(ref select) = select_plan else {
panic!("select_plan is not a SelectPlan");
};
// All three capacity hints are computed from the optimized plan before any
// instruction is emitted.
let mut program = ProgramBuilder::new(ProgramBuilderOpts {
query_mode,
num_cursors: count_plan_required_cursors(&select),
approx_num_insns: estimate_num_instructions(&select),
approx_num_labels: estimate_num_labels(&select),
});
emit_program(&mut program, select_plan, syms)?;
Ok(program)
}
pub fn prepare_select_plan(
@@ -373,3 +385,70 @@ pub fn prepare_select_plan(
_ => todo!(),
}
}
/// Counts the cursors a `SELECT` plan is expected to open.
///
/// Every table reference contributes according to its operation: one cursor
/// for a scan or a rowid lookup, two for an index search, and for a subquery
/// whatever the nested plan needs (computed recursively). `GROUP BY` and
/// `ORDER BY` each add one sorter cursor and one pseudo cursor on top.
fn count_plan_required_cursors(plan: &SelectPlan) -> usize {
    let table_cursors = plan
        .table_references
        .iter()
        .fold(0usize, |total, table| {
            let needed = match &table.op {
                Operation::Scan { .. } => 1,
                Operation::Search(Search::RowidEq { .. } | Search::RowidSearch { .. }) => 1,
                // An index search needs both the btree cursor and the index cursor.
                Operation::Search(Search::IndexSearch { .. }) => 2,
                Operation::Subquery { plan, .. } => count_plan_required_cursors(plan),
            };
            total + needed
        });

    // One sorter per sorting clause present in the plan...
    let sorter_cursors =
        usize::from(plan.group_by.is_some()) + usize::from(plan.order_by.is_some());
    // ...and the same number of pseudo cursors.
    let pseudo_cursors = sorter_cursors;

    table_cursors + sorter_cursors + pseudo_cursors
}
/// Returns a rough guess of how many instructions a `SELECT` program needs.
///
/// The figure is heuristic: a fixed base, a per-table allowance that depends
/// on the table's operation (subqueries add their own recursive estimate),
/// a flat allowance for each of `GROUP BY` and `ORDER BY` when present, and a
/// small allowance per `WHERE` term.
fn estimate_num_instructions(select: &SelectPlan) -> usize {
    // Fixed base allowance.
    let mut estimate: usize = 20;

    for table in select.table_references.iter() {
        estimate += match &table.op {
            Operation::Scan { .. } => 10,
            Operation::Search(_) => 15,
            Operation::Subquery { plan, .. } => 10 + estimate_num_instructions(plan),
        };
    }

    if select.group_by.is_some() {
        estimate += 10;
    }
    if select.order_by.is_some() {
        estimate += 10;
    }

    // Three instructions are budgeted for every WHERE term.
    estimate + select.where_clause.len() * 3
}
/// Returns a rough guess of how many jump labels a `SELECT` program needs.
///
/// The figure is heuristic: two labels for init/halt, three loop labels per
/// table in the main loop (a subquery additionally contributes its own
/// recursive estimate), one label marking the end of the main loop, a flat
/// allowance for each of `GROUP BY` and `ORDER BY` when present, and two
/// labels per `WHERE` term.
fn estimate_num_labels(select: &SelectPlan) -> usize {
    const INIT_AND_HALT: usize = 2;
    const PER_TABLE_LOOP: usize = 3;
    const MAIN_LOOP_END: usize = 1;
    const PER_SORTING_CLAUSE: usize = 10;
    const PER_CONDITION: usize = 2;

    let table_loops = select
        .table_references
        .iter()
        .fold(0usize, |total, table| {
            // Only subqueries bring extra labels beyond the per-table loop labels.
            let nested = match &table.op {
                Operation::Scan { .. } | Operation::Search(_) => 0,
                Operation::Subquery { plan, .. } => estimate_num_labels(plan),
            };
            total + PER_TABLE_LOOP + nested
        });

    let sorting_clauses =
        usize::from(select.group_by.is_some()) + usize::from(select.order_by.is_some());

    INIT_AND_HALT
        + table_loops
        + MAIN_LOOP_END
        + sorting_clauses * PER_SORTING_CLAUSE
        + select.where_clause.len() * PER_CONDITION
}

View File

@@ -54,18 +54,25 @@ pub enum QueryMode {
Explain,
}
/// Options passed to `ProgramBuilder::new`.
///
/// Apart from `query_mode`, the fields are used by the constructor as initial
/// capacities for the builder's internal vectors.
pub struct ProgramBuilderOpts {
/// Mode the program is built for; in `QueryMode::Explain` the builder also
/// creates its comments map.
pub query_mode: QueryMode,
/// Initial capacity reserved for the builder's cursor reference list.
pub num_cursors: usize,
/// Initial capacity reserved for the builder's instruction list.
pub approx_num_insns: usize,
/// Initial capacity reserved for the builder's label-to-offset table.
pub approx_num_labels: usize,
}
impl ProgramBuilder {
pub fn new(query_mode: QueryMode) -> Self {
pub fn new(opts: ProgramBuilderOpts) -> Self {
Self {
next_free_register: 1,
next_free_cursor_id: 0,
insns: Vec::new(),
insns: Vec::with_capacity(opts.approx_num_insns),
next_insn_label: None,
cursor_ref: Vec::new(),
cursor_ref: Vec::with_capacity(opts.num_cursors),
constant_insns: Vec::new(),
label_to_resolved_offset: Vec::with_capacity(4), // 4 is arbitrary, we guess to assign at least this much
label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels),
seekrowid_emitted_bitmask: 0,
comments: if query_mode == QueryMode::Explain {
comments: if opts.query_mode == QueryMode::Explain {
Some(HashMap::new())
} else {
None