mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-26 04:24:21 +01:00
Merge 'Count optimization' from Pedro Muniz
After reading #1123, I wanted to see what optimizations I could do. Sqlite optimizes `count` aggregation for the following case: `SELECT count() FROM <tbl>`. This is so widely used, that they made an optimization just for it in the form of the `COUNT` opcode. This PR thus implements this optimization by creating the `COUNT` opcode, and checking in the select emitter if we the query is a Simple Count Query. If it is, we just emit the Opcode instead of going through a Rewind loop, saving on execution time. The screenshots below show a huge decrease in execution time. - **Main** <img width="383" alt="image" src="https://github.com/user- attachments/assets/99a9dec4-e7c5-41db-ba67-4eafa80dd2e6" /> - **Count Optimization** <img width="435" alt="image" src="https://github.com/user- attachments/assets/e93b3233-92e6-4736-aa60-b52b2477179f" /> Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com> Closes #1443
This commit is contained in:
@@ -180,9 +180,62 @@ fn bench_execute_select_1(criterion: &mut Criterion) {
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_execute_select_count(criterion: &mut Criterion) {
|
||||
// https://github.com/tursodatabase/limbo/issues/174
|
||||
// The rusqlite benchmark crashes on Mac M1 when using the flamegraph features
|
||||
let enable_rusqlite = std::env::var("DISABLE_RUSQLITE_BENCHMARK").is_err();
|
||||
|
||||
#[allow(clippy::arc_with_non_send_sync)]
|
||||
let io = Arc::new(PlatformIO::new().unwrap());
|
||||
let db = Database::open_file(io.clone(), "../testing/testing.db", false).unwrap();
|
||||
let limbo_conn = db.connect().unwrap();
|
||||
|
||||
let mut group = criterion.benchmark_group("Execute `SELECT count() FROM users`");
|
||||
|
||||
group.bench_function("limbo_execute_select_count", |b| {
|
||||
let mut stmt = limbo_conn.prepare("SELECT count() FROM users").unwrap();
|
||||
let io = io.clone();
|
||||
b.iter(|| {
|
||||
loop {
|
||||
match stmt.step().unwrap() {
|
||||
limbo_core::StepResult::Row => {
|
||||
black_box(stmt.row());
|
||||
}
|
||||
limbo_core::StepResult::IO => {
|
||||
let _ = io.run_once();
|
||||
}
|
||||
limbo_core::StepResult::Done => {
|
||||
break;
|
||||
}
|
||||
limbo_core::StepResult::Interrupt | limbo_core::StepResult::Busy => {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
stmt.reset();
|
||||
});
|
||||
});
|
||||
|
||||
if enable_rusqlite {
|
||||
let sqlite_conn = rusqlite_open();
|
||||
|
||||
group.bench_function("sqlite_execute_select_count", |b| {
|
||||
let mut stmt = sqlite_conn.prepare("SELECT count() FROM users").unwrap();
|
||||
b.iter(|| {
|
||||
let mut rows = stmt.raw_query();
|
||||
while let Some(row) = rows.next().unwrap() {
|
||||
black_box(row);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group! {
|
||||
name = benches;
|
||||
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
|
||||
targets = bench_prepare_query, bench_execute_select_1, bench_execute_select_rows
|
||||
targets = bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count
|
||||
}
|
||||
criterion_main!(benches);
|
||||
|
||||
@@ -365,6 +365,8 @@ pub struct BTreeCursor {
|
||||
reusable_immutable_record: RefCell<Option<ImmutableRecord>>,
|
||||
empty_record: Cell<bool>,
|
||||
pub index_key_sort_order: IndexKeySortOrder,
|
||||
/// Maintain count of the number of records in the btree. Used for the `Count` opcode
|
||||
count: usize,
|
||||
}
|
||||
|
||||
impl BTreeCursor {
|
||||
@@ -390,6 +392,7 @@ impl BTreeCursor {
|
||||
reusable_immutable_record: RefCell::new(None),
|
||||
empty_record: Cell::new(true),
|
||||
index_key_sort_order: IndexKeySortOrder::default(),
|
||||
count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4228,6 +4231,112 @@ impl BTreeCursor {
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Count the number of entries in the b-tree
|
||||
///
|
||||
/// Only supposed to be used in the context of a simple Count Select Statement
|
||||
pub fn count(&mut self) -> Result<CursorResult<usize>> {
|
||||
if self.count == 0 {
|
||||
self.move_to_root();
|
||||
}
|
||||
|
||||
if let Some(_mv_cursor) = &self.mv_cursor {
|
||||
todo!("Implement count for mvcc");
|
||||
}
|
||||
|
||||
let mut mem_page_rc;
|
||||
let mut mem_page;
|
||||
let mut contents;
|
||||
|
||||
loop {
|
||||
mem_page_rc = self.stack.top();
|
||||
return_if_locked_maybe_load!(self.pager, mem_page_rc);
|
||||
mem_page = mem_page_rc.get();
|
||||
contents = mem_page.contents.as_ref().unwrap();
|
||||
|
||||
/* If this is a leaf page or the tree is not an int-key tree, then
|
||||
** this page contains countable entries. Increment the entry counter
|
||||
** accordingly.
|
||||
*/
|
||||
if !matches!(contents.page_type(), PageType::TableInterior) {
|
||||
self.count += contents.cell_count();
|
||||
}
|
||||
|
||||
let cell_idx = self.stack.current_cell_index() as usize;
|
||||
|
||||
// Second condition is necessary in case we return if the page is locked in the loop below
|
||||
if contents.is_leaf() || cell_idx > contents.cell_count() {
|
||||
loop {
|
||||
if !self.stack.has_parent() {
|
||||
// All pages of the b-tree have been visited. Return successfully
|
||||
self.move_to_root();
|
||||
|
||||
return Ok(CursorResult::Ok(self.count));
|
||||
}
|
||||
|
||||
// Move to parent
|
||||
self.going_upwards = true;
|
||||
self.stack.pop();
|
||||
|
||||
mem_page_rc = self.stack.top();
|
||||
return_if_locked_maybe_load!(self.pager, mem_page_rc);
|
||||
mem_page = mem_page_rc.get();
|
||||
contents = mem_page.contents.as_ref().unwrap();
|
||||
|
||||
let cell_idx = self.stack.current_cell_index() as usize;
|
||||
|
||||
if cell_idx <= contents.cell_count() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let cell_idx = self.stack.current_cell_index() as usize;
|
||||
|
||||
assert!(cell_idx <= contents.cell_count(),);
|
||||
assert!(!contents.is_leaf());
|
||||
|
||||
if cell_idx == contents.cell_count() {
|
||||
// Move to right child
|
||||
// should be safe as contents is not a leaf page
|
||||
let right_most_pointer = contents.rightmost_pointer().unwrap();
|
||||
self.stack.advance();
|
||||
let mem_page = self.pager.read_page(right_most_pointer as usize)?;
|
||||
self.going_upwards = false;
|
||||
self.stack.push(mem_page);
|
||||
} else {
|
||||
// Move to child left page
|
||||
let cell = contents.cell_get(
|
||||
cell_idx,
|
||||
payload_overflow_threshold_max(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
payload_overflow_threshold_min(
|
||||
contents.page_type(),
|
||||
self.usable_space() as u16,
|
||||
),
|
||||
self.usable_space(),
|
||||
)?;
|
||||
|
||||
match cell {
|
||||
BTreeCell::TableInteriorCell(TableInteriorCell {
|
||||
_left_child_page: left_child_page,
|
||||
..
|
||||
})
|
||||
| BTreeCell::IndexInteriorCell(IndexInteriorCell {
|
||||
left_child_page, ..
|
||||
}) => {
|
||||
self.stack.advance();
|
||||
let mem_page = self.pager.read_page(left_child_page as usize)?;
|
||||
self.going_upwards = false;
|
||||
self.stack.push(mem_page);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
|
||||
@@ -24,6 +24,7 @@ use super::main_loop::{close_loop, emit_loop, init_loop, open_loop, LeftJoinMeta
|
||||
use super::order_by::{emit_order_by, init_order_by, SortMetadata};
|
||||
use super::plan::{JoinOrderMember, Operation, SelectPlan, TableReference, UpdatePlan};
|
||||
use super::schema::ParseSchema;
|
||||
use super::select::emit_simple_count;
|
||||
use super::subquery::emit_subqueries;
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -288,6 +289,11 @@ pub fn emit_query<'a>(
|
||||
OperationMode::SELECT,
|
||||
)?;
|
||||
|
||||
if plan.is_simple_count() {
|
||||
emit_simple_count(program, t_ctx, plan)?;
|
||||
return Ok(t_ctx.reg_result_cols_start.unwrap());
|
||||
}
|
||||
|
||||
for where_term in plan
|
||||
.where_clause
|
||||
.iter()
|
||||
@@ -323,6 +329,7 @@ pub fn emit_query<'a>(
|
||||
|
||||
// Clean up and close the main execution loop
|
||||
close_loop(program, t_ctx, &plan.table_references, &plan.join_order)?;
|
||||
|
||||
program.preassign_label_to_next_insn(after_main_loop_label);
|
||||
|
||||
let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition;
|
||||
|
||||
@@ -343,6 +343,48 @@ impl SelectPlan {
|
||||
pub fn group_by_sorter_column_count(&self) -> usize {
|
||||
self.agg_args_count() + self.group_by_col_count() + self.non_group_by_non_agg_column_count()
|
||||
}
|
||||
|
||||
/// Reference: https://github.com/sqlite/sqlite/blob/5db695197b74580c777b37ab1b787531f15f7f9f/src/select.c#L8613
|
||||
///
|
||||
/// Checks to see if the query is of the format `SELECT count(*) FROM <tbl>`
|
||||
pub fn is_simple_count(&self) -> bool {
|
||||
if !self.where_clause.is_empty()
|
||||
|| self.aggregates.len() != 1
|
||||
|| matches!(self.query_type, SelectQueryType::Subquery { .. })
|
||||
|| self.table_references.len() != 1
|
||||
|| self.result_columns.len() != 1
|
||||
|| self.group_by.is_some()
|
||||
|| self.contains_constant_false_condition
|
||||
// TODO: (pedrocarlo) maybe can optimize to use the count optmization with more columns
|
||||
{
|
||||
return false;
|
||||
}
|
||||
let table_ref = self.table_references.first().unwrap();
|
||||
if !matches!(table_ref.table, crate::schema::Table::BTree(..)) {
|
||||
return false;
|
||||
}
|
||||
let agg = self.aggregates.first().unwrap();
|
||||
if !matches!(agg.func, AggFunc::Count0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let count = limbo_sqlite3_parser::ast::Expr::FunctionCall {
|
||||
name: limbo_sqlite3_parser::ast::Id("count".to_string()),
|
||||
distinctness: None,
|
||||
args: None,
|
||||
order_by: None,
|
||||
filter_over: None,
|
||||
};
|
||||
let count_star = limbo_sqlite3_parser::ast::Expr::FunctionCallStar {
|
||||
name: limbo_sqlite3_parser::ast::Id("count".to_string()),
|
||||
filter_over: None,
|
||||
};
|
||||
let result_col_expr = &self.result_columns.get(0).unwrap().expr;
|
||||
if *result_col_expr != count && *result_col_expr != count_star {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use super::emitter::emit_program;
|
||||
use super::emitter::{emit_program, TranslateCtx};
|
||||
use super::plan::{select_star, JoinOrderMember, Operation, Search, SelectQueryType};
|
||||
use super::planner::Scope;
|
||||
use crate::function::{AggFunc, ExtFunc, Func};
|
||||
@@ -10,6 +10,7 @@ use crate::translate::planner::{
|
||||
};
|
||||
use crate::util::normalize_ident;
|
||||
use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
|
||||
use crate::vdbe::insn::Insn;
|
||||
use crate::SymbolTable;
|
||||
use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result};
|
||||
use limbo_sqlite3_parser::ast::{self, SortOrder};
|
||||
@@ -484,3 +485,41 @@ fn estimate_num_labels(select: &SelectPlan) -> usize {
|
||||
|
||||
num_labels
|
||||
}
|
||||
|
||||
pub fn emit_simple_count<'a>(
|
||||
program: &mut ProgramBuilder,
|
||||
_t_ctx: &mut TranslateCtx<'a>,
|
||||
plan: &'a SelectPlan,
|
||||
) -> Result<()> {
|
||||
let cursors = plan
|
||||
.table_references
|
||||
.get(0)
|
||||
.unwrap()
|
||||
.resolve_cursors(program)?;
|
||||
|
||||
let cursor_id = {
|
||||
match cursors {
|
||||
(_, Some(cursor_id)) | (Some(cursor_id), None) => cursor_id,
|
||||
_ => panic!("cursor for table should have been opened"),
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: I think this allocation can be avoided if we are smart with the `TranslateCtx`
|
||||
let target_reg = program.alloc_register();
|
||||
|
||||
program.emit_insn(Insn::Count {
|
||||
cursor_id,
|
||||
target_reg,
|
||||
exact: true,
|
||||
});
|
||||
|
||||
program.emit_insn(Insn::Close { cursor_id });
|
||||
let output_reg = program.alloc_register();
|
||||
program.emit_insn(Insn::Copy {
|
||||
src_reg: target_reg,
|
||||
dst_reg: output_reg,
|
||||
amount: 0,
|
||||
});
|
||||
program.emit_result_row(output_reg, 1);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -4749,6 +4749,35 @@ pub fn op_affinity(
|
||||
Ok(InsnFunctionStepResult::Step)
|
||||
}
|
||||
|
||||
pub fn op_count(
|
||||
program: &Program,
|
||||
state: &mut ProgramState,
|
||||
insn: &Insn,
|
||||
pager: &Rc<Pager>,
|
||||
mv_store: Option<&Rc<MvStore>>,
|
||||
) -> Result<InsnFunctionStepResult> {
|
||||
let Insn::Count {
|
||||
cursor_id,
|
||||
target_reg,
|
||||
exact,
|
||||
} = insn
|
||||
else {
|
||||
unreachable!("unexpected Insn {:?}", insn)
|
||||
};
|
||||
|
||||
let count = {
|
||||
let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Count");
|
||||
let cursor = cursor.as_btree_mut();
|
||||
let count = return_if_io!(cursor.count());
|
||||
count
|
||||
};
|
||||
|
||||
state.registers[*target_reg] = Register::OwnedValue(OwnedValue::Integer(count as i64));
|
||||
|
||||
state.pc += 1;
|
||||
Ok(InsnFunctionStepResult::Step)
|
||||
}
|
||||
|
||||
fn exec_lower(reg: &OwnedValue) -> Option<OwnedValue> {
|
||||
match reg {
|
||||
OwnedValue::Text(t) => Some(OwnedValue::build_text(&t.as_str().to_lowercase())),
|
||||
|
||||
@@ -1448,6 +1448,19 @@ pub fn insn_to_str(
|
||||
.join(", ")
|
||||
),
|
||||
),
|
||||
Insn::Count {
|
||||
cursor_id,
|
||||
target_reg,
|
||||
exact,
|
||||
} => (
|
||||
"Count",
|
||||
*cursor_id as i32,
|
||||
*target_reg as i32,
|
||||
if *exact { 0 } else { 1 },
|
||||
OwnedValue::build_text(""),
|
||||
0,
|
||||
"".to_string(),
|
||||
),
|
||||
};
|
||||
format!(
|
||||
"{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}",
|
||||
|
||||
@@ -857,6 +857,16 @@ pub enum Insn {
|
||||
count: NonZeroUsize,
|
||||
affinities: String,
|
||||
},
|
||||
|
||||
/// Store the number of entries (an integer value) in the table or index opened by cursor P1 in register P2.
|
||||
///
|
||||
/// If P3==0, then an exact count is obtained, which involves visiting every btree page of the table.
|
||||
/// But if P3 is non-zero, an estimate is returned based on the current cursor position.
|
||||
Count {
|
||||
cursor_id: CursorID,
|
||||
target_reg: usize,
|
||||
exact: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl Insn {
|
||||
@@ -977,6 +987,7 @@ impl Insn {
|
||||
Insn::NotFound { .. } => execute::op_not_found,
|
||||
Insn::Affinity { .. } => execute::op_affinity,
|
||||
Insn::IdxDelete { .. } => execute::op_idx_delete,
|
||||
Insn::Count { .. } => execute::op_count,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user