diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index 1fed5308b..388fcf933 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -180,9 +180,62 @@ fn bench_execute_select_1(criterion: &mut Criterion) { group.finish(); } +fn bench_execute_select_count(criterion: &mut Criterion) { + // https://github.com/tursodatabase/limbo/issues/174 + // The rusqlite benchmark crashes on Mac M1 when using the flamegraph features + let enable_rusqlite = std::env::var("DISABLE_RUSQLITE_BENCHMARK").is_err(); + + #[allow(clippy::arc_with_non_send_sync)] + let io = Arc::new(PlatformIO::new().unwrap()); + let db = Database::open_file(io.clone(), "../testing/testing.db", false).unwrap(); + let limbo_conn = db.connect().unwrap(); + + let mut group = criterion.benchmark_group("Execute `SELECT count() FROM users`"); + + group.bench_function("limbo_execute_select_count", |b| { + let mut stmt = limbo_conn.prepare("SELECT count() FROM users").unwrap(); + let io = io.clone(); + b.iter(|| { + loop { + match stmt.step().unwrap() { + limbo_core::StepResult::Row => { + black_box(stmt.row()); + } + limbo_core::StepResult::IO => { + let _ = io.run_once(); + } + limbo_core::StepResult::Done => { + break; + } + limbo_core::StepResult::Interrupt | limbo_core::StepResult::Busy => { + unreachable!(); + } + } + } + stmt.reset(); + }); + }); + + if enable_rusqlite { + let sqlite_conn = rusqlite_open(); + + group.bench_function("sqlite_execute_select_count", |b| { + let mut stmt = sqlite_conn.prepare("SELECT count() FROM users").unwrap(); + b.iter(|| { + let mut rows = stmt.raw_query(); + while let Some(row) = rows.next().unwrap() { + black_box(row); + } + }); + }); + } + + group.finish(); +} + criterion_group! { name = benches; config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); - targets = bench_prepare_query, bench_execute_select_1, bench_execute_select_rows + targets = bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count } criterion_main!(benches); diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 0523edcb8..208900951 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -365,6 +365,8 @@ pub struct BTreeCursor { reusable_immutable_record: RefCell>, empty_record: Cell, pub index_key_sort_order: IndexKeySortOrder, + /// Maintain count of the number of records in the btree. Used for the `Count` opcode + count: usize, } impl BTreeCursor { @@ -390,6 +392,7 @@ impl BTreeCursor { reusable_immutable_record: RefCell::new(None), empty_record: Cell::new(true), index_key_sort_order: IndexKeySortOrder::default(), + count: 0, } } @@ -4228,6 +4231,112 @@ impl BTreeCursor { _ => false, } } + + /// Count the number of entries in the b-tree + /// + /// Only supposed to be used in the context of a simple Count Select Statement + pub fn count(&mut self) -> Result> { + if self.count == 0 { + self.move_to_root(); + } + + if let Some(_mv_cursor) = &self.mv_cursor { + todo!("Implement count for mvcc"); + } + + let mut mem_page_rc; + let mut mem_page; + let mut contents; + + loop { + mem_page_rc = self.stack.top(); + return_if_locked_maybe_load!(self.pager, mem_page_rc); + mem_page = mem_page_rc.get(); + contents = mem_page.contents.as_ref().unwrap(); + + /* If this is a leaf page or the tree is not an int-key tree, then + ** this page contains countable entries. Increment the entry counter + ** accordingly. + */ + if !matches!(contents.page_type(), PageType::TableInterior) { + self.count += contents.cell_count(); + } + + let cell_idx = self.stack.current_cell_index() as usize; + + // Second condition is necessary in case we return if the page is locked in the loop below + if contents.is_leaf() || cell_idx > contents.cell_count() { + loop { + if !self.stack.has_parent() { + // All pages of the b-tree have been visited. Return successfully + self.move_to_root(); + + return Ok(CursorResult::Ok(self.count)); + } + + // Move to parent + self.going_upwards = true; + self.stack.pop(); + + mem_page_rc = self.stack.top(); + return_if_locked_maybe_load!(self.pager, mem_page_rc); + mem_page = mem_page_rc.get(); + contents = mem_page.contents.as_ref().unwrap(); + + let cell_idx = self.stack.current_cell_index() as usize; + + if cell_idx <= contents.cell_count() { + break; + } + } + } + + let cell_idx = self.stack.current_cell_index() as usize; + + assert!(cell_idx <= contents.cell_count(),); + assert!(!contents.is_leaf()); + + if cell_idx == contents.cell_count() { + // Move to right child + // should be safe as contents is not a leaf page + let right_most_pointer = contents.rightmost_pointer().unwrap(); + self.stack.advance(); + let mem_page = self.pager.read_page(right_most_pointer as usize)?; + self.going_upwards = false; + self.stack.push(mem_page); + } else { + // Move to child left page + let cell = contents.cell_get( + cell_idx, + payload_overflow_threshold_max( + contents.page_type(), + self.usable_space() as u16, + ), + payload_overflow_threshold_min( + contents.page_type(), + self.usable_space() as u16, + ), + self.usable_space(), + )?; + + match cell { + BTreeCell::TableInteriorCell(TableInteriorCell { + _left_child_page: left_child_page, + .. + }) + | BTreeCell::IndexInteriorCell(IndexInteriorCell { + left_child_page, .. + }) => { + self.stack.advance(); + let mem_page = self.pager.read_page(left_child_page as usize)?; + self.going_upwards = false; + self.stack.push(mem_page); + } + _ => unreachable!(), + } + } + } + } } #[cfg(debug_assertions)] diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 4dc72249b..6557ca169 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -24,6 +24,7 @@ use super::main_loop::{close_loop, emit_loop, init_loop, open_loop, LeftJoinMeta use super::order_by::{emit_order_by, init_order_by, SortMetadata}; use super::plan::{JoinOrderMember, Operation, SelectPlan, TableReference, UpdatePlan}; use super::schema::ParseSchema; +use super::select::emit_simple_count; use super::subquery::emit_subqueries; #[derive(Debug)] @@ -288,6 +289,11 @@ pub fn emit_query<'a>( OperationMode::SELECT, )?; + if plan.is_simple_count() { + emit_simple_count(program, t_ctx, plan)?; + return Ok(t_ctx.reg_result_cols_start.unwrap()); + } + for where_term in plan .where_clause .iter() @@ -323,6 +329,7 @@ pub fn emit_query<'a>( // Clean up and close the main execution loop close_loop(program, t_ctx, &plan.table_references, &plan.join_order)?; + program.preassign_label_to_next_insn(after_main_loop_label); let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition; diff --git a/core/translate/plan.rs b/core/translate/plan.rs index dd73e7e26..e34efb29b 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -343,6 +343,48 @@ impl SelectPlan { pub fn group_by_sorter_column_count(&self) -> usize { self.agg_args_count() + self.group_by_col_count() + self.non_group_by_non_agg_column_count() } + + /// Reference: https://github.com/sqlite/sqlite/blob/5db695197b74580c777b37ab1b787531f15f7f9f/src/select.c#L8613 + /// + /// Checks to see if the query is of the format `SELECT count(*) FROM ` + pub fn is_simple_count(&self) -> bool { + if !self.where_clause.is_empty() + || self.aggregates.len() != 1 + || matches!(self.query_type, SelectQueryType::Subquery { .. }) + || self.table_references.len() != 1 + || self.result_columns.len() != 1 + || self.group_by.is_some() + || self.contains_constant_false_condition + // TODO: (pedrocarlo) maybe can optimize to use the count optmization with more columns + { + return false; + } + let table_ref = self.table_references.first().unwrap(); + if !matches!(table_ref.table, crate::schema::Table::BTree(..)) { + return false; + } + let agg = self.aggregates.first().unwrap(); + if !matches!(agg.func, AggFunc::Count0) { + return false; + } + + let count = limbo_sqlite3_parser::ast::Expr::FunctionCall { + name: limbo_sqlite3_parser::ast::Id("count".to_string()), + distinctness: None, + args: None, + order_by: None, + filter_over: None, + }; + let count_star = limbo_sqlite3_parser::ast::Expr::FunctionCallStar { + name: limbo_sqlite3_parser::ast::Id("count".to_string()), + filter_over: None, + }; + let result_col_expr = &self.result_columns.get(0).unwrap().expr; + if *result_col_expr != count && *result_col_expr != count_star { + return false; + } + true + } } #[allow(dead_code)] diff --git a/core/translate/select.rs b/core/translate/select.rs index 1bf6905da..d6766a20f 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -1,4 +1,4 @@ -use super::emitter::emit_program; +use super::emitter::{emit_program, TranslateCtx}; use super::plan::{select_star, JoinOrderMember, Operation, Search, SelectQueryType}; use super::planner::Scope; use crate::function::{AggFunc, ExtFunc, Func}; @@ -10,6 +10,7 @@ use crate::translate::planner::{ }; use crate::util::normalize_ident; use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode}; +use crate::vdbe::insn::Insn; use crate::SymbolTable; use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result}; use limbo_sqlite3_parser::ast::{self, SortOrder}; @@ -484,3 +485,41 @@ fn estimate_num_labels(select: &SelectPlan) -> usize { num_labels } + +pub fn emit_simple_count<'a>( + program: &mut ProgramBuilder, + _t_ctx: &mut TranslateCtx<'a>, + plan: &'a SelectPlan, +) -> Result<()> { + let cursors = plan + .table_references + .get(0) + .unwrap() + .resolve_cursors(program)?; + + let cursor_id = { + match cursors { + (_, Some(cursor_id)) | (Some(cursor_id), None) => cursor_id, + _ => panic!("cursor for table should have been opened"), + } + }; + + // TODO: I think this allocation can be avoided if we are smart with the `TranslateCtx` + let target_reg = program.alloc_register(); + + program.emit_insn(Insn::Count { + cursor_id, + target_reg, + exact: true, + }); + + program.emit_insn(Insn::Close { cursor_id }); + let output_reg = program.alloc_register(); + program.emit_insn(Insn::Copy { + src_reg: target_reg, + dst_reg: output_reg, + amount: 0, + }); + program.emit_result_row(output_reg, 1); + Ok(()) +} diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 173233bf4..d6ab6ba4d 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -4749,6 +4749,35 @@ pub fn op_affinity( Ok(InsnFunctionStepResult::Step) } +pub fn op_count( + program: &Program, + state: &mut ProgramState, + insn: &Insn, + pager: &Rc, + mv_store: Option<&Rc>, +) -> Result { + let Insn::Count { + cursor_id, + target_reg, + exact, + } = insn + else { + unreachable!("unexpected Insn {:?}", insn) + }; + + let count = { + let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Count"); + let cursor = cursor.as_btree_mut(); + let count = return_if_io!(cursor.count()); + count + }; + + state.registers[*target_reg] = Register::OwnedValue(OwnedValue::Integer(count as i64)); + + state.pc += 1; + Ok(InsnFunctionStepResult::Step) +} + fn exec_lower(reg: &OwnedValue) -> Option { match reg { OwnedValue::Text(t) => Some(OwnedValue::build_text(&t.as_str().to_lowercase())), diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index e2720dcb7..ff5892ddb 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1448,6 +1448,19 @@ pub fn insn_to_str( .join(", ") ), ), + Insn::Count { + cursor_id, + target_reg, + exact, + } => ( + "Count", + *cursor_id as i32, + *target_reg as i32, + if *exact { 0 } else { 1 }, + OwnedValue::build_text(""), + 0, + "".to_string(), + ), }; format!( "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}", diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 8eea17e48..587695d30 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -857,6 +857,16 @@ pub enum Insn { count: NonZeroUsize, affinities: String, }, + + /// Store the number of entries (an integer value) in the table or index opened by cursor P1 in register P2. + /// + /// If P3==0, then an exact count is obtained, which involves visiting every btree page of the table. + /// But if P3 is non-zero, an estimate is returned based on the current cursor position. + Count { + cursor_id: CursorID, + target_reg: usize, + exact: bool, + }, } impl Insn { @@ -977,6 +987,7 @@ impl Insn { Insn::NotFound { .. } => execute::op_not_found, Insn::Affinity { .. } => execute::op_affinity, Insn::IdxDelete { .. } => execute::op_idx_delete, + Insn::Count { .. } => execute::op_count, } } }