correct count implementation

This commit is contained in:
pedrocarlo
2025-05-03 20:28:32 -03:00
parent b69debb8c0
commit 655ceeca45
4 changed files with 277 additions and 14 deletions

View File

@@ -200,6 +200,20 @@ enum ReadPayloadOverflow {
},
}
struct CountInfo {
state: CountState,
/// Maintain count of the number of records in the btree. Used for the `Count` opcode
count: usize,
}
/// State Machine of Count Operation
#[derive(Debug, Clone, Copy)]
enum CountState {
LoadPage,
MoveParent,
MoveChild,
}
#[derive(Clone, Debug)]
pub enum BTreeKey<'a> {
TableRowId((u64, Option<&'a ImmutableRecord>)),
@@ -289,6 +303,7 @@ enum CursorState {
Write(WriteInfo),
Destroy(DestroyInfo),
Delete(DeleteInfo),
Count(CountInfo),
}
impl CursorState {
@@ -331,6 +346,20 @@ impl CursorState {
_ => None,
}
}
fn count_info(&self) -> Option<&CountInfo> {
match self {
CursorState::Count(x) => Some(x),
_ => None,
}
}
fn mut_count_info(&mut self) -> Option<&mut CountInfo> {
match self {
CursorState::Count(x) => Some(x),
_ => None,
}
}
}
enum OverflowState {
@@ -4228,6 +4257,147 @@ impl BTreeCursor {
_ => false,
}
}
/// Count the number of entries in the b-tree
///
/// Only supposed to be used in the context of a simple Count Select Statement
pub fn count(&mut self) -> Result<CursorResult<usize>> {
if let Some(_mv_cursor) = &self.mv_cursor {
todo!("Implement count for mvcc");
}
if let CursorState::None = &self.state {
self.move_to_root();
self.state = CursorState::Count(CountInfo {
state: CountState::LoadPage,
count: 0,
});
}
loop {
let count_state = self
.state
.count_info()
.expect("count info should have been set")
.state;
match count_state {
CountState::LoadPage => {
let mem_page_rc = self.stack.top();
return_if_locked_maybe_load!(self.pager, mem_page_rc);
let mem_page = mem_page_rc.get();
let contents = mem_page.contents.as_ref().unwrap();
let cell_idx = self.stack.current_cell_index() as usize;
let count_info = self
.state
.mut_count_info()
.expect("count info should have been set");
/* If this is a leaf page or the tree is not an int-key tree, then
** this page contains countable entries. Increment the entry counter
** accordingly.
*/
// TODO: (pedrocarlo) does not take into account if "tree is not an int-key tree" condition
if contents.is_leaf() {
count_info.count += contents.cell_count();
}
if contents.is_leaf() || cell_idx > contents.cell_count() {
count_info.state = CountState::MoveParent;
} else {
count_info.state = CountState::MoveChild;
}
}
CountState::MoveParent => {
let mem_page_rc = self.stack.top();
assert!(mem_page_rc.is_loaded());
if !self.stack.has_parent() {
// All pages of the b-tree have been visited. Return successfully
self.move_to_root();
// Borrow count_info here like this due to borrow checking
let count_info = self
.state
.mut_count_info()
.expect("count info should have been set");
return Ok(CursorResult::Ok(count_info.count));
}
let count_info = self
.state
.mut_count_info()
.expect("count info should have been set");
// Move to parent
self.going_upwards = true;
self.stack.pop();
count_info.state = CountState::LoadPage;
}
CountState::MoveChild => {
let mem_page_rc = self.stack.top();
assert!(mem_page_rc.is_loaded());
let contents = mem_page_rc.get().contents.as_ref().unwrap();
let cell_idx = self.stack.current_cell_index() as usize;
assert!(!contents.is_leaf());
assert!(
cell_idx <= contents.cell_count(),
"cell_idx={} cell_count={}",
cell_idx,
contents.cell_count()
);
if cell_idx == contents.cell_count() {
// Move to right child
// should be safe as contents is not a leaf page
let right_most_pointer = contents.rightmost_pointer().unwrap();
self.stack.advance();
let mem_page = self.pager.read_page(right_most_pointer as usize)?;
self.going_upwards = false;
self.stack.push(mem_page);
} else {
// Move to child left page
let cell = contents.cell_get(
cell_idx,
payload_overflow_threshold_max(
contents.page_type(),
self.usable_space() as u16,
),
payload_overflow_threshold_min(
contents.page_type(),
self.usable_space() as u16,
),
self.usable_space(),
)?;
match cell {
BTreeCell::TableInteriorCell(TableInteriorCell {
_left_child_page: left_child_page,
..
})
| BTreeCell::IndexInteriorCell(IndexInteriorCell {
left_child_page,
..
}) => {
self.stack.advance();
let mem_page = self.pager.read_page(left_child_page as usize)?;
self.going_upwards = false;
self.stack.push(mem_page);
}
_ => unreachable!(),
}
}
let count_info = self
.state
.mut_count_info()
.expect("count info should have been set");
count_info.state = CountState::LoadPage;
}
}
}
}
}
#[cfg(debug_assertions)]

View File

@@ -23,6 +23,7 @@ use super::group_by::{
use super::main_loop::{close_loop, emit_loop, init_loop, open_loop, LeftJoinMetadata, LoopLabels};
use super::order_by::{emit_order_by, init_order_by, SortMetadata};
use super::plan::{JoinOrderMember, Operation, SelectPlan, TableReference, UpdatePlan};
use super::select::{emit_simple_count, is_simple_count};
use super::schema::ParseSchema;
use super::subquery::emit_subqueries;
@@ -232,6 +233,8 @@ pub fn emit_query<'a>(
plan: &'a mut SelectPlan,
t_ctx: &'a mut TranslateCtx<'a>,
) -> Result<usize> {
let is_simple_count = is_simple_count(plan);
// Emit subqueries first so the results can be read in the main query loop.
emit_subqueries(program, t_ctx, &mut plan.table_references)?;
@@ -309,20 +312,23 @@ pub fn emit_query<'a>(
program.preassign_label_to_next_insn(jump_target_when_true);
}
// Set up main query execution loop
open_loop(
program,
t_ctx,
&plan.table_references,
&plan.join_order,
&plan.where_clause,
)?;
if !is_simple_count {
// Set up main query execution loop
open_loop(
program,
t_ctx,
&plan.table_references,
&plan.join_order,
&plan.where_clause,
)?;
// Process result columns and expressions in the inner loop
emit_loop(program, t_ctx, plan)?;
// Process result columns and expressions in the inner loop
emit_loop(program, t_ctx, plan)?;
// Clean up and close the main execution loop
close_loop(program, t_ctx, &plan.table_references, &plan.join_order)?;
}
// Clean up and close the main execution loop
close_loop(program, t_ctx, &plan.table_references, &plan.join_order)?;
program.preassign_label_to_next_insn(after_main_loop_label);
let mut order_by_necessary = plan.order_by.is_some() && !plan.contains_constant_false_condition;
@@ -341,7 +347,11 @@ pub fn emit_query<'a>(
group_by_emit_row_phase(program, t_ctx, plan)?;
} else if !plan.aggregates.is_empty() {
// Handle aggregation without GROUP BY
emit_ungrouped_aggregation(program, t_ctx, plan)?;
if is_simple_count {
emit_simple_count(program, t_ctx, plan)?;
} else {
emit_ungrouped_aggregation(program, t_ctx, plan)?;
}
// Single row result for aggregates without GROUP BY, so ORDER BY not needed
order_by_necessary = false;
}

View File

@@ -1,4 +1,4 @@
use super::emitter::emit_program;
use super::emitter::{emit_program, TranslateCtx};
use super::plan::{select_star, JoinOrderMember, Operation, Search, SelectQueryType};
use super::planner::Scope;
use crate::function::{AggFunc, ExtFunc, Func};
@@ -10,6 +10,7 @@ use crate::translate::planner::{
};
use crate::util::normalize_ident;
use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
use crate::vdbe::insn::Insn;
use crate::SymbolTable;
use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result};
use limbo_sqlite3_parser::ast::{self, SortOrder};
@@ -484,3 +485,67 @@ fn estimate_num_labels(select: &SelectPlan) -> usize {
num_labels
}
/// Reference: https://github.com/sqlite/sqlite/blob/5db695197b74580c777b37ab1b787531f15f7f9f/src/select.c#L8613
///
/// Checks to see if the query is of the format `SELECT count(*) FROM <tbl>`
pub fn is_simple_count(plan: &SelectPlan) -> bool {
// TODO: (pedrocarlo) check for HAVING clause
if !plan.where_clause.is_empty()
|| plan.aggregates.len() != 1
|| matches!(plan.query_type, SelectQueryType::Subquery { .. })
|| plan.table_references.len() != 1
|| plan.result_columns.len() != 1
|| plan.group_by.is_some()
// TODO: (pedrocarlo) maybe can optimize to use the count optmization with more columns
{
return false;
}
let table_ref = plan.table_references.first().unwrap();
if !matches!(table_ref.table, crate::schema::Table::BTree(..)) {
return false;
}
let agg = plan.aggregates.first().unwrap();
if !matches!(agg.func, AggFunc::Count0) {
return false;
}
true
}
pub fn emit_simple_count<'a>(
program: &mut ProgramBuilder,
_t_ctx: &mut TranslateCtx<'a>,
plan: &'a SelectPlan,
) -> Result<()> {
let cursors = plan
.table_references
.get(0)
.unwrap()
.resolve_cursors(program)?;
let cursor_id = {
match cursors {
(_, Some(cursor_id)) | (Some(cursor_id), None) => cursor_id,
_ => panic!("cursor for table should have been opened"),
}
};
// TODO: I think this allocation can be avoided if we are smart with the `TranslateCtx`
let target_reg = program.alloc_register();
program.emit_insn(Insn::Count {
cursor_id,
target_reg,
exact: true,
});
program.emit_insn(Insn::Close { cursor_id });
let output_reg = program.alloc_register();
program.emit_insn(Insn::Copy {
src_reg: target_reg,
dst_reg: output_reg,
amount: 0,
});
program.emit_result_row(output_reg, 1);
Ok(())
}

View File

@@ -4756,6 +4756,24 @@ pub fn op_count(
pager: &Rc<Pager>,
mv_store: Option<&Rc<MvStore>>,
) -> Result<InsnFunctionStepResult> {
let Insn::Count {
cursor_id,
target_reg,
exact,
} = insn
else {
unreachable!("unexpected Insn {:?}", insn)
};
let count = {
let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Count");
let cursor = cursor.as_btree_mut();
let count = return_if_io!(cursor.count());
count
};
state.registers[*target_reg] = Register::OwnedValue(OwnedValue::Integer(count as i64));
state.pc += 1;
Ok(InsnFunctionStepResult::Step)
}