Merge 'Small fixes' from Nikita Sivukhin

Attempt to improve performance of `SELECT COUNT(*) FROM t WHERE 1 = 1`
(redundant filter added to force sqlite and tursodb to iterate over rows
instead of applying count optimization).
Before that PR turso with hot cache runs for ~440ms.
After that PR turso with hot cache runs for ~200ms.
This PR eliminates small things on a hot path and also introduce
separate flow for `get_next_record` which just increment cell idx if its
safe to do that - without much additional overhead.
Also, this PR eliminates RefCell/Cell from PageStack because there were
completely unnecessary.

Reviewed-by: Preston Thorpe <preston@turso.tech>
Reviewed-by: Pere Diaz Bou <pere-altea@homail.com>

Closes #2887
This commit is contained in:
Jussi Saurio
2025-09-08 09:45:35 +03:00
committed by GitHub
7 changed files with 177 additions and 179 deletions

View File

@@ -595,9 +595,9 @@ impl BTreeCursor {
},
overflow_state: OverflowState::Start,
stack: PageStack {
current_page: Cell::new(-1),
node_states: RefCell::new([BTreeNodeState::default(); BTCURSOR_MAX_DEPTH + 1]),
stack: RefCell::new([const { None }; BTCURSOR_MAX_DEPTH + 1]),
current_page: -1,
node_states: [BTreeNodeState::default(); BTCURSOR_MAX_DEPTH + 1],
stack: [const { None }; BTCURSOR_MAX_DEPTH + 1],
},
reusable_immutable_record: RefCell::new(None),
index_info: None,
@@ -721,6 +721,7 @@ impl BTreeCursor {
continue;
}
}
if cell_idx >= cell_count as i32 {
self.stack.set_cell_index(cell_count as i32 - 1);
} else if !self.stack.current_cell_index_less_than_min() {
@@ -1182,7 +1183,7 @@ impl BTreeCursor {
/// Check if any ancestor pages still have cells to iterate.
/// If not, traversing back up to parent is of no use because we are at the end of the tree.
fn ancestor_pages_have_more_children(&self) -> bool {
let node_states = self.stack.node_states.borrow();
let node_states = self.stack.node_states;
(0..self.stack.current())
.rev()
.any(|idx| !node_states[idx].is_at_end())
@@ -1204,9 +1205,18 @@ impl BTreeCursor {
}
}
loop {
let mem_page = self.stack.top();
let mem_page = self.stack.top_ref();
let contents = mem_page.get_contents();
let cell_idx = self.stack.current_cell_index();
let cell_count = contents.cell_count();
let is_leaf = contents.is_leaf();
if cell_idx != -1 && is_leaf && cell_idx as usize + 1 < cell_count {
self.stack.advance();
return Ok(IOResult::Done(true));
}
let mem_page = mem_page.clone();
let contents = mem_page.get_contents();
tracing::debug!(
id = mem_page.get().id,
cell = self.stack.current_cell_index(),
@@ -1264,15 +1274,15 @@ impl BTreeCursor {
}
turso_assert!(
cell_idx < contents.cell_count(),
cell_idx < cell_count,
"cell index out of bounds: cell_idx={}, cell_count={}, page_type={:?} page_id={}",
cell_idx,
contents.cell_count(),
cell_count,
contents.page_type(),
mem_page.get().id
);
if contents.is_leaf() {
if is_leaf {
return Ok(IOResult::Done(true));
}
if is_index && self.going_upwards {
@@ -4203,7 +4213,6 @@ impl BTreeCursor {
let cursor_has_record = return_if_io!(self.get_next_record());
self.has_record.replace(cursor_has_record);
self.invalidate_record();
self.advance_state = AdvanceState::Start;
return Ok(IOResult::Done(cursor_has_record));
}
}
@@ -4231,7 +4240,6 @@ impl BTreeCursor {
let cursor_has_record = return_if_io!(self.get_prev_record());
self.has_record.replace(cursor_has_record);
self.invalidate_record();
self.advance_state = AdvanceState::Start;
return Ok(IOResult::Done(cursor_has_record));
}
}
@@ -4586,7 +4594,6 @@ impl BTreeCursor {
// Ensure we keep the parent page at the same position as before the replacement.
self.stack
.node_states
.borrow_mut()
.get_mut(btree_depth)
.expect("parent page should be on the stack")
.cell_idx = cell_idx as i32;
@@ -5149,7 +5156,8 @@ impl BTreeCursor {
}
fn get_immutable_record_or_create(&self) -> std::cell::RefMut<'_, Option<ImmutableRecord>> {
if self.reusable_immutable_record.borrow().is_none() {
let mut reusable_immutable_record = self.reusable_immutable_record.borrow_mut();
if reusable_immutable_record.is_none() {
let page_size = self
.pager
.page_size
@@ -5157,9 +5165,9 @@ impl BTreeCursor {
.expect("page size is not set")
.get();
let record = ImmutableRecord::new(page_size as usize);
self.reusable_immutable_record.replace(Some(record));
reusable_immutable_record.replace(record);
}
self.reusable_immutable_record.borrow_mut()
reusable_immutable_record
}
fn get_immutable_record(&self) -> std::cell::RefMut<'_, Option<ImmutableRecord>> {
@@ -5884,9 +5892,9 @@ impl CoverageChecker {
/// the parent. Using current_page + 1 or higher is undefined behaviour.
struct PageStack {
/// Pointer to the current page being consumed
current_page: Cell<i32>,
current_page: i32,
/// List of pages in the stack. Root page will be in index 0
pub stack: RefCell<[Option<PageRef>; BTCURSOR_MAX_DEPTH + 1]>,
pub stack: [Option<PageRef>; BTCURSOR_MAX_DEPTH + 1],
/// List of cell indices in the stack.
/// node_states[current_page] is the current cell index being consumed. Similarly
/// node_states[current_page-1] is the cell index of the parent of the current page
@@ -5894,32 +5902,21 @@ struct PageStack {
/// There are two points that need special attention:
/// If node_states[current_page] = -1, it indicates that the current iteration has reached the start of the current_page
/// If node_states[current_page] = `cell_count`, it means that the current iteration has reached the end of the current_page
node_states: RefCell<[BTreeNodeState; BTCURSOR_MAX_DEPTH + 1]>,
node_states: [BTreeNodeState; BTCURSOR_MAX_DEPTH + 1],
}
impl PageStack {
fn increment_current(&self) {
self.current_page.set(self.current_page.get() + 1);
}
fn decrement_current(&self) {
assert!(self.current_page.get() > 0);
self.current_page.set(self.current_page.get() - 1);
}
/// Push a new page onto the stack.
/// This effectively means traversing to a child page.
#[instrument(skip_all, level = Level::DEBUG, name = "pagestack::push")]
fn _push(&self, page: PageRef, starting_cell_idx: i32) {
tracing::trace!(
current = self.current_page.get(),
new_page_id = page.get().id,
);
fn _push(&mut self, page: PageRef, starting_cell_idx: i32) {
tracing::trace!(current = self.current_page, new_page_id = page.get().id,);
'validate: {
let current = self.current_page.get();
let current = self.current_page;
if current == -1 {
break 'validate;
}
let stack = self.stack.borrow();
let current_top = stack[current as usize].as_ref();
let current_top = self.stack[current as usize].as_ref();
if let Some(current_top) = current_top {
turso_assert!(
current_top.get().id != page.get().id,
@@ -5929,19 +5926,19 @@ impl PageStack {
}
}
self.populate_parent_cell_count();
self.increment_current();
let current = self.current_page.get();
self.current_page += 1;
assert!(self.current_page >= 0);
let current = self.current_page as usize;
assert!(
current < BTCURSOR_MAX_DEPTH as i32,
current < BTCURSOR_MAX_DEPTH,
"corrupted database, stack is bigger than expected"
);
assert!(current >= 0);
// Pin the page to prevent it from being evicted while on the stack
page.pin();
self.stack.borrow_mut()[current as usize] = Some(page);
self.node_states.borrow_mut()[current as usize] = BTreeNodeState {
self.stack[current] = Some(page);
self.node_states[current] = BTreeNodeState {
cell_idx: starting_cell_idx,
cell_count: None, // we don't know the cell count yet, so we set it to None. any code pushing a child page onto the stack MUST set the parent page's cell_count.
};
@@ -5953,14 +5950,13 @@ impl PageStack {
///
/// This rests on the assumption that the parent page is already in memory whenever a child is pushed onto the stack.
/// We currently ensure this by pinning all the pages on [PageStack] to the page cache so that they cannot be evicted.
fn populate_parent_cell_count(&self) {
let stack_empty = self.current_page.get() == -1;
fn populate_parent_cell_count(&mut self) {
let stack_empty = self.current_page == -1;
if stack_empty {
return;
}
let current = self.current();
let stack = self.stack.borrow();
let page = stack[current].as_ref().unwrap();
let page = self.stack[current].as_ref().unwrap();
turso_assert!(
page.is_pinned(),
"parent page {} is not pinned",
@@ -5973,59 +5969,64 @@ impl PageStack {
);
let contents = page.get_contents();
let cell_count = contents.cell_count() as i32;
self.node_states.borrow_mut()[current].cell_count = Some(cell_count);
self.node_states[current].cell_count = Some(cell_count);
}
fn push(&self, page: PageRef) {
fn push(&mut self, page: PageRef) {
self._push(page, -1);
}
fn push_backwards(&self, page: PageRef) {
fn push_backwards(&mut self, page: PageRef) {
self._push(page, i32::MAX);
}
/// Pop a page off the stack.
/// This effectively means traversing back up to a parent page.
#[instrument(skip_all, level = Level::DEBUG, name = "pagestack::pop")]
fn pop(&self) {
let current = self.current_page.get();
fn pop(&mut self) {
let current = self.current_page;
assert!(current >= 0);
tracing::trace!(current);
let current = current as usize;
// Unpin the page before removing it from the stack
if let Some(page) = &self.stack.borrow()[current as usize] {
if let Some(page) = &self.stack[current] {
page.unpin();
}
self.node_states.borrow_mut()[current as usize] = BTreeNodeState::default();
self.stack.borrow_mut()[current as usize] = None;
self.decrement_current();
assert!(current > 0);
self.node_states[current] = BTreeNodeState::default();
self.stack[current] = None;
self.current_page -= 1;
}
/// Get the top page on the stack.
/// This is the page that is currently being traversed.
#[instrument(skip(self), level = Level::DEBUG, name = "pagestack::top", )]
fn top(&self) -> Arc<Page> {
let page = self.stack.borrow()[self.current()]
.as_ref()
.unwrap()
.clone();
tracing::trace!(current = self.current(), page_id = page.get().id);
let current = self.current();
let page = self.stack[current].clone().unwrap();
turso_assert!(page.is_loaded(), "page should be loaded");
page
}
fn top_ref(&self) -> &Arc<Page> {
let current = self.current();
let page = self.stack[current].as_ref().unwrap();
turso_assert!(page.is_loaded(), "page should be loaded");
page
}
/// Current page pointer being used
#[inline(always)]
fn current(&self) -> usize {
let current = self.current_page.get() as usize;
assert!(self.current_page.get() >= 0);
current
assert!(self.current_page >= 0);
self.current_page as usize
}
/// Cell index of the current page
fn current_cell_index(&self) -> i32 {
let current = self.current();
self.node_states.borrow()[current].cell_idx
self.node_states[current].cell_idx
}
/// Check if the current cell index is less than 0.
@@ -6037,59 +6038,50 @@ impl PageStack {
/// Advance the current cell index of the current page to the next cell.
/// We usually advance after going traversing a new page
#[instrument(skip(self), level = Level::DEBUG, name = "pagestack::advance",)]
fn advance(&self) {
#[inline(always)]
fn advance(&mut self) {
let current = self.current();
tracing::trace!(
curr_cell_index = self.node_states.borrow()[current].cell_idx,
node_states = ?self.node_states.borrow().iter().map(|state| state.cell_idx).collect::<Vec<_>>(),
);
self.node_states.borrow_mut()[current].cell_idx += 1;
self.node_states[current].cell_idx += 1;
}
#[instrument(skip(self), level = Level::DEBUG, name = "pagestack::retreat")]
fn retreat(&self) {
fn retreat(&mut self) {
let current = self.current();
tracing::trace!(
curr_cell_index = self.node_states.borrow()[current].cell_idx,
node_states = ?self.node_states.borrow().iter().map(|state| state.cell_idx).collect::<Vec<_>>(),
curr_cell_index = self.node_states[current].cell_idx,
node_states = ?self.node_states.iter().map(|state| state.cell_idx).collect::<Vec<_>>(),
);
self.node_states.borrow_mut()[current].cell_idx -= 1;
self.node_states[current].cell_idx -= 1;
}
fn set_cell_index(&self, idx: i32) {
fn set_cell_index(&mut self, idx: i32) {
let current = self.current();
self.node_states.borrow_mut()[current].cell_idx = idx;
self.node_states[current].cell_idx = idx;
}
fn has_parent(&self) -> bool {
self.current_page.get() > 0
self.current_page > 0
}
/// Get a page at a specific level in the stack (0 = root, 1 = first child, etc.)
fn get_page_at_level(&self, level: usize) -> Option<PageRef> {
let stack = self.stack.borrow();
if level < stack.len() {
stack[level].clone()
if level < self.stack.len() {
self.stack[level].clone()
} else {
None
}
}
fn unpin_all_if_pinned(&self) {
self.stack
.borrow_mut()
.iter_mut()
.flatten()
.for_each(|page| {
let _ = page.try_unpin();
});
fn unpin_all_if_pinned(&mut self) {
self.stack.iter_mut().flatten().for_each(|page| {
let _ = page.try_unpin();
});
}
fn clear(&self) {
fn clear(&mut self) {
self.unpin_all_if_pinned();
self.current_page.set(-1);
self.current_page = -1;
}
}

View File

@@ -759,83 +759,89 @@ impl Ord for Value {
impl std::ops::Add<Value> for Value {
type Output = Value;
fn add(self, rhs: Self) -> Self::Output {
match (self, rhs) {
(Self::Integer(int_left), Self::Integer(int_right)) => {
Self::Integer(int_left + int_right)
}
(Self::Integer(int_left), Self::Float(float_right)) => {
Self::Float(int_left as f64 + float_right)
}
(Self::Float(float_left), Self::Integer(int_right)) => {
Self::Float(float_left + int_right as f64)
}
(Self::Float(float_left), Self::Float(float_right)) => {
Self::Float(float_left + float_right)
}
(Self::Text(string_left), Self::Text(string_right)) => {
Self::build_text(&(string_left.as_str().to_string() + string_right.as_str()))
}
(Self::Text(string_left), Self::Integer(int_right)) => {
Self::build_text(&(string_left.as_str().to_string() + &int_right.to_string()))
}
(Self::Integer(int_left), Self::Text(string_right)) => {
Self::build_text(&(int_left.to_string() + string_right.as_str()))
}
(Self::Text(string_left), Self::Float(float_right)) => {
let string_right = Self::Float(float_right).to_string();
Self::build_text(&(string_left.as_str().to_string() + &string_right))
}
(Self::Float(float_left), Self::Text(string_right)) => {
let string_left = Self::Float(float_left).to_string();
Self::build_text(&(string_left + string_right.as_str()))
}
(lhs, Self::Null) => lhs,
(Self::Null, rhs) => rhs,
_ => Self::Float(0.0),
}
fn add(mut self, rhs: Self) -> Self::Output {
self += rhs;
self
}
}
impl std::ops::Add<f64> for Value {
type Output = Value;
fn add(self, rhs: f64) -> Self::Output {
match self {
Self::Integer(int_left) => Self::Float(int_left as f64 + rhs),
Self::Float(float_left) => Self::Float(float_left + rhs),
_ => unreachable!(),
}
fn add(mut self, rhs: f64) -> Self::Output {
self += rhs;
self
}
}
impl std::ops::Add<i64> for Value {
type Output = Value;
fn add(self, rhs: i64) -> Self::Output {
match self {
Self::Integer(int_left) => Self::Integer(int_left + rhs),
Self::Float(float_left) => Self::Float(float_left + rhs as f64),
_ => unreachable!(),
}
fn add(mut self, rhs: i64) -> Self::Output {
self += rhs;
self
}
}
impl std::ops::AddAssign for Value {
fn add_assign(&mut self, rhs: Self) {
*self = self.clone() + rhs;
fn add_assign(mut self: &mut Self, rhs: Self) {
match (&mut self, rhs) {
(Self::Integer(int_left), Self::Integer(int_right)) => *int_left += int_right,
(Self::Integer(int_left), Self::Float(float_right)) => {
*self = Self::Float(*int_left as f64 + float_right)
}
(Self::Float(float_left), Self::Integer(int_right)) => {
*self = Self::Float(*float_left + int_right as f64)
}
(Self::Float(float_left), Self::Float(float_right)) => {
*float_left += float_right;
}
(Self::Text(string_left), Self::Text(string_right)) => {
string_left.value.extend_from_slice(&string_right.value);
string_left.subtype = TextSubtype::Text;
}
(Self::Text(string_left), Self::Integer(int_right)) => {
let string_right = int_right.to_string();
string_left.value.extend_from_slice(string_right.as_bytes());
string_left.subtype = TextSubtype::Text;
}
(Self::Integer(int_left), Self::Text(string_right)) => {
let string_left = int_left.to_string();
*self = Self::build_text(&(string_left + string_right.as_str()));
}
(Self::Text(string_left), Self::Float(float_right)) => {
let string_right = Self::Float(float_right).to_string();
string_left.value.extend_from_slice(string_right.as_bytes());
string_left.subtype = TextSubtype::Text;
}
(Self::Float(float_left), Self::Text(string_right)) => {
let string_left = Self::Float(*float_left).to_string();
*self = Self::build_text(&(string_left + string_right.as_str()));
}
(_, Self::Null) => {}
(Self::Null, rhs) => *self = rhs,
_ => *self = Self::Float(0.0),
}
}
}
impl std::ops::AddAssign<i64> for Value {
fn add_assign(&mut self, rhs: i64) {
*self = self.clone() + rhs;
match self {
Self::Integer(int_left) => *int_left += rhs,
Self::Float(float_left) => *float_left += rhs as f64,
_ => unreachable!(),
}
}
}
impl std::ops::AddAssign<f64> for Value {
fn add_assign(&mut self, rhs: f64) {
*self = self.clone() + rhs;
match self {
Self::Integer(int_left) => *self = Self::Float(*int_left as f64 + rhs),
Self::Float(float_left) => *float_left += rhs,
_ => unreachable!(),
}
}
}
@@ -2475,9 +2481,10 @@ impl<T> IOResult<T> {
#[macro_export]
macro_rules! return_if_io {
($expr:expr) => {
match $expr? {
IOResult::Done(v) => v,
IOResult::IO(io) => return Ok(IOResult::IO(io)),
match $expr {
Ok(IOResult::Done(v)) => v,
Ok(IOResult::IO(io)) => return Ok(IOResult::IO(io)),
Err(err) => return Err(err),
}
};
}

View File

@@ -118,9 +118,10 @@ macro_rules! load_insn {
macro_rules! return_if_io {
($expr:expr) => {
match $expr? {
IOResult::Done(v) => v,
IOResult::IO(io) => return Ok(InsnFunctionStepResult::IO(io)),
match $expr {
Ok(IOResult::Done(v)) => v,
Ok(IOResult::IO(io)) => return Ok(InsnFunctionStepResult::IO(io)),
Err(err) => return Err(err),
}
};
}
@@ -3494,22 +3495,22 @@ pub fn op_agg_step(
}
}
AggFunc::Count | AggFunc::Count0 => {
let col = state.registers[*col].get_value().clone();
let skip = (matches!(func, AggFunc::Count)
&& matches!(state.registers[*col].get_value(), Value::Null));
if matches!(&state.registers[*acc_reg], Register::Value(Value::Null)) {
state.registers[*acc_reg] =
Register::Aggregate(AggContext::Count(Value::Integer(0)));
}
let Register::Aggregate(agg) = state.registers[*acc_reg].borrow_mut() else {
let Register::Aggregate(agg) = &mut state.registers[*acc_reg] else {
panic!(
"Unexpected value {:?} in AggStep at register {}",
state.registers[*acc_reg], *acc_reg
);
};
let AggContext::Count(count) = agg.borrow_mut() else {
let AggContext::Count(count) = agg else {
unreachable!();
};
if !(matches!(func, AggFunc::Count) && matches!(col, Value::Null)) {
if !skip {
*count += 1;
};
}

View File

@@ -415,15 +415,16 @@ impl Register {
macro_rules! must_be_btree_cursor {
($cursor_id:expr, $cursor_ref:expr, $state:expr, $insn_name:expr) => {{
let (_, cursor_type) = $cursor_ref.get($cursor_id).unwrap();
let cursor = match cursor_type {
CursorType::BTreeTable(_) => $crate::get_cursor!($state, $cursor_id),
CursorType::BTreeIndex(_) => $crate::get_cursor!($state, $cursor_id),
CursorType::MaterializedView(_, _) => $crate::get_cursor!($state, $cursor_id),
CursorType::Pseudo(_) => panic!("{} on pseudo cursor", $insn_name),
CursorType::Sorter => panic!("{} on sorter cursor", $insn_name),
CursorType::VirtualTable(_) => panic!("{} on virtual table cursor", $insn_name),
};
cursor
if matches!(
cursor_type,
CursorType::BTreeTable(_)
| CursorType::BTreeIndex(_)
| CursorType::MaterializedView(_, _)
) {
$crate::get_cursor!($state, $cursor_id)
} else {
panic!("{} on unexpected cursor", $insn_name)
}
}};
}
@@ -471,6 +472,7 @@ impl Program {
mv_store: Option<Arc<MvStore>>,
pager: Rc<Pager>,
) -> Result<StepResult> {
let enable_tracing = tracing::enabled!(tracing::Level::TRACE);
loop {
if self.connection.closed.get() {
// Connection is closed for whatever reason, rollback the transaction.
@@ -497,7 +499,9 @@ impl Program {
// invalidate row
let _ = state.result_row.take();
let (insn, insn_function) = &self.insns[state.pc as usize];
trace_insn(self, state.pc as InsnReference, insn);
if enable_tracing {
trace_insn(self, state.pc as InsnReference, insn);
}
// Always increment VM steps for every loop iteration
state.metrics.vm_steps = state.metrics.vm_steps.saturating_add(1);
@@ -832,9 +836,6 @@ pub fn registers_to_ref_values(registers: &[Register]) -> Vec<RefValue> {
#[instrument(skip(program), level = Level::DEBUG)]
fn trace_insn(program: &Program, addr: InsnReference, insn: &Insn) {
if !tracing::enabled!(tracing::Level::TRACE) {
return;
}
tracing::trace!(
"\n{}",
explain::insn_to_str(

View File

@@ -64,9 +64,6 @@ impl TempDatabase {
}
pub fn new_with_rusqlite(table_sql: &str, enable_indexes: bool) -> Self {
let _ = tracing_subscriber::fmt()
.with_max_level(tracing::Level::TRACE)
.finish();
let mut path = TempDir::new().unwrap().keep();
path.push("test.db");
{

View File

@@ -1,16 +1,3 @@
mod test_cdc;
mod test_function_rowid;
mod test_wal_api;
#[cfg(test)]
mod tests {
use tracing_subscriber::EnvFilter;
#[ctor::ctor]
fn init() {
tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env())
.with_ansi(false)
.init();
}
}

View File

@@ -5,3 +5,16 @@ mod fuzz_transaction;
mod pragma;
mod query_processing;
mod wal;
#[cfg(test)]
mod tests {
use tracing_subscriber::EnvFilter;
#[ctor::ctor]
fn init() {
tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env())
.with_ansi(false)
.init();
}
}