From 08b2e685d5315e7181789c3f63d1b678efe72296 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Fri, 5 Sep 2025 07:04:33 -0500
Subject: [PATCH 1/4] Persistence for DBSP-based materialized views

This fairly long commit implements persistence for materialized views.
It is hard to split because of all the interdependencies between
components, so it lands as one big change. This commit message will at
least try to go into detail about the basic architecture.

Materialized Views as tables
============================
Materialized views are now normal tables, whereas before they were
virtual tables. By making a materialized view a table, we can reuse all
the infrastructure for dealing with tables (cursors, etc.). One of the
advantages of doing this is that we can create indexes on view columns.
Later, we should also be able to write those views to separate files
with ATTACH write.

Materialized Views as ZSets
===========================
The contents of the table are a ZSet: rowid, values, weight. Readers
will notice that, because of this, usage of the ZSet data structure
dwindles throughout the codebase. The main difference between our
materialized ZSet and the standard DBSP ZSet is that ours is backed by
a BTree, not a hash, since SQLite tables are BTrees.

Aggregator State
================
In DBSP, the aggregator nodes also have state. To store that state,
there is a second table per view, __turso_internal_dbsp_state_{view_name},
which holds all the aggregators in the view. The format of that table
is similar to a ZSet: rowid, serialized_values, weight. We serialize
the values because there will be many different aggregators in the
table, so we cannot rely on a particular format for the values.

The Materialized View Cursor
============================
Reading from a Materialized View essentially means reading from the
persisted ZSet and enhancing that with data that exists within the
transaction. Transaction data is ephemeral, so we do not materialize it
anywhere: a carefully crafted implementation of seek takes care of
merging weights and stitching the two sets together.
---
 core/incremental/compiler.rs     | 1324 +++++++++++++++--------
 core/incremental/cursor.rs       | 1618 ++++++++++++++++++++++++++++
 core/incremental/dbsp.rs         |  198 +++-
 core/incremental/hashable_row.rs |   20 +
 core/incremental/mod.rs          |    1 +
 core/incremental/operator.rs     | 1712 +++++++++++++++++++++---------
 core/incremental/view.rs         |  566 ++++++----
 core/lib.rs                      |   25 +-
 core/schema.rs                   |  192 ++--
 core/translate/delete.rs         |    6 +
 core/translate/insert.rs         |    6 +
 core/translate/main_loop.rs      |    3 +-
 core/translate/plan.rs           |   18 +-
 core/translate/planner.rs        |   44 +-
 core/translate/schema.rs         |    8 +
 core/translate/update.rs         |    6 +
 core/translate/view.rs           |  259 +++--
 core/types.rs                    |   33 +
 core/util.rs                     |  137 ++-
 core/vdbe/builder.rs             |    5 +
 core/vdbe/execute.rs             |  329 ++++--
 core/vdbe/explain.rs             |    9 +-
 core/vdbe/insn.rs                |    9 +-
 core/vdbe/mod.rs                 |  123 ++-
 core/vtab.rs                     |   30 +-
 core/vtab_view.rs                |  101 --
 26 files changed, 5177 insertions(+), 1605 deletions(-)
 create mode 100644 core/incremental/cursor.rs
 delete mode 100644 core/vtab_view.rs

diff --git a/core/incremental/compiler.rs b/core/incremental/compiler.rs
index 9cd2e3702..6b9996a85 100644
--- a/core/incremental/compiler.rs
+++ b/core/incremental/compiler.rs
@@ -5,16 +5,221 @@
 //!
 //! 
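+//! Persistent layout, in brief (a sketch of what the code below maintains;
+//! both btrees backing a view follow the ZSet shape described in the commit
+//! message, with the weight stored as a trailing integer column):
+//!
+//!   view data btree:       rowid | col_0 .. col_n | weight
+//!   operator state btree:  rowid | serialized operator state | weight
+//!
+//! (see `OPERATOR_COLUMNS` and `WriteViewRow` below)
 //! 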
Based on the DBSP paper: "DBSP: Automatic Incremental View Maintenance for Rich Query Languages" +use crate::incremental::dbsp::Delta; use crate::incremental::expr_compiler::CompiledExpression; +use crate::incremental::hashable_row::HashableRow; use crate::incremental::operator::{ - Delta, FilterOperator, FilterPredicate, IncrementalOperator, ProjectOperator, + EvalState, FilterOperator, FilterPredicate, IncrementalOperator, InputOperator, ProjectOperator, }; +use crate::storage::btree::{BTreeCursor, BTreeKey}; // Note: logical module must be made pub(crate) in translate/mod.rs -use crate::translate::logical::{BinaryOperator, LogicalExpr, LogicalPlan, SchemaRef}; -use crate::types::Value; -use crate::{LimboError, Result}; +use crate::translate::logical::{ + BinaryOperator, LogicalExpr, LogicalPlan, LogicalSchema, SchemaRef, +}; +use crate::types::{IOResult, SeekKey, SeekOp, SeekResult, Value}; +use crate::Pager; +use crate::{return_and_restore_if_io, return_if_io, LimboError, Result}; use std::collections::HashMap; use std::fmt::{self, Display, Formatter}; +use std::rc::Rc; +use std::sync::Arc; + +// The state table is always a key-value store with 3 columns: key, state, and weight. +const OPERATOR_COLUMNS: usize = 3; + +/// State machine for writing a row to the materialized view +#[derive(Debug)] +pub enum WriteViewRow { + /// Initial empty state + Empty, + + /// Reading existing record to get current weight + GetRecord, + + /// Deleting the row (when final weight <= 0) + Delete, + + /// Inserting/updating the row with new weight + Insert { + /// The final weight to write + final_weight: isize, + }, + + /// Completed processing this row + Done, +} + +impl WriteViewRow { + fn new() -> Self { + Self::Empty + } + fn write_row( + &mut self, + cursor: &mut BTreeCursor, + row: HashableRow, + weight: isize, + ) -> Result> { + loop { + match self { + WriteViewRow::Empty => { + let key = SeekKey::TableRowId(row.rowid); + let res = return_if_io!(cursor.seek(key, SeekOp::GE { eq_only: true })); + match res { + SeekResult::Found => *self = WriteViewRow::GetRecord, + _ => { + *self = WriteViewRow::Insert { + final_weight: weight, + } + } + } + } + WriteViewRow::GetRecord => { + let existing_record = return_if_io!(cursor.record()); + let r = existing_record.ok_or_else(|| { + crate::LimboError::InternalError(format!( + "Found rowid {} in storage but could not read record", + row.rowid + )) + })?; + let values = r.get_values(); + + // last value should contain the weight + let existing_weight = match values.last() { + Some(ref_val) => match ref_val.to_owned() { + Value::Integer(w) => w as isize, + _ => { + return Err(crate::LimboError::InternalError(format!( + "Invalid weight value in storage for rowid {}", + row.rowid + ))) + } + }, + None => { + return Err(crate::LimboError::InternalError(format!( + "No weight value found in storage for rowid {}", + row.rowid + ))) + } + }; + let final_weight = existing_weight + weight; + if final_weight <= 0 { + *self = WriteViewRow::Delete + } else { + *self = WriteViewRow::Insert { final_weight } + } + } + WriteViewRow::Delete => { + // Delete the row. 
Important: when delete returns I/O, the btree operation + // has already completed in memory, so mark as Done to avoid retry + *self = WriteViewRow::Done; + return_if_io!(cursor.delete()); + } + WriteViewRow::Insert { final_weight } => { + let key = SeekKey::TableRowId(row.rowid); + return_if_io!(cursor.seek(key, SeekOp::GE { eq_only: true })); + + // Create the record values: row values + weight + let mut values = row.values.clone(); + values.push(Value::Integer(*final_weight as i64)); + + // Create an ImmutableRecord from the values + let immutable_record = + crate::types::ImmutableRecord::from_values(&values, values.len()); + let btree_key = BTreeKey::new_table_rowid(row.rowid, Some(&immutable_record)); + // Insert the row. Important: when insert returns I/O, the btree operation + // has already completed in memory, so mark as Done to avoid retry + *self = WriteViewRow::Done; + return_if_io!(cursor.insert(&btree_key)); + } + WriteViewRow::Done => { + break; + } + } + } + Ok(IOResult::Done(())) + } +} + +/// State machine for commit operations +pub enum CommitState { + /// Initial state - ready to start commit + Init, + + /// Running circuit with commit_operators flag set to true + CommitOperators { + /// Execute state for running the circuit + execute_state: Box, + /// Persistent cursor for operator state btree (internal_state_root) + state_cursor: Box, + }, + + /// Updating the materialized view with the delta + UpdateView { + /// Delta to write to the view + delta: Delta, + /// Current index in delta.changes being processed + current_index: usize, + /// State for writing individual rows + write_row_state: WriteViewRow, + /// Cursor for view data btree - created fresh for each row + view_cursor: Box, + }, +} + +impl std::fmt::Debug for CommitState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Init => write!(f, "Init"), + Self::CommitOperators { execute_state, .. } => f + .debug_struct("CommitOperators") + .field("execute_state", execute_state) + .field("has_state_cursor", &true) + .finish(), + Self::UpdateView { + delta, + current_index, + write_row_state, + .. 
+ } => f + .debug_struct("UpdateView") + .field("delta", delta) + .field("current_index", current_index) + .field("write_row_state", write_row_state) + .field("has_view_cursor", &true) + .finish(), + } + } +} + +/// State machine for circuit execution across I/O operations +/// Similar to EvalState but for tracking execution state through the circuit +#[derive(Debug)] +pub enum ExecuteState { + /// Empty state so we can allocate the space without executing + Uninitialized, + + /// Initial state - starting circuit execution + Init { + /// Input deltas to process + input_data: DeltaSet, + }, + + /// Processing multiple inputs (for recursive node processing) + ProcessingInputs { + /// Collection of (node_id, state) pairs to process + input_states: Vec<(usize, ExecuteState)>, + /// Current index being processed + current_index: usize, + /// Collected deltas from processed inputs + input_deltas: Vec, + }, + + /// Processing a specific node in the circuit + ProcessingNode { + /// Node's evaluation state (includes the delta in its Init state) + eval_state: Box, + }, +} /// A set of deltas for multiple tables/operators /// This provides a cleaner API for passing deltas through circuit execution @@ -39,6 +244,11 @@ impl DeltaSet { } } + /// Create a DeltaSet from a HashMap + pub fn from_map(deltas: HashMap) -> Self { + Self { deltas } + } + /// Add a delta for a table pub fn insert(&mut self, table_name: String, delta: Delta) { self.deltas.insert(table_name, delta); @@ -96,8 +306,8 @@ pub struct DbspNode { pub operator: DbspOperator, /// Input nodes (edges in the DAG) pub inputs: Vec, - /// The actual executable operator (if applicable) - pub executable: Option>, + /// The actual executable operator + pub executable: Box, } impl std::fmt::Debug for DbspNode { @@ -106,11 +316,51 @@ impl std::fmt::Debug for DbspNode { .field("id", &self.id) .field("operator", &self.operator) .field("inputs", &self.inputs) - .field("has_executable", &self.executable.is_some()) + .field("has_executable", &true) .finish() } } +impl DbspNode { + fn process_node( + &mut self, + pager: Rc, + eval_state: &mut EvalState, + root_page: usize, + commit_operators: bool, + state_cursor: Option<&mut Box>, + ) -> Result> { + // Process delta using the executable operator + let op = &mut self.executable; + + // Use provided cursor or create a local one + let mut local_cursor; + let cursor = if let Some(cursor) = state_cursor { + cursor.as_mut() + } else { + // Create a local cursor if none was provided + local_cursor = BTreeCursor::new_table(None, pager.clone(), root_page, OPERATOR_COLUMNS); + &mut local_cursor + }; + + let state = if commit_operators { + // Clone the delta from eval_state - don't extract it + // in case we need to re-execute due to I/O + let delta = match eval_state { + EvalState::Init { delta } => delta.clone(), + _ => panic!("commit can only be called when eval_state is in Init state"), + }; + let result = return_if_io!(op.commit(delta, cursor)); + // After successful commit, move state to Done + *eval_state = EvalState::Done; + result + } else { + return_if_io!(op.eval(eval_state, cursor)) + }; + Ok(IOResult::Done(state)) + } +} + /// Represents a complete DBSP circuit (DAG of operators) #[derive(Debug)] pub struct DbspCircuit { @@ -120,24 +370,48 @@ pub struct DbspCircuit { next_id: usize, /// Root node ID (the final output) pub(super) root: Option, + /// Output schema of the circuit (schema of the root node) + pub(super) output_schema: SchemaRef, + + /// State machine for commit operation + commit_state: 
CommitState, + + /// Root page for the main materialized view data + pub(super) main_data_root: usize, + /// Root page for internal DBSP state + pub(super) internal_state_root: usize, } impl DbspCircuit { - /// Create a new empty circuit - pub fn new() -> Self { + /// Create a new empty circuit with initial empty schema + /// The actual output schema will be set when the root node is established + pub fn new(main_data_root: usize, internal_state_root: usize) -> Self { + // Start with an empty schema - will be updated when root is set + let empty_schema = Arc::new(LogicalSchema::new(vec![])); Self { nodes: HashMap::new(), next_id: 0, root: None, + output_schema: empty_schema, + commit_state: CommitState::Init, + main_data_root, + internal_state_root, } } + /// Set the root node and update the output schema + fn set_root(&mut self, root_id: usize, schema: SchemaRef) { + self.root = Some(root_id); + self.output_schema = schema; + } + + /// Get the current materialized state by reading from btree /// Add a node to the circuit fn add_node( &mut self, operator: DbspOperator, inputs: Vec, - executable: Option>, + executable: Box, ) -> usize { let id = self.next_id; self.next_id += 1; @@ -153,11 +427,21 @@ impl DbspCircuit { id } - /// Initialize the circuit with base data. Should be called once before processing deltas. - /// If the database is restarting with materialized views, this can be skipped. - pub fn initialize(&mut self, input_data: HashMap) -> Result { + pub fn run_circuit( + &mut self, + pager: Rc, + execute_state: &mut ExecuteState, + commit_operators: bool, + state_cursor: &mut Box, + ) -> Result> { if let Some(root_id) = self.root { - self.initialize_node(root_id, &input_data) + self.execute_node( + root_id, + pager, + execute_state, + commit_operators, + Some(state_cursor), + ) } else { Err(LimboError::ParseError( "Circuit has no root node".to_string(), @@ -165,80 +449,19 @@ impl DbspCircuit { } } - /// Initialize a specific node and its dependencies - fn initialize_node( - &mut self, - node_id: usize, - input_data: &HashMap, - ) -> Result { - // Clone to avoid borrow checker issues - let inputs = self - .nodes - .get(&node_id) - .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))? - .inputs - .clone(); - - // Initialize inputs first - let mut input_deltas = Vec::new(); - for input_id in inputs { - let delta = self.initialize_node(input_id, input_data)?; - input_deltas.push(delta); - } - - // Get mutable reference to node - let node = self - .nodes - .get_mut(&node_id) - .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?; - - // Initialize based on operator type - let result = match &node.operator { - DbspOperator::Input { name, .. } => { - // Get data from input map - input_data.get(name).cloned().unwrap_or_else(Delta::new) - } - DbspOperator::Filter { .. } - | DbspOperator::Projection { .. } - | DbspOperator::Aggregate { .. } => { - // Initialize the executable operator - if let Some(ref mut op) = node.executable { - if !input_deltas.is_empty() { - let input_delta = input_deltas[0].clone(); - op.initialize(input_delta); - op.get_current_state() - } else { - Delta::new() - } - } else { - // If no executable, pass through the input - if !input_deltas.is_empty() { - input_deltas[0].clone() - } else { - Delta::new() - } - } - } - }; - - Ok(result) - } - /// Execute the circuit with incremental input data (deltas). - /// Call initialize() first for initial data, then use execute() for updates. 
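+    /// Execution is resumable across I/O: the caller holds an `ExecuteState`
+    /// and re-enters `execute` until it returns `IOResult::Done`. A minimal
+    /// driver, modeled on the `test_execute` helper in the tests below
+    /// (assuming `inputs: HashMap<String, Delta>` and a `pager` are in scope):
+    ///
+    ///     let mut state = ExecuteState::Init {
+    ///         input_data: DeltaSet::from_map(inputs),
+    ///     };
+    ///     let delta = pager
+    ///         .io
+    ///         .block(|| circuit.execute(pager.clone(), &mut state))?;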
/// /// # Arguments - /// * `input_data` - The committed deltas to process - /// * `uncommitted_data` - Uncommitted transaction deltas that should be visible - /// during this execution but not stored in operators. - /// Use DeltaSet::empty() for no uncommitted changes. + /// * `pager` - Pager for btree access + /// * `context` - Execution context for tracking operator states + /// * `execute_state` - State machine containing input deltas and tracking execution progress pub fn execute( - &self, - input_data: HashMap, - uncommitted_data: DeltaSet, - ) -> Result { + &mut self, + pager: Rc, + execute_state: &mut ExecuteState, + ) -> Result> { if let Some(root_id) = self.root { - self.execute_node(root_id, &input_data, &uncommitted_data) + self.execute_node(root_id, pager, execute_state, false, None) } else { Err(LimboError::ParseError( "Circuit has no root node".to_string(), @@ -246,146 +469,243 @@ impl DbspCircuit { } } - /// Commit deltas to the circuit, updating internal operator state. + /// Commit deltas to the circuit, updating internal operator state and persisting to btree. /// This should be called after execute() when you want to make changes permanent. /// /// # Arguments /// * `input_data` - The deltas to commit (same as what was passed to execute) - pub fn commit(&mut self, input_data: HashMap) -> Result<()> { - if let Some(root_id) = self.root { - self.commit_node(root_id, &input_data)?; - } - Ok(()) - } - - /// Commit a specific node in the circuit - fn commit_node( + /// * `pager` - Pager for creating cursors to the btrees + pub fn commit( &mut self, - node_id: usize, - input_data: &HashMap, - ) -> Result { - // Clone to avoid borrow checker issues - let inputs = self - .nodes - .get(&node_id) - .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))? - .inputs - .clone(); - - // Process inputs first - let mut input_deltas = Vec::new(); - for input_id in inputs { - let delta = self.commit_node(input_id, input_data)?; - input_deltas.push(delta); + input_data: HashMap, + pager: Rc, + ) -> Result> { + // No root means nothing to commit + if self.root.is_none() { + return Ok(IOResult::Done(Delta::new())); } - // Get mutable reference to node - let node = self - .nodes - .get_mut(&node_id) - .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?; + // Get btree root pages + let main_data_root = self.main_data_root; - // Commit based on operator type - let result = match &node.operator { - DbspOperator::Input { name, .. } => { - // For input nodes, just return the committed delta - input_data.get(name).cloned().unwrap_or_else(Delta::new) - } - DbspOperator::Filter { .. } - | DbspOperator::Projection { .. } - | DbspOperator::Aggregate { .. } => { - // Commit the delta to the executable operator - if let Some(ref mut op) = node.executable { - if !input_deltas.is_empty() { - let input_delta = input_deltas[0].clone(); - // Commit updates state and returns the output delta - op.commit(input_delta) + // Add 1 for the weight column that we store in the btree + let num_columns = self.output_schema.columns.len() + 1; + + // Convert input_data to DeltaSet once, outside the loop + let input_delta_set = DeltaSet::from_map(input_data); + + loop { + // Take ownership of the state for processing, to avoid borrow checker issues (we have + // to call run_circuit, which takes &mut self. Because of that, cannot use + // return_if_io. We have to use the version that restores the state before returning. 
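+            // `return_and_restore_if_io!(place, saved, expr)` behaves roughly
+            // like the sketch below (error propagation elided; the point is
+            // only that the state taken by the mem::replace below is put back
+            // before an early return on I/O):
+            //
+            //     match expr? {
+            //         IOResult::Done(v) => v,
+            //         IOResult::IO(io) => {
+            //             *place = saved;
+            //             return Ok(IOResult::IO(io));
+            //         }
+            //     }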
+ let mut state = std::mem::replace(&mut self.commit_state, CommitState::Init); + match &mut state { + CommitState::Init => { + // Create state cursor when entering CommitOperators state + let state_cursor = Box::new(BTreeCursor::new_table( + None, + pager.clone(), + self.internal_state_root, + OPERATOR_COLUMNS, + )); + + self.commit_state = CommitState::CommitOperators { + execute_state: Box::new(ExecuteState::Init { + input_data: input_delta_set.clone(), + }), + state_cursor, + }; + } + CommitState::CommitOperators { + ref mut execute_state, + ref mut state_cursor, + } => { + let delta = return_and_restore_if_io!( + &mut self.commit_state, + state, + self.run_circuit(pager.clone(), execute_state, true, state_cursor) + ); + + // Create view cursor when entering UpdateView state + let view_cursor = Box::new(BTreeCursor::new_table( + None, + pager.clone(), + main_data_root, + num_columns, + )); + + self.commit_state = CommitState::UpdateView { + delta, + current_index: 0, + write_row_state: WriteViewRow::new(), + view_cursor, + }; + } + CommitState::UpdateView { + delta, + current_index, + write_row_state, + view_cursor, + } => { + if *current_index >= delta.changes.len() { + self.commit_state = CommitState::Init; + let delta = std::mem::take(delta); + return Ok(IOResult::Done(delta)); } else { - Delta::new() - } - } else { - // If no executable, pass through the input - if !input_deltas.is_empty() { - input_deltas[0].clone() - } else { - Delta::new() + let (row, weight) = delta.changes[*current_index].clone(); + + // If we're starting a new row (Empty state), we need a fresh cursor + // due to btree cursor state machine limitations + if matches!(write_row_state, WriteViewRow::Empty) { + *view_cursor = Box::new(BTreeCursor::new_table( + None, + pager.clone(), + main_data_root, + num_columns, + )); + } + + return_and_restore_if_io!( + &mut self.commit_state, + state, + write_row_state.write_row(view_cursor, row, weight) + ); + + // Move to next row + let delta = std::mem::take(delta); + // Take ownership of view_cursor - we'll create a new one for next row if needed + let view_cursor = std::mem::replace( + view_cursor, + Box::new(BTreeCursor::new_table( + None, + pager.clone(), + main_data_root, + num_columns, + )), + ); + + self.commit_state = CommitState::UpdateView { + delta, + current_index: *current_index + 1, + write_row_state: WriteViewRow::new(), + view_cursor, + }; } } } - }; - Ok(result) + } } /// Execute a specific node in the circuit fn execute_node( - &self, + &mut self, node_id: usize, - input_data: &HashMap, - uncommitted_data: &DeltaSet, - ) -> Result { - // Clone to avoid borrow checker issues - let inputs = self - .nodes - .get(&node_id) - .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))? - .inputs - .clone(); + pager: Rc, + execute_state: &mut ExecuteState, + commit_operators: bool, + state_cursor: Option<&mut Box>, + ) -> Result> { + loop { + match execute_state { + ExecuteState::Uninitialized => { + panic!("Trying to execute an uninitialized ExecuteState state machine"); + } + ExecuteState::Init { input_data } => { + let node = self + .nodes + .get(&node_id) + .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?; - // Process inputs first - let mut input_deltas = Vec::new(); - for input_id in inputs { - let delta = self.execute_node(input_id, input_data, uncommitted_data)?; - input_deltas.push(delta); + // Check if this is an Input node + match &node.operator { + DbspOperator::Input { name, .. 
} => { + // Input nodes get their delta directly from input_data + let delta = input_data.get(name); + *execute_state = ExecuteState::ProcessingNode { + eval_state: Box::new(EvalState::Init { delta }), + }; + } + _ => { + // Non-input nodes need to process their inputs + let input_data = std::mem::take(input_data); + let input_node_ids = node.inputs.clone(); + + let input_states: Vec<(usize, ExecuteState)> = input_node_ids + .iter() + .map(|&input_id| { + ( + input_id, + ExecuteState::Init { + input_data: input_data.clone(), + }, + ) + }) + .collect(); + + *execute_state = ExecuteState::ProcessingInputs { + input_states, + current_index: 0, + input_deltas: Vec::new(), + }; + } + } + } + ExecuteState::ProcessingInputs { + input_states, + current_index, + input_deltas, + } => { + if *current_index >= input_states.len() { + // All inputs processed, check we have exactly one delta + // (Input nodes never reach here since they go straight to ProcessingNode) + let delta = if input_deltas.is_empty() { + return Err(LimboError::InternalError( + "execute() cannot be called without a Delta".to_string(), + )); + } else if input_deltas.len() > 1 { + return Err(LimboError::InternalError( + format!("Until joins are supported, only one delta is expected. Got {} deltas", input_deltas.len()), + )); + } else { + input_deltas[0].clone() + }; + + *execute_state = ExecuteState::ProcessingNode { + eval_state: Box::new(EvalState::Init { delta }), + }; + } else { + // Get the (node_id, state) pair for the current index + let (input_node_id, input_state) = &mut input_states[*current_index]; + + let delta = return_if_io!(self.execute_node( + *input_node_id, + pager.clone(), + input_state, + commit_operators, + None // Input nodes don't need state cursor + )); + input_deltas.push(delta); + *current_index += 1; + } + } + ExecuteState::ProcessingNode { eval_state } => { + // Get mutable reference to node for eval + let node = self + .nodes + .get_mut(&node_id) + .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?; + + let output_delta = return_if_io!(node.process_node( + pager.clone(), + eval_state, + self.internal_state_root, + commit_operators, + state_cursor, + )); + return Ok(IOResult::Done(output_delta)); + } + } } - - // Get reference to node (read-only since we're using eval, not commit) - let node = self - .nodes - .get(&node_id) - .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?; - - // Execute based on operator type - let result = match &node.operator { - DbspOperator::Input { name, .. } => { - // Get committed data from input map and merge with uncommitted if present - let committed = input_data.get(name).cloned().unwrap_or_else(Delta::new); - let uncommitted = uncommitted_data.get(name); - - // If there's uncommitted data for this table, merge it with committed - if !uncommitted.is_empty() { - let mut combined = committed; - combined.merge(&uncommitted); - combined - } else { - committed - } - } - DbspOperator::Filter { .. } - | DbspOperator::Projection { .. } - | DbspOperator::Aggregate { .. 
} => { - // Process delta using the executable operator - if let Some(ref op) = node.executable { - if !input_deltas.is_empty() { - // Process the delta through the operator - let input_delta = input_deltas[0].clone(); - - // Use eval to compute result without modifying state - // The uncommitted data has already been merged into input_delta if needed - op.eval(input_delta, None) - } else { - Delta::new() - } - } else { - // If no executable, pass through the input - if !input_deltas.is_empty() { - input_deltas[0].clone() - } else { - Delta::new() - } - } - } - }; - Ok(result) } } @@ -440,16 +760,17 @@ pub struct DbspCompiler { impl DbspCompiler { /// Create a new DBSP compiler - pub fn new() -> Self { + pub fn new(main_data_root: usize, internal_state_root: usize) -> Self { Self { - circuit: DbspCircuit::new(), + circuit: DbspCircuit::new(main_data_root, internal_state_root), } } /// Compile a logical plan to a DBSP circuit pub fn compile(mut self, plan: &LogicalPlan) -> Result { let root_id = self.compile_plan(plan)?; - self.circuit.root = Some(root_id); + let output_schema = plan.schema().clone(); + self.circuit.set_root(root_id, output_schema); Ok(self.circuit) } @@ -486,10 +807,8 @@ impl DbspCompiler { .collect(); // Create the ProjectOperator - let executable: Option> = - ProjectOperator::from_compiled(compiled_exprs, aliases, input_column_names, output_column_names) - .ok() - .map(|op| Box::new(op) as Box); + let executable: Box = + Box::new(ProjectOperator::from_compiled(compiled_exprs, aliases, input_column_names, output_column_names)?); // Create projection node let node_id = self.circuit.add_node( @@ -526,7 +845,7 @@ impl DbspCompiler { let node_id = self.circuit.add_node( DbspOperator::Filter { predicate: dbsp_predicate }, vec![input_id], - Some(executable), + executable, ); Ok(node_id) } @@ -621,15 +940,16 @@ impl DbspCompiler { } } - // Create the AggregateOperator + // Create the AggregateOperator with a unique operator_id + // Use the next_node_id as the operator_id to ensure uniqueness + let operator_id = self.circuit.next_id; use crate::incremental::operator::AggregateOperator; - let executable: Option> = Some( - Box::new(AggregateOperator::new( - group_by_columns, - aggregate_functions.clone(), - input_column_names, - )) - ); + let executable: Box = Box::new(AggregateOperator::new( + operator_id, // Use next_node_id as operator_id + group_by_columns, + aggregate_functions.clone(), + input_column_names, + )); // Create aggregate node let node_id = self.circuit.add_node( @@ -644,14 +964,17 @@ impl DbspCompiler { Ok(node_id) } LogicalPlan::TableScan(scan) => { - // Create input node (no executable needed for input) + // Create input node with InputOperator for uniform handling + let executable: Box = + Box::new(InputOperator::new(scan.table_name.clone())); + let node_id = self.circuit.add_node( DbspOperator::Input { name: scan.table_name.clone(), schema: scan.schema.clone(), }, vec![], - None, + executable, ); Ok(node_id) } @@ -925,10 +1248,15 @@ impl DbspCompiler { #[cfg(test)] mod tests { use super::*; - use crate::incremental::operator::{Delta, FilterOperator, FilterPredicate}; + use crate::incremental::dbsp::Delta; + use crate::incremental::operator::{FilterOperator, FilterPredicate}; use crate::schema::{BTreeTable, Column as SchemaColumn, Schema, Type}; + use crate::storage::pager::CreateBTreeFlags; use crate::translate::logical::LogicalPlanBuilder; use crate::translate::logical::LogicalSchema; + use crate::util::IOExt; + use crate::{Database, MemoryIO, Pager, IO}; + 
use std::rc::Rc; use std::sync::Arc; use turso_parser::ast; use turso_parser::parser::Parser; @@ -984,13 +1312,71 @@ mod tests { unique_sets: None, }; schema.add_btree_table(Arc::new(users_table)); + let sales_table = BTreeTable { + name: "sales".to_string(), + root_page: 2, + primary_key_columns: vec![], + columns: vec![ + SchemaColumn { + name: Some("product_id".to_string()), + ty: Type::Integer, + ty_str: "INTEGER".to_string(), + primary_key: false, + is_rowid_alias: false, + notnull: false, + default: None, + unique: false, + collation: None, + hidden: false, + }, + SchemaColumn { + name: Some("amount".to_string()), + ty: Type::Integer, + ty_str: "INTEGER".to_string(), + primary_key: false, + is_rowid_alias: false, + notnull: false, + default: None, + unique: false, + collation: None, + hidden: false, + }, + ], + has_rowid: true, + is_strict: false, + unique_sets: None, + }; + schema.add_btree_table(Arc::new(sales_table)); + schema }}; } + fn setup_btree_for_circuit() -> (Rc, usize, usize) { + let io: Arc = Arc::new(MemoryIO::new()); + let db = Database::open_file(io.clone(), ":memory:", false, false).unwrap(); + let conn = db.connect().unwrap(); + let pager = conn.pager.borrow().clone(); + + let _ = pager.io.block(|| pager.allocate_page1()).unwrap(); + + let main_root_page = pager + .io + .block(|| pager.btree_create(&CreateBTreeFlags::new_table())) + .unwrap() as usize; + + let dbsp_state_page = pager + .io + .block(|| pager.btree_create(&CreateBTreeFlags::new_table())) + .unwrap() as usize; + + (pager, main_root_page, dbsp_state_page) + } + // Macro to compile SQL to DBSP circuit macro_rules! compile_sql { ($sql:expr) => {{ + let (pager, main_root_page, dbsp_state_page) = setup_btree_for_circuit(); let schema = test_schema!(); let mut parser = Parser::new($sql.as_bytes()); let cmd = parser @@ -1002,7 +1388,12 @@ mod tests { ast::Cmd::Stmt(stmt) => { let mut builder = LogicalPlanBuilder::new(&schema); let logical_plan = builder.build_statement(&stmt).unwrap(); - DbspCompiler::new().compile(&logical_plan).unwrap() + ( + DbspCompiler::new(main_root_page, dbsp_state_page) + .compile(&logical_plan) + .unwrap(), + pager, + ) } _ => panic!("Only SQL statements are supported"), } @@ -1108,40 +1499,72 @@ mod tests { circuit.nodes.get(¤t_id).expect("Node not found") } - // Helper to get the current accumulated state of the circuit (from the root operator) - // This returns the internal state including bookkeeping entries - fn get_current_state(circuit: &DbspCircuit) -> Result { - if let Some(root_id) = circuit.root { - let node = circuit - .nodes - .get(&root_id) - .ok_or_else(|| LimboError::ParseError("Root node not found".to_string()))?; - - if let Some(ref executable) = node.executable { - Ok(executable.get_current_state()) - } else { - // Input nodes don't have executables but also don't have state - Ok(Delta::new()) - } - } else { - Err(LimboError::ParseError( - "Circuit has no root node".to_string(), - )) + // Helper function for tests to execute circuit and extract the Delta result + #[cfg(test)] + fn test_execute( + circuit: &mut DbspCircuit, + inputs: HashMap, + pager: Rc, + ) -> Result { + let mut execute_state = ExecuteState::Init { + input_data: DeltaSet::from_map(inputs), + }; + match circuit.execute(pager, &mut execute_state)? 
{ + IOResult::Done(delta) => Ok(delta), + IOResult::IO(_) => panic!("Unexpected I/O in test"), } } - // Helper to create a DeltaSet from a HashMap (for tests) - fn delta_set_from_map(map: HashMap) -> DeltaSet { - let mut delta_set = DeltaSet::new(); - for (key, value) in map { - delta_set.insert(key, value); + // Helper to get the committed BTree state from main_data_root + // This reads the actual persisted data from the BTree + #[cfg(test)] + fn get_current_state(pager: Rc, circuit: &DbspCircuit) -> Result { + let mut delta = Delta::new(); + + let main_data_root = circuit.main_data_root; + let num_columns = circuit.output_schema.columns.len() + 1; + + // Create a cursor to read the btree + let mut btree_cursor = + BTreeCursor::new_table(None, pager.clone(), main_data_root, num_columns); + + // Rewind to the beginning + pager.io.block(|| btree_cursor.rewind())?; + + // Read all rows from the BTree + loop { + // Check if cursor is empty (no more rows) + if btree_cursor.is_empty() { + break; + } + + // Get the rowid + let rowid = pager.io.block(|| btree_cursor.rowid()).unwrap().unwrap(); + + // Get the record at this position + let record = pager + .io + .block(|| btree_cursor.record()) + .unwrap() + .unwrap() + .to_owned(); + + let values_ref = record.get_values(); + let num_data_columns = values_ref.len() - 1; // Get length before consuming + let values: Vec = values_ref + .into_iter() + .take(num_data_columns) // Skip the weight column + .map(|x| x.to_owned()) + .collect(); + delta.insert(rowid, values); + pager.io.block(|| btree_cursor.next()).unwrap(); } - delta_set + Ok(delta) } #[test] fn test_simple_projection() { - let circuit = compile_sql!("SELECT name FROM users"); + let (circuit, _) = compile_sql!("SELECT name FROM users"); // Circuit has 2 nodes with Projection at root assert_circuit!(circuit, depth: 2, root: Projection); @@ -1153,7 +1576,7 @@ mod tests { #[test] fn test_filter_with_projection() { - let circuit = compile_sql!("SELECT name FROM users WHERE age > 18"); + let (circuit, _) = compile_sql!("SELECT name FROM users WHERE age > 18"); // Circuit has 3 nodes with Projection at root assert_circuit!(circuit, depth: 3, root: Projection); @@ -1167,7 +1590,7 @@ mod tests { #[test] fn test_select_star() { - let mut circuit = compile_sql!("SELECT * FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users"); // Create test data let mut input_delta = Delta::new(); @@ -1192,8 +1615,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have all rows with all columns assert_eq!(result.changes.len(), 2); @@ -1207,7 +1633,7 @@ mod tests { #[test] fn test_execute_filter() { - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Create test data let mut input_delta = Delta::new(); @@ -1240,8 +1666,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); 
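+        // Note the two-phase pattern used throughout these tests: test_execute
+        // runs the circuit against the input deltas without persisting anything,
+        // while commit() re-runs it with commit_operators=true, persisting
+        // operator state to the internal-state btree and the weighted rows to
+        // the view's main btree (via the WriteViewRow state machine).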
// Should only have Alice and Charlie (age > 18) assert_eq!( @@ -1284,7 +1713,7 @@ mod tests { #[test] fn test_simple_column_projection() { - let mut circuit = compile_sql!("SELECT name, age FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT name, age FROM users"); // Create test data let mut input_delta = Delta::new(); @@ -1309,8 +1738,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have all rows but only 2 columns (name, age) assert_eq!(result.changes.len(), 2); @@ -1327,7 +1759,7 @@ mod tests { #[test] fn test_simple_aggregation() { // Test COUNT(*) with GROUP BY - let mut circuit = compile_sql!("SELECT age, COUNT(*) FROM users GROUP BY age"); + let (mut circuit, pager) = compile_sql!("SELECT age, COUNT(*) FROM users GROUP BY age"); // Create test data let mut input_delta = Delta::new(); @@ -1360,8 +1792,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have 2 groups: age 25 with count 2, age 30 with count 1 assert_eq!(result.changes.len(), 2); @@ -1392,7 +1827,7 @@ mod tests { #[test] fn test_sum_aggregation() { // Test SUM with GROUP BY - let mut circuit = compile_sql!("SELECT name, SUM(age) FROM users GROUP BY name"); + let (mut circuit, pager) = compile_sql!("SELECT name, SUM(age) FROM users GROUP BY name"); // Create test data - some names appear multiple times let mut input_delta = Delta::new(); @@ -1425,8 +1860,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have 2 groups: Alice with sum 55, Bob with sum 20 assert_eq!(result.changes.len(), 2); @@ -1448,7 +1886,7 @@ mod tests { #[test] fn test_aggregation_without_group_by() { // Test aggregation without GROUP BY - should produce a single row - let mut circuit = compile_sql!("SELECT COUNT(*), SUM(age), AVG(age) FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT COUNT(*), SUM(age), AVG(age) FROM users"); // Create test data let mut input_delta = Delta::new(); @@ -1481,8 +1919,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have exactly 1 row with all aggregates assert_eq!( @@ -1521,7 +1962,7 @@ mod tests { #[test] fn test_expression_projection_execution() { // Test that complex expressions work through VDBE compilation - let mut circuit = compile_sql!("SELECT hex(id) FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT 
hex(id) FROM users"); // Create test data let mut input_delta = Delta::new(); @@ -1546,8 +1987,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); assert_eq!(result.changes.len(), 2); @@ -1586,7 +2030,7 @@ mod tests { fn test_projection_aggregation_projection_pattern() { // Test pattern: projection -> aggregation -> projection // Query: SELECT HEX(SUM(age + 2)) FROM users - let mut circuit = compile_sql!("SELECT HEX(SUM(age + 2)) FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT HEX(SUM(age + 2)) FROM users"); // Initial input data let mut input_delta = Delta::new(); @@ -1618,8 +2062,11 @@ mod tests { let mut input_data = HashMap::new(); input_data.insert("users".to_string(), input_delta); - // Initialize the circuit with the initial data - let result = circuit.initialize(input_data).unwrap(); + let result = test_execute(&mut circuit, input_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(input_data.clone(), pager.clone())) + .unwrap(); // Expected: SUM(age + 2) = (25+2) + (30+2) + (35+2) = 27 + 32 + 37 = 96 // HEX(96) should be the hex representation of the string "96" = "3936" @@ -1649,7 +2096,7 @@ mod tests { let mut input_data = HashMap::new(); input_data.insert("users".to_string(), input_delta); - let result = circuit.execute(input_data, DeltaSet::empty()).unwrap(); + let result = test_execute(&mut circuit, input_data, pager.clone()).unwrap(); // Expected: new SUM(age + 2) = 96 + (40+2) = 138 // HEX(138) = hex of "138" = "313338" @@ -1674,7 +2121,8 @@ mod tests { fn test_nested_projection_with_groupby() { // Test pattern: projection -> aggregation with GROUP BY -> projection // Query: SELECT name, HEX(SUM(age * 2)) FROM users GROUP BY name - let mut circuit = compile_sql!("SELECT name, HEX(SUM(age * 2)) FROM users GROUP BY name"); + let (mut circuit, pager) = + compile_sql!("SELECT name, HEX(SUM(age * 2)) FROM users GROUP BY name"); // Initial input data let mut input_delta = Delta::new(); @@ -1706,8 +2154,11 @@ mod tests { let mut input_data = HashMap::new(); input_data.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(input_data).unwrap(); + let result = test_execute(&mut circuit, input_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(input_data.clone(), pager.clone())) + .unwrap(); // Expected results: // Alice: SUM(25*2 + 35*2) = 50 + 70 = 120, HEX("120") = "313230" @@ -1746,7 +2197,7 @@ mod tests { fn test_transaction_context() { // Test that uncommitted changes are visible within a transaction // but don't affect the operator's internal state - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with some data let mut init_data = HashMap::new(); @@ -1769,10 +2220,13 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + let state = pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); - // Verify initial state: only Alice (age > 18) - let state = 
get_current_state(&circuit).unwrap(); + // Verify initial delta : only Alice (age > 18) assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.values[1], Value::Text("Alice".into())); @@ -1801,9 +2255,7 @@ mod tests { // Execute with uncommitted data - this simulates processing the uncommitted changes // through the circuit to see what would be visible - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // The result should show Charlie being added (passes filter, age > 18) // David is filtered out (age 15 < 18) @@ -1826,9 +2278,7 @@ mod tests { ); commit_data.insert("users".to_string(), commit_delta); - let commit_result = circuit - .execute(commit_data.clone(), DeltaSet::empty()) - .unwrap(); + let commit_result = test_execute(&mut circuit, commit_data.clone(), pager.clone()).unwrap(); // The commit result should show Charlie being added assert_eq!(commit_result.changes.len(), 1, "Should see Charlie added"); @@ -1838,17 +2288,20 @@ mod tests { ); // Commit the change to make it permanent - circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // Now if we execute again with no changes, we should see no delta - let empty_result = circuit.execute(HashMap::new(), DeltaSet::empty()).unwrap(); + let empty_result = test_execute(&mut circuit, HashMap::new(), pager.clone()).unwrap(); assert_eq!(empty_result.changes.len(), 0, "No changes when no new data"); } #[test] fn test_uncommitted_delete() { // Test that uncommitted deletes are handled correctly without affecting operator state - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with some data let mut init_data = HashMap::new(); @@ -1879,10 +2332,13 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + let state = pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); - // Verify initial state: Alice, Bob, Charlie (all age > 18) - let state = get_current_state(&circuit).unwrap(); + // Verify initial delta: Alice, Bob, Charlie (all age > 18) assert_eq!(state.changes.len(), 3); // Create uncommitted delete for Bob @@ -1899,9 +2355,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted delete - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Result should show the deleted row that passed the filter assert_eq!( @@ -1911,7 +2365,7 @@ mod tests { ); // Verify operator's internal state is unchanged (still has all 3 users) - let state_after = get_current_state(&circuit).unwrap(); + let state_after = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state_after.changes.len(), 3, @@ -1931,12 +2385,13 @@ mod tests { ); commit_data.insert("users".to_string(), commit_delta); - let commit_result = circuit - .execute(commit_data.clone(), DeltaSet::empty()) - .unwrap(); + let commit_result = test_execute(&mut circuit, commit_data.clone(), pager.clone()).unwrap(); // Actually commit the delete to update operator state - 
circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // The commit result should show Bob being deleted assert_eq!(commit_result.changes.len(), 1, "Should see Bob deleted"); @@ -1950,7 +2405,7 @@ mod tests { ); // After commit, internal state should have only Alice and Charlie - let final_state = get_current_state(&circuit).unwrap(); + let final_state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( final_state.changes.len(), 2, @@ -1976,7 +2431,7 @@ mod tests { #[test] fn test_uncommitted_update() { // Test that uncommitted updates (delete + insert) are handled correctly - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with some data let mut init_data = HashMap::new(); @@ -1999,7 +2454,11 @@ mod tests { ); // Bob is 17, filtered out init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Create uncommitted update: Bob turns 19 (update from 17 to 19) // This is modeled as delete + insert @@ -2024,9 +2483,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted update - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Bob should now appear in the result (age 19 > 18) // Consolidate to see the final state @@ -2062,10 +2519,13 @@ mod tests { commit_data.insert("users".to_string(), commit_delta); // Commit the update - circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // After committing, Bob should be in the view's state - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); let mut consolidated_state = state; consolidated_state.consolidate(); @@ -2094,7 +2554,7 @@ mod tests { #[test] fn test_uncommitted_filtered_delete() { // Test deleting a row that doesn't pass the filter - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with mixed data let mut init_data = HashMap::new(); @@ -2117,7 +2577,11 @@ mod tests { ); // Bob doesn't pass filter init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Create uncommitted delete for Bob (who isn't in the view because age=15) let mut uncommitted = HashMap::new(); @@ -2133,9 +2597,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted delete - should produce no output changes - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted)) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted, pager.clone()).unwrap(); // Bob wasn't in the view, so deleting him produces no output assert_eq!( @@ -2145,7 +2607,7 @@ mod tests { ); // The view state should still only have Alice - let state = 
get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 1, "View still has only Alice"); assert_eq!(state.changes[0].0.values[1], Value::Text("Alice".into())); } @@ -2153,7 +2615,7 @@ mod tests { #[test] fn test_uncommitted_mixed_operations() { // Test multiple uncommitted operations together - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with some data let mut init_data = HashMap::new(); @@ -2176,10 +2638,14 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 2); // Create uncommitted changes: @@ -2236,9 +2702,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted changes - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Result should show all changes: delete Alice, update Bob, insert Charlie and David assert_eq!( @@ -2248,7 +2712,7 @@ mod tests { ); // Verify operator's internal state is unchanged - let state_after = get_current_state(&circuit).unwrap(); + let state_after = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state_after.changes.len(), 2, "Still has Alice and Bob"); // Commit all changes @@ -2296,19 +2760,20 @@ mod tests { ); commit_data.insert("users".to_string(), commit_delta); - let commit_result = circuit - .execute(commit_data.clone(), DeltaSet::empty()) - .unwrap(); + let commit_result = test_execute(&mut circuit, commit_data.clone(), pager.clone()).unwrap(); // Should see: Alice deleted, Bob deleted, Bob inserted, Charlie inserted // (David filtered out) assert_eq!(commit_result.changes.len(), 4, "Should see 4 changes"); // Actually commit the changes to update operator state - circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // After all commits, execute with no changes should return empty delta - let empty_result = circuit.execute(HashMap::new(), DeltaSet::empty()).unwrap(); + let empty_result = test_execute(&mut circuit, HashMap::new(), pager.clone()).unwrap(); assert_eq!(empty_result.changes.len(), 0, "No changes when no new data"); } @@ -2319,56 +2784,9 @@ mod tests { // and we need to see correct aggregation results within the transaction // Create a sales table schema for testing - let mut schema = Schema::new(false); - let sales_table = BTreeTable { - name: "sales".to_string(), - root_page: 2, - primary_key_columns: vec![], - columns: vec![ - SchemaColumn { - name: Some("product_id".to_string()), - ty: Type::Integer, - ty_str: "INTEGER".to_string(), - primary_key: false, - is_rowid_alias: false, - notnull: false, - default: None, - unique: false, - collation: None, - hidden: false, - }, - SchemaColumn { - name: Some("amount".to_string()), - ty: Type::Integer, - ty_str: "INTEGER".to_string(), - primary_key: false, - is_rowid_alias: false, - notnull: false, - default: None, - unique: 
false, - collation: None, - hidden: false, - }, - ], - has_rowid: true, - is_strict: false, - unique_sets: None, - }; - schema.add_btree_table(Arc::new(sales_table)); + let _ = test_schema!(); - // Parse and compile the aggregation query - let sql = "SELECT product_id, SUM(amount) as total, COUNT(*) as cnt FROM sales GROUP BY product_id"; - let mut parser = Parser::new(sql.as_bytes()); - let cmd = parser.next().unwrap().unwrap(); - - let mut circuit = match cmd { - ast::Cmd::Stmt(stmt) => { - let mut builder = LogicalPlanBuilder::new(&schema); - let logical_plan = builder.build_statement(&stmt).unwrap(); - DbspCompiler::new().compile(&logical_plan).unwrap() - } - _ => panic!("Expected SQL statement"), - }; + let (mut circuit, pager) = compile_sql!("SELECT product_id, SUM(amount) as total, COUNT(*) as cnt FROM sales GROUP BY product_id"); // Initialize with base data: (1, 100), (1, 200), (2, 150), (2, 250) let mut init_data = HashMap::new(); @@ -2379,10 +2797,14 @@ mod tests { delta.insert(4, vec![Value::Integer(2), Value::Integer(250)]); init_data.insert("sales".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state: product 1 total=300, product 2 total=400 - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 2, "Should have 2 product groups"); // Build a map of product_id -> (total, count) @@ -2430,9 +2852,7 @@ mod tests { uncommitted.insert("sales".to_string(), uncommitted_delta); // Execute with uncommitted data - simulating a read within transaction - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Result should show the aggregate changes from uncommitted data // Product 1: retraction of (300, 2) and insertion of (350, 3) @@ -2444,7 +2864,7 @@ mod tests { ); // IMPORTANT: Verify operator's internal state is unchanged - let state_after = get_current_state(&circuit).unwrap(); + let state_after = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state_after.changes.len(), 2, @@ -2498,9 +2918,7 @@ mod tests { commit_delta.insert(6, vec![Value::Integer(3), Value::Integer(300)]); commit_data.insert("sales".to_string(), commit_delta); - let commit_result = circuit - .execute(commit_data.clone(), DeltaSet::empty()) - .unwrap(); + let commit_result = test_execute(&mut circuit, commit_data.clone(), pager.clone()).unwrap(); // Should see changes for product 1 (updated) and product 3 (new) assert_eq!( @@ -2510,10 +2928,13 @@ mod tests { ); // Actually commit the changes to update operator state - circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // After commit, verify final state - let final_state = get_current_state(&circuit).unwrap(); + let final_state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( final_state.changes.len(), 3, @@ -2566,7 +2987,7 @@ mod tests { // Test that uncommitted INSERTs are visible within the same transaction // This simulates: BEGIN; INSERT ...; SELECT * FROM view; COMMIT; - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age 
> 18"); // Initialize with some data - need to match the schema (id, name, age) let mut init_data = HashMap::new(); @@ -2589,10 +3010,14 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data.clone()).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state.len(), 2, @@ -2622,9 +3047,7 @@ mod tests { // Execute with uncommitted data - this should return the uncommitted changes // that passed through the filter (age > 18) - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // IMPORTANT: tx_result should contain the filtered uncommitted changes! // Both Charlie (35) and David (20) should pass the age > 18 filter @@ -2648,7 +3071,7 @@ mod tests { ); // CRITICAL: Verify the operator state wasn't modified by uncommitted execution - let state_after_uncommitted = get_current_state(&circuit).unwrap(); + let state_after_uncommitted = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state_after_uncommitted.len(), 2, @@ -2680,7 +3103,8 @@ mod tests { // Similar to test_uncommitted_aggregation but explicitly tests rollback semantics // Create a simple aggregation circuit - let mut circuit = compile_sql!("SELECT age, COUNT(*) as cnt FROM users GROUP BY age"); + let (mut circuit, pager) = + compile_sql!("SELECT age, COUNT(*) as cnt FROM users GROUP BY age"); // Initialize with some data let mut init_data = HashMap::new(); @@ -2719,10 +3143,14 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state: age 25 count=2, age 30 count=2 - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 2); let initial_counts: HashMap = state @@ -2783,9 +3211,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted changes - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Should see the aggregate changes from uncommitted data // Age 25: retraction of count 1 and insertion of count 2 @@ -2796,7 +3222,7 @@ mod tests { ); // Verify internal state is unchanged (simulating rollback by not committing) - let state_after_rollback = get_current_state(&circuit).unwrap(); + let state_after_rollback = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state_after_rollback.changes.len(), 2, @@ -2836,8 +3262,10 @@ mod tests { #[test] fn test_circuit_rowid_update_consolidation() { + let (pager, p1, p2) = setup_btree_for_circuit(); + // Test that circuit properly consolidates state when rowid changes - let mut circuit = DbspCircuit::new(); + let mut circuit = DbspCircuit::new(p1, p2); // Create a simple filter node let schema = Arc::new(LogicalSchema::new(vec![ @@ -2845,14 +3273,14 @@ mod tests { ("value".to_string(), 
Type::Integer), ])); - // First create an input node + // First create an input node with InputOperator let input_id = circuit.add_node( DbspOperator::Input { name: "test".to_string(), schema: schema.clone(), }, vec![], - None, // Input nodes don't have executables + Box::new(InputOperator::new("test".to_string())), ); let filter_op = FilterOperator::new( @@ -2873,10 +3301,10 @@ mod tests { let filter_id = circuit.add_node( DbspOperator::Filter { predicate }, vec![input_id], // Filter takes input from the input node - Some(Box::new(filter_op)), + Box::new(filter_op), ); - circuit.root = Some(filter_id); + circuit.set_root(filter_id, schema.clone()); // Initialize with a row let mut init_data = HashMap::new(); @@ -2884,10 +3312,14 @@ mod tests { delta.insert(5, vec![Value::Integer(5), Value::Integer(20)]); init_data.insert("test".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.rowid, 5); @@ -2898,15 +3330,16 @@ mod tests { update_delta.insert(3, vec![Value::Integer(3), Value::Integer(20)]); update_data.insert("test".to_string(), update_delta); - circuit - .execute(update_data.clone(), DeltaSet::empty()) - .unwrap(); + test_execute(&mut circuit, update_data.clone(), pager.clone()).unwrap(); // Commit the changes to update operator state - circuit.commit(update_data).unwrap(); + pager + .io + .block(|| circuit.commit(update_data.clone(), pager.clone())) + .unwrap(); // The circuit should consolidate the state properly - let final_state = get_current_state(&circuit).unwrap(); + let final_state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( final_state.changes.len(), 1, @@ -2919,4 +3352,65 @@ mod tests { ); assert_eq!(final_state.changes[0].1, 1); } + + #[test] + fn test_circuit_respects_multiplicities() { + let (mut circuit, pager) = compile_sql!("SELECT * from users"); + + // Insert same row twice (multiplicity 2) + let mut delta = Delta::new(); + delta.insert( + 1, + vec![ + Value::Integer(1), + Value::Text("Alice".into()), + Value::Integer(25), + ], + ); + delta.insert( + 1, + vec![ + Value::Integer(1), + Value::Text("Alice".into()), + Value::Integer(25), + ], + ); + + let mut inputs = HashMap::new(); + inputs.insert("users".to_string(), delta); + test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); + + // Delete once (should leave multiplicity 1) + let mut delete_one = Delta::new(); + delete_one.delete( + 1, + vec![ + Value::Integer(1), + Value::Text("Alice".into()), + Value::Integer(25), + ], + ); + + let mut inputs = HashMap::new(); + inputs.insert("users".to_string(), delete_one); + test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); + + // With proper DBSP: row still exists (weight 2 - 1 = 1) + let state = get_current_state(pager.clone(), &circuit).unwrap(); + let mut consolidated = state; + consolidated.consolidate(); + assert_eq!( + consolidated.len(), + 1, + "Row should still exist with multiplicity 1" + ); + } } diff --git a/core/incremental/cursor.rs 
b/core/incremental/cursor.rs new file mode 100644 index 000000000..ddcd2a3e1 --- /dev/null +++ b/core/incremental/cursor.rs @@ -0,0 +1,1618 @@
+use crate::{ + incremental::{ + compiler::{DeltaSet, ExecuteState}, + dbsp::{Delta, RowKeyZSet}, + hashable_row::HashableRow, + view::{IncrementalView, ViewTransactionState}, + }, + return_if_io, + storage::btree::BTreeCursor, + types::{IOResult, SeekKey, SeekOp, SeekResult, Value}, + LimboError, Pager, Result, +};
+use std::rc::Rc; +use std::sync::{Arc, Mutex}; +
+/// State machine for seek operations +#[derive(Debug)] +enum SeekState { + /// Initial state before seeking + Init, +
+ /// Actively seeking with btree and uncommitted iterators + Seek { + /// The row we are trying to find + target: i64, + }, +
+ /// Seek completed successfully + Done, +} +
+/// Cursor for reading materialized views that combines: +/// 1. Persistent btree data (committed state) +/// 2. Transaction-specific DBSP deltas (uncommitted changes) +///
+/// Works like a regular table cursor - reads from disk on-demand +/// and overlays transaction changes as needed.
+pub struct MaterializedViewCursor { + // Core components + btree_cursor: Box<BTreeCursor>, + view: Arc<Mutex<IncrementalView>>, + pager: Rc<Pager>, +
+ // Current changes that are uncommitted + uncommitted: RowKeyZSet, +
+ // Reference to shared transaction state for this specific view - shared with Connection + tx_state: Rc<ViewTransactionState>, +
+ // The transaction state always grows. It never gets reduced. That is in the very nature of + // DBSP, because deletions are just appends with weight < 0. So we use the length of the + // state to check whether we have to recompute the transaction state.
+ last_tx_state_len: usize, +
+ // Current row cache - only cache the current row we're looking at + current_row: Option<(i64, Vec<Value>)>, +
+ // Execution state for circuit processing + execute_state: ExecuteState, +
+ // State machine for seek operations + seek_state: SeekState, +} +
+impl MaterializedViewCursor { + pub fn new( + btree_cursor: Box<BTreeCursor>, + view: Arc<Mutex<IncrementalView>>, + pager: Rc<Pager>, + tx_state: Rc<ViewTransactionState>, + ) -> Result<Self> { + Ok(Self { + btree_cursor, + view, + pager, + uncommitted: RowKeyZSet::new(), + tx_state, + last_tx_state_len: 0, + current_row: None, + execute_state: ExecuteState::Uninitialized, + seek_state: SeekState::Init, + }) + } +
+ /// Compute transaction changes lazily on first access + fn ensure_tx_changes_computed(&mut self) -> Result<IOResult<()>> { + // Check if we've already processed the current state + let current_len = self.tx_state.len(); + if current_len == self.last_tx_state_len { + return Ok(IOResult::Done(())); + } +
+ // Get the view and the current transaction state + let mut view_guard = self.view.lock().unwrap(); + let tx_delta = self.tx_state.get_delta(); +
+ // Process the delta through the circuit to get materialized changes + let mut uncommitted = DeltaSet::new(); + uncommitted.insert(view_guard.base_table().name.clone(), tx_delta); +
+ let processed_delta = return_if_io!(view_guard.execute_with_uncommitted( + uncommitted, + self.pager.clone(), + &mut self.execute_state + )); +
+ self.uncommitted = RowKeyZSet::from_delta(&processed_delta); + self.last_tx_state_len = current_len; + Ok(IOResult::Done(())) + } +
+ // Read the current btree entry as a vector (empty if no current position) + fn read_btree_delta_entry(&mut self) -> Result<IOResult<Vec<(HashableRow, isize)>>> { + let btree_rowid = return_if_io!(self.btree_cursor.rowid()); + let rowid = match btree_rowid { + None => return Ok(IOResult::Done(Vec::new())), + Some(rowid) => rowid, + }; +
+ let btree_record = return_if_io!(self.btree_cursor.record()); + let btree_ref_values = btree_record + .ok_or_else(|| { + crate::LimboError::InternalError( + "Invalid data in materialized view: found a rowid, but not the row!" + .to_string(), + ) + })? + .get_values(); +
+ // Convert RefValues to Values (copying for now - can optimize later) + let mut btree_values: Vec<Value> = + btree_ref_values.iter().map(|rv| rv.to_owned()).collect(); +
+ // The last column should be the weight + let weight_value = btree_values.pop().ok_or_else(|| { + crate::LimboError::InternalError( + "Invalid data in materialized view: no weight column found".to_string(), + ) + })?; +
+ // Convert the Value to isize weight + let weight = match weight_value { + Value::Integer(w) => w as isize, + _ => { + return Err(crate::LimboError::InternalError(format!( + "Invalid data in materialized view: expected integer weight, found {weight_value:?}" + ))) + } + }; +
+ if !(-1..=1).contains(&weight) { + return Err(crate::LimboError::InternalError(format!( + "Invalid data in materialized view: expected weight -1, 0, or 1, found {weight}" + ))); + } +
+ Ok(IOResult::Done(vec![( + HashableRow::new(rowid, btree_values), + weight, + )])) + } +
+ /// Internal seek implementation that doesn't check preconditions + fn do_seek(&mut self, target_rowid: i64, op: SeekOp) -> Result<IOResult<SeekResult>> { + loop { + // Process state machine - need to handle mutable borrow carefully + match &mut self.seek_state { + SeekState::Init => { + self.current_row = None; + self.seek_state = SeekState::Seek { + target: target_rowid, + }; + } + SeekState::Seek { target } => { + let target = *target; + let btree_result = + return_if_io!(self.btree_cursor.seek(SeekKey::TableRowId(target), op)); +
+ let changes = if btree_result == SeekResult::Found { + return_if_io!(self.read_btree_delta_entry()) + } else { + Vec::new() + }; +
+ let mut btree_entries = Delta { changes }; + let changes = self.uncommitted.seek(target, op); +
+ let uncommitted_entries = Delta { changes }; + btree_entries.merge(&uncommitted_entries); +
+ // An empty delta here means nothing was found at all. An empty post-zset, + // by contrast, can simply mean that the weights canceled out.
+ if btree_entries.is_empty() { + self.seek_state = SeekState::Done; + return Ok(IOResult::Done(SeekResult::NotFound)); + } +
+ let min_seen = btree_entries + .changes + .first() + .expect("cannot be empty, we just tested for it") + .0 + .rowid; + let max_seen = btree_entries + .changes + .last() + .expect("cannot be empty, we just tested for it") + .0 + .rowid; +
+ let zset = RowKeyZSet::from_delta(&btree_entries); + let ret = zset.seek(target_rowid, op); +
+ if !ret.is_empty() { + let (row, _) = &ret[0]; + self.current_row = Some((row.rowid, row.values.clone())); + self.seek_state = SeekState::Done; + return Ok(IOResult::Done(SeekResult::Found)); + } +
+ let new_target = match op { + SeekOp::GT => Some(max_seen), + SeekOp::GE { eq_only: false } => Some(max_seen + 1), + SeekOp::LT => Some(min_seen), + SeekOp::LE { eq_only: false } => Some(min_seen - 1), + SeekOp::LE { eq_only: true } | SeekOp::GE { eq_only: true } => None, + }; +
+ if let Some(target) = new_target { + self.seek_state = SeekState::Seek { target }; + } else { + self.seek_state = SeekState::Done; + return Ok(IOResult::Done(SeekResult::NotFound)); + } + } + SeekState::Done => { + // We always return before setting the state to Done, so if we got here, + // a new seek is starting.
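+ // Resetting to Init makes do_seek restartable: seek(), next() and rewind() all funnel
+ // through it with a fresh target. For example (illustrative values): with btree entry
+ // (rowid 5, weight +1) and uncommitted entries (5, -1) and (7, +1), a GE seek for 4
+ // cancels rowid 5 away on the first pass, loops back through Seek with
+ // target = max_seen + 1 = 6, and finds rowid 7 on the second pass.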
+ self.seek_state = SeekState::Init; + } + } + } +
+ pub fn seek(&mut self, key: SeekKey, op: SeekOp) -> Result<IOResult<SeekResult>> { + // Ensure transaction changes are computed + return_if_io!(self.ensure_tx_changes_computed()); +
+ let target_rowid = match &key { + SeekKey::TableRowId(rowid) => *rowid, + SeekKey::IndexKey(_) => { + return Err(LimboError::ParseError( + "Cannot search a materialized view with an index key".to_string(), + )); + } + }; +
+ self.do_seek(target_rowid, op) + } +
+ pub fn next(&mut self) -> Result<IOResult<bool>> { + // If cursor is not positioned (no current_row), return false + // This matches BTreeCursor behavior when valid_state == Invalid + let Some((current_rowid, _)) = &self.current_row else { + return Ok(IOResult::Done(false)); + }; +
+ // Use GT to find the next row after current position + let result = return_if_io!(self.do_seek(*current_rowid, SeekOp::GT)); + Ok(IOResult::Done(result == SeekResult::Found)) + } +
+ pub fn column(&mut self, col: usize) -> Result<IOResult<Value>> { + if let Some((_, ref values)) = self.current_row { + Ok(IOResult::Done( + values.get(col).cloned().unwrap_or(Value::Null), + )) + } else { + Ok(IOResult::Done(Value::Null)) + } + } +
+ pub fn rowid(&self) -> Result<IOResult<Option<i64>>> { + Ok(IOResult::Done(self.current_row.as_ref().map(|(id, _)| *id))) + } +
+ pub fn rewind(&mut self) -> Result<IOResult<()>> { + return_if_io!(self.ensure_tx_changes_computed()); + // Seek GT from i64::MIN to find the first row using internal do_seek + let _result = return_if_io!(self.do_seek(i64::MIN, SeekOp::GT)); + Ok(IOResult::Done(())) + } +
+ pub fn is_valid(&self) -> Result<bool> { + Ok(self.current_row.is_some()) + } +} +
+#[cfg(test)] +mod tests { + use super::*; + use crate::util::IOExt; + use crate::{Connection, Database, OpenFlags}; + use std::rc::Rc; + use std::sync::Arc; +
+ /// Helper to create a test connection with a table and materialized view + fn create_test_connection() -> Result<Arc<Connection>> { + // Create an in-memory database with experimental views enabled + let io = Arc::new(crate::io::MemoryIO::new()); + let db = Database::open_file_with_flags( + io, + ":memory:", + OpenFlags::default(), + crate::DatabaseOpts { + enable_mvcc: false, + enable_indexes: false, + enable_views: true, + enable_strict: false, + }, + )?; + let conn = db.connect()?; +
+ // Create a test table + conn.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY, value INTEGER)")?; +
+ // Create materialized view + conn.execute("CREATE MATERIALIZED VIEW test_view AS SELECT id, value FROM test_table")?; +
+ Ok(conn) + } +
+ /// Helper to create a test cursor for the materialized view + fn create_test_cursor( + conn: &Arc<Connection>, + ) -> Result<(MaterializedViewCursor, Rc<ViewTransactionState>, Rc<Pager>)> { + // Get the schema and view + let view_mutex = conn + .schema + .borrow() + .get_materialized_view("test_view") + .ok_or(crate::LimboError::InternalError( + "View not found".to_string(), + ))?; +
+ // Get the view's root page + let view = view_mutex.lock().unwrap(); + let root_page = view.get_root_page(); + if root_page == 0 { + return Err(crate::LimboError::InternalError( + "View not materialized".to_string(), + )); + } + let num_columns = view.columns.len(); + drop(view); +
+ // Create a btree cursor + let pager = conn.get_pager(); + let btree_cursor = Box::new(BTreeCursor::new( + None, // No MvCursor + pager.clone(), + root_page, + num_columns, + )); +
+ // Get or create transaction state for this view + let tx_state = conn.view_transaction_states.get_or_create("test_view"); +
+ // Create the materialized view cursor + let cursor = MaterializedViewCursor::new(
btree_cursor, + view_mutex.clone(), + pager.clone(), + tx_state.clone(), + )?; +
+ Ok((cursor, tx_state, pager)) + } +
+ /// Helper to populate test table with data through SQL + fn populate_test_table(conn: &Arc<Connection>, rows: Vec<(i64, i64)>) -> Result<()> { + for (id, value) in rows { + let sql = format!("INSERT INTO test_table (id, value) VALUES ({id}, {value})"); + conn.execute(&sql)?; + } + Ok(()) + } +
+ /// Helper to apply changes through ViewTransactionState + fn apply_changes_to_tx_state( + tx_state: &ViewTransactionState, + changes: Vec<(i64, Vec<Value>, isize)>, + ) { + for (rowid, values, weight) in changes { + if weight > 0 { + tx_state.insert(rowid, values); + } else if weight < 0 { + tx_state.delete(rowid, values); + } + } + } +
+ #[test] + fn test_seek_key_exists_in_btree() -> Result<()> { + let conn = create_test_connection()?; +
+ // Populate table with test data: rows 1, 3, 5, 7 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50), (7, 70)])?; +
+ // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; +
+ // No uncommitted changes - tx_state is already empty +
+ // Test 1: Seek exact match (row 3) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); +
+ // Test 2: Seek GE (row 4 should find row 5) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(4), SeekOp::GE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); +
+ // Test 3: Seek GT (row 3 should find row 5) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); +
+ // Test 4: Seek LE (row 4 should find row 3) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(4), SeekOp::LE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); +
+ // Test 5: Seek LT (row 5 should find row 3) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::LT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); +
+ Ok(()) + } +
+ #[test] + fn test_seek_key_exists_only_uncommitted() -> Result<()> { + let conn = create_test_connection()?; +
+ // Populate table with rows 1, 5, 7 + populate_test_table(&conn, vec![(1, 10), (5, 50), (7, 70)])?; +
+ // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; +
+ // Add uncommitted changes: insert rows 3 and 6 + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], 1), // Insert row 3 + (6, vec![Value::Integer(6), Value::Integer(60)], 1), // Insert row 6 + ], + ); +
+ // Test 1: Seek exact match for uncommitted row 3 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(30)); +
+ // Test 2: Seek GE for row 2 should find uncommitted row 3 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?,
Some(3)); + + // Test 3: Seek GT for row 5 should find uncommitted row 6 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(6)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(60)); + + // Test 4: Seek LE for row 6 should find uncommitted row 6 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(6), SeekOp::LE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(6)); + + Ok(()) + } + + #[test] + fn test_seek_key_deleted_by_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5, 7 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50), (7, 70)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete row 3 and 5 in uncommitted changes + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], -1), // Delete row 3 + (5, vec![Value::Integer(5), Value::Integer(50)], -1), // Delete row 5 + ], + ); + + // Test 1: Seek exact match for deleted row 3 should not find it + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 2: Seek GE for row 2 should skip deleted row 3 and find row 7 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + // Test 3: Seek GT for row 1 should skip deleted rows and find row 7 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(1), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + // Test 4: Seek LE for row 5 should find row 1 (skipping deleted 3 and 5) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::LE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + Ok(()) + } + + #[test] + fn test_seek_with_updates() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Update row 3 (delete old + insert new) + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], -1), // Delete old row 3 + (3, vec![Value::Integer(3), Value::Integer(35)], 1), // Insert new row 3 + ], + ); + + // Test: Seek for updated row 3 should find it + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + // The values should be from the uncommitted set (35 instead of 30) + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(35)); + + Ok(()) + } + + #[test] + fn test_seek_boundary_conditions() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 5, 10 + populate_test_table(&conn, vec![(5, 50), (10, 100)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // No 
uncommitted changes - tx_state is already empty + + // Test 1: Seek LT for minimum value (should find nothing) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(1), SeekOp::LT))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 2: Seek GT for maximum value (should find nothing) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(15), SeekOp::GT))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 3: Seek exact for non-existent key + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(7), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::NotFound); + + Ok(()) + } + + #[test] + fn test_seek_complex_uncommitted_weights() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with row 5 + populate_test_table(&conn, vec![(5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Complex uncommitted changes with multiple operations on same row + apply_changes_to_tx_state( + &tx_state, + vec![ + (5, vec![Value::Integer(5), Value::Integer(50)], -1), // Delete original + (5, vec![Value::Integer(5), Value::Integer(51)], 1), // Insert update 1 + (5, vec![Value::Integer(5), Value::Integer(51)], -1), // Delete update 1 + (5, vec![Value::Integer(5), Value::Integer(52)], 1), // Insert update 2 + // Net effect: row 5 exists with value 52 + ], + ); + + // Seek for row 5 should find it (net weight = 1 from btree + 0 from uncommitted = 1) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + // The final value should be 52 from the last update + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(52)); + + Ok(()) + } + + #[test] + fn test_seek_affected_by_transaction_state_changes() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1 and 3 + populate_test_table(&conn, vec![(1, 10), (3, 30)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Seek for row 2 - doesn't exist + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::NotFound); + + // Add row 2 to uncommitted + tx_state.insert(2, vec![Value::Integer(2), Value::Integer(20)]); + + // Now seek for row 2 finds it + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(20)); + + Ok(()) + } + + #[test] + fn test_rewind_btree_first_uncommitted_later() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows 8, 10 (all larger than btree rows) + apply_changes_to_tx_state( + &tx_state, + vec![ + (8, vec![Value::Integer(8), Value::Integer(80)], 1), + (10, vec![Value::Integer(10), Value::Integer(100)], 1), + ], + ); + + // Initially cursor is not positioned + assert!(!cursor.is_valid()?); + + // Rewind should position at first btree row (1) since uncommitted are all larger + pager.io.block(|| cursor.rewind())?; + 
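// rewind() is just do_seek(i64::MIN, SeekOp::GT) over the merged btree/uncommitted
+ // view, so it positions the cursor at the smallest surviving rowid. +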
assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + Ok(()) + } + + #[test] + fn test_rewind_with_uncommitted_first() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 5, 7 + populate_test_table(&conn, vec![(5, 50), (7, 70)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted row 2 (smaller than any btree row) + apply_changes_to_tx_state( + &tx_state, + vec![(2, vec![Value::Integer(2), Value::Integer(20)], 1)], + ); + + // Rewind should position at row 2 (uncommitted) + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(20)); + + Ok(()) + } + + #[test] + fn test_rewind_skip_deleted_first() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete row 1 in uncommitted + apply_changes_to_tx_state( + &tx_state, + vec![(1, vec![Value::Integer(1), Value::Integer(10)], -1)], + ); + + // Rewind should skip deleted row 1 and position at row 3 + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + Ok(()) + } + + #[test] + fn test_rewind_empty_btree_with_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows (no btree data) + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], 1), + (7, vec![Value::Integer(7), Value::Integer(70)], 1), + ], + ); + + // Rewind should find first uncommitted row + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(30)); + + Ok(()) + } + + #[test] + fn test_rewind_all_deleted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 2, 4 + populate_test_table(&conn, vec![(2, 20), (4, 40)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete all rows in uncommitted + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], -1), + (4, vec![Value::Integer(4), Value::Integer(40)], -1), + ], + ); + + // Rewind should find no valid rows + pager.io.block(|| cursor.rewind())?; + assert!(!cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, None); + + Ok(()) + } + + #[test] + fn test_rewind_with_updates() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3 + populate_test_table(&conn, vec![(1, 10), (3, 30)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Update row 1 (delete + insert with new value) + apply_changes_to_tx_state( + &tx_state, + vec![ + (1, vec![Value::Integer(1), Value::Integer(10)], -1), + (1, vec![Value::Integer(1), Value::Integer(15)], 1), + ], + ); + + // Rewind should position at row 1 with updated value + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| 
cursor.rowid())?, Some(1)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(15)); + + Ok(()) + } + + // ===== NEXT() TEST SUITE ===== + + #[test] + fn test_next_btree_only_sequential() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5, 7 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50), (7, 70)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // Start with rewind to position at first row + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + // Next should move to row 3 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + // Next should move to row 5 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + // Next should move to row 7 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + // Next should reach end + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_uncommitted_only() -> Result<()> { + let conn = create_test_connection()?; + + // Create cursor for testing (no btree data) + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows 2, 4, 6 + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], 1), + (4, vec![Value::Integer(4), Value::Integer(40)], 1), + (6, vec![Value::Integer(6), Value::Integer(60)], 1), + ], + ); + + // Start with rewind to position at first row + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + + // Next should move to row 4 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(4)); + + // Next should move to row 6 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(6)); + + // Next should reach end + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_mixed_btree_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 5, 9 + populate_test_table(&conn, vec![(1, 10), (5, 50), (9, 90)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows 3, 7 + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], 1), + (7, vec![Value::Integer(7), Value::Integer(70)], 1), + ], + ); + + // Should iterate in order: 1, 3, 5, 7, 9 + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(9)); + + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_skip_deleted_rows() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 2, 3, 4, 5 + populate_test_table(&conn, vec![(1, 
10), (2, 20), (3, 30), (4, 40), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete rows 2 and 4 in uncommitted + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], -1), + (4, vec![Value::Integer(4), Value::Integer(40)], -1), + ], + ); + + // Should iterate: 1, 3, 5 (skipping deleted 2 and 4) + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_with_updates() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Update row 3 (delete old + insert new) + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], -1), + (3, vec![Value::Integer(3), Value::Integer(35)], 1), + ], + ); + + // Should iterate all rows with updated values + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(35)); // Updated value + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_next_from_uninitialized() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 2, 4 + populate_test_table(&conn, vec![(2, 20), (4, 40)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // Cursor not positioned initially + assert!(!cursor.is_valid()?); + + // Next on uninitialized cursor should return false (matching BTreeCursor behavior) + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + // Position cursor with rewind first + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + + // Now next should work + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(4)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_next_empty_table() -> Result<()> { + let conn = create_test_connection()?; + + // Create cursor for testing (empty table) + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // Next on empty table should return false + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_all_deleted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 2, 3 + populate_test_table(&conn, vec![(1, 10), (2, 20), (3, 30)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete all rows + apply_changes_to_tx_state( + &tx_state, + vec![ + (1, vec![Value::Integer(1), Value::Integer(10)], -1), + (2, vec![Value::Integer(2), 
Value::Integer(20)], -1), + (3, vec![Value::Integer(3), Value::Integer(30)], -1), + ], + ); + + // Next should find nothing + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_complex_interleaving() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 2, 4, 6, 8 + populate_test_table(&conn, vec![(2, 20), (4, 40), (6, 60), (8, 80)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Complex changes: + // - Insert row 1 + // - Delete row 2 + // - Insert row 3 + // - Update row 4 + // - Insert row 5 + // - Delete row 6 + // - Insert row 7 + // - Keep row 8 as-is + // - Insert row 9 + apply_changes_to_tx_state( + &tx_state, + vec![ + (1, vec![Value::Integer(1), Value::Integer(10)], 1), // Insert 1 + (2, vec![Value::Integer(2), Value::Integer(20)], -1), // Delete 2 + (3, vec![Value::Integer(3), Value::Integer(30)], 1), // Insert 3 + (4, vec![Value::Integer(4), Value::Integer(40)], -1), // Delete old 4 + (4, vec![Value::Integer(4), Value::Integer(45)], 1), // Insert new 4 + (5, vec![Value::Integer(5), Value::Integer(50)], 1), // Insert 5 + (6, vec![Value::Integer(6), Value::Integer(60)], -1), // Delete 6 + (7, vec![Value::Integer(7), Value::Integer(70)], 1), // Insert 7 + (9, vec![Value::Integer(9), Value::Integer(90)], 1), // Insert 9 + ], + ); + + // Should iterate: 1, 3, 4(updated), 5, 7, 8, 9 + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(4)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(45)); // Updated value + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(8)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(9)); + + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_after_seek() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5, 7, 9 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50), (7, 70), (9, 90)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // Seek to row 5 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + // Next should move to row 7 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + // Next should move to row 9 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(9)); + + // Next should reach end + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_next_multiple_weights_same_row() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with row 1 + populate_test_table(&conn, vec![(1, 10)])?; + + // Create cursor for testing + let (mut 
cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Multiple operations on same row: + apply_changes_to_tx_state( + &tx_state, + vec![ + (1, vec![Value::Integer(1), Value::Integer(10)], -1), // Delete original + (1, vec![Value::Integer(1), Value::Integer(11)], 1), // Insert v1 + (1, vec![Value::Integer(1), Value::Integer(11)], -1), // Delete v1 + (1, vec![Value::Integer(1), Value::Integer(12)], 1), // Insert v2 + (1, vec![Value::Integer(1), Value::Integer(12)], -1), // Delete v2 + // Net weight: 1 (btree) - 1 + 1 - 1 + 1 - 1 = 0 (row deleted) + ], + ); + + // Row should be deleted + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_only_uncommitted_large_gaps() -> Result<()> { + let conn = create_test_connection()?; + + // Create cursor for testing (no btree data) + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows with large gaps + apply_changes_to_tx_state( + &tx_state, + vec![ + (100, vec![Value::Integer(100), Value::Integer(1000)], 1), + (500, vec![Value::Integer(500), Value::Integer(5000)], 1), + (999, vec![Value::Integer(999), Value::Integer(9990)], 1), + ], + ); + + // Should iterate through all with large gaps + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(100)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(500)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(999)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_multiple_updates_same_row_single_transaction() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 2, 3 + populate_test_table(&conn, vec![(1, 10), (2, 20), (3, 30)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Multiple successive updates to row 2 in the same transaction + // 20 -> 25 -> 28 -> 32 (final value should be 32) + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], -1), // Delete original + (2, vec![Value::Integer(2), Value::Integer(25)], 1), // First update + (2, vec![Value::Integer(2), Value::Integer(25)], -1), // Delete first update + (2, vec![Value::Integer(2), Value::Integer(28)], 1), // Second update + (2, vec![Value::Integer(2), Value::Integer(28)], -1), // Delete second update + (2, vec![Value::Integer(2), Value::Integer(32)], 1), // Final update + ], + ); + + // Seek to row 2 should find the final value + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(32)); + + // Next through all rows to verify only final values are seen + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(10)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(32)); // Final value + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(30)); + + 
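// The intermediate versions (25 and 28) never surface: their insert/delete weights
+ // cancel in the zset, so the cursor only sees the net final version of row 2. +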
assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_empty_materialized_view_with_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Don't populate any data - view is created but empty + // This tests a materialized view that was never populated + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows to empty materialized view + apply_changes_to_tx_state( + &tx_state, + vec![ + (5, vec![Value::Integer(5), Value::Integer(50)], 1), + (10, vec![Value::Integer(10), Value::Integer(100)], 1), + (15, vec![Value::Integer(15), Value::Integer(150)], 1), + ], + ); + + // Test seek on empty materialized view with uncommitted data + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(10), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(10)); + + // Test GT seek + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(7), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(10)); + + // Test rewind and next + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(10)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(15)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_exact_match_btree_uncommitted_same_rowid_different_values() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted row 3 with different value (not a delete+insert, just insert) + // This simulates a case where uncommitted has a new version of row 3 + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(35)], 1), // New version with positive weight + ], + ); + + // Exact match seek for row 3 should find the uncommitted version (35) + // because when both exist with positive weight, uncommitted takes precedence + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + // This test verifies which value we get when both btree and uncommitted + // have the same rowid with positive weights + // The expected behavior needs to be defined - typically uncommitted wins + // or they get merged based on the DBSP semantics + + Ok(()) + } + + #[test] + fn test_boundary_value_seeks() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with some normal values + populate_test_table(&conn, vec![(100, 1000), (200, 2000)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows at extreme positions + apply_changes_to_tx_state( + &tx_state, + vec![ + ( + i64::MIN + 1, + vec![Value::Integer(i64::MIN + 1), Value::Integer(-999)], + 1, + ), + ( + i64::MAX - 1, + vec![Value::Integer(i64::MAX - 1), Value::Integer(999)], + 1, + ), + ], + ); + + // Test 1: Seek GT with i64::MAX should find nothing + let result = pager + .io + .block(|| 
cursor.seek(SeekKey::TableRowId(i64::MAX), SeekOp::GT))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 2: Seek LT with i64::MIN should find nothing + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(i64::MIN), SeekOp::LT))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 3: Seek GE with i64::MAX - 1 should find our extreme row + let result = pager.io.block(|| { + cursor.seek( + SeekKey::TableRowId(i64::MAX - 1), + SeekOp::GE { eq_only: false }, + ) + })?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(i64::MAX - 1)); + + // Test 4: Seek LE with i64::MIN + 1 should find our extreme low row + let result = pager.io.block(|| { + cursor.seek( + SeekKey::TableRowId(i64::MIN + 1), + SeekOp::LE { eq_only: false }, + ) + })?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(i64::MIN + 1)); + + // Test 5: Seek GT from i64::MIN should find the smallest row + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(i64::MIN), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(i64::MIN + 1)); + + // Test 6: Seek LT from i64::MAX should find the largest row + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(i64::MAX), SeekOp::LT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(i64::MAX - 1)); + + Ok(()) + } + + #[test] + fn test_next_concurrent_btree_uncommitted_advance() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 2, 3, 4, 5 + populate_test_table(&conn, vec![(1, 10), (2, 20), (3, 30), (4, 40), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete some btree rows and add replacements in uncommitted + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], -1), // Delete btree row 2 + (2, vec![Value::Integer(2), Value::Integer(25)], 1), // Replace with new value + (4, vec![Value::Integer(4), Value::Integer(40)], -1), // Delete btree row 4 + ], + ); + + // Should iterate: 1, 2(new), 3, 5 + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(25)); // New value + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_transaction_state_changes_mid_iteration() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Start iteration + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + // Move to next row + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + // Now add new uncommitted changes mid-iteration + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], 1), // Insert before 
current + (4, vec![Value::Integer(4), Value::Integer(40)], 1), // Insert after current + (6, vec![Value::Integer(6), Value::Integer(60)], 1), // Insert at end + ], + ); + + // Continue iteration - cursor continues from where it was, sees row 5 next + // (new changes are only visible after rewind/seek) + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + // No more rows in original iteration + assert!(!pager.io.block(|| cursor.next())?); + + // Rewind and verify we see all rows including the newly added ones + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(4)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(6)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_rewind_after_failed_seek() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted row 2 + apply_changes_to_tx_state( + &tx_state, + vec![(2, vec![Value::Integer(2), Value::Integer(20)], 1)], + ); + + // Seek to non-existent row 4 with exact match + assert_eq!( + pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(4), SeekOp::GE { eq_only: true }))?, + SeekResult::NotFound + ); + assert!(!cursor.is_valid()?); + + // Rewind should work correctly after failed seek + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + // Verify we can iterate through all rows + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(!pager.io.block(|| cursor.next())?); + + // Try another failed seek (GT on maximum value) + assert_eq!( + pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::GT))?, + SeekResult::NotFound + ); + assert!(!cursor.is_valid()?); + + // Rewind again + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + Ok(()) + } +} diff --git a/core/incremental/dbsp.rs b/core/incremental/dbsp.rs index 962ceb9c8..607fd562b 100644 --- a/core/incremental/dbsp.rs +++ b/core/incremental/dbsp.rs @@ -1,19 +1,86 @@ // Simplified DBSP integration for incremental view maintenance // For now, we'll use a basic approach and can expand to full DBSP later -use std::collections::HashMap; +use super::hashable_row::HashableRow; +use crate::Value; +use std::collections::{BTreeMap, HashMap}; + +type DeltaEntry = (HashableRow, isize); +/// A delta represents ordered changes to data +#[derive(Debug, Clone, Default)] +pub struct Delta { + /// Ordered list of changes: (row, weight) where weight is 
+1 for insert, -1 for delete + /// It is crucial that this is ordered. Imagine the case of an update, which becomes a delete + + /// insert. If this is not ordered, it would be applied in arbitrary order and break the view. + pub changes: Vec, +} + +impl Delta { + pub fn new() -> Self { + Self { + changes: Vec::new(), + } + } + + pub fn insert(&mut self, row_key: i64, values: Vec) { + let row = HashableRow::new(row_key, values); + self.changes.push((row, 1)); + } + + pub fn delete(&mut self, row_key: i64, values: Vec) { + let row = HashableRow::new(row_key, values); + self.changes.push((row, -1)); + } + + pub fn is_empty(&self) -> bool { + self.changes.is_empty() + } + + pub fn len(&self) -> usize { + self.changes.len() + } + + /// Merge another delta into this one + /// This preserves the order of operations - no consolidation is done + /// to maintain the full history of changes + pub fn merge(&mut self, other: &Delta) { + // Simply append all changes from other, preserving order + self.changes.extend(other.changes.iter().cloned()); + } + + /// Consolidate changes by combining entries with the same HashableRow + pub fn consolidate(&mut self) { + if self.changes.is_empty() { + return; + } + + // Use a HashMap to accumulate weights + let mut consolidated: HashMap = HashMap::new(); + + for (row, weight) in self.changes.drain(..) { + *consolidated.entry(row).or_insert(0) += weight; + } + + // Convert back to vec, filtering out zero weights + self.changes = consolidated + .into_iter() + .filter(|(_, weight)| *weight != 0) + .collect(); + } +} /// A simplified ZSet for incremental computation /// Each element has a weight: positive for additions, negative for deletions #[derive(Clone, Debug, Default)] pub struct SimpleZSet { - data: HashMap, + data: BTreeMap, } -impl SimpleZSet { +#[allow(dead_code)] +impl SimpleZSet { pub fn new() -> Self { Self { - data: HashMap::new(), + data: BTreeMap::new(), } } @@ -45,36 +112,121 @@ impl SimpleZSet { self.insert(item.clone(), weight); } } -} -/// A simplified stream for incremental computation -#[derive(Clone, Debug)] -pub struct SimpleStream { - current: SimpleZSet, -} - -impl SimpleStream { - pub fn from_zset(zset: SimpleZSet) -> Self { - Self { current: zset } + /// Get the weight for a specific item (0 if not present) + pub fn get(&self, item: &T) -> isize { + self.data.get(item).copied().unwrap_or(0) } - /// Apply a delta (change) to the stream - pub fn apply_delta(&mut self, delta: &SimpleZSet) { - self.current.merge(delta); + /// Get the first element (smallest key) in the Z-set + pub fn first(&self) -> Option<(&T, isize)> { + self.data.iter().next().map(|(k, &v)| (k, v)) } - /// Get the current state as a vector of items (only positive weights) - pub fn to_vec(&self) -> Vec { - self.current.to_vec() + /// Get the last element (largest key) in the Z-set + pub fn last(&self) -> Option<(&T, isize)> { + self.data.iter().next_back().map(|(k, &v)| (k, v)) + } + + /// Get a range of elements + pub fn range(&self, range: R) -> impl Iterator + '_ + where + R: std::ops::RangeBounds, + { + self.data.range(range).map(|(k, &v)| (k, v)) + } + + /// Check if empty + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Get the number of elements + pub fn len(&self) -> usize { + self.data.len() } } // Type aliases for convenience -use super::hashable_row::HashableRow; - pub type RowKey = HashableRow; pub type RowKeyZSet = SimpleZSet; -pub type RowKeyStream = SimpleStream; + +impl RowKeyZSet { + /// Create a Z-set from a Delta by consolidating all 
changes + pub fn from_delta(delta: &Delta) -> Self { + let mut zset = Self::new(); + + // Add all changes from the delta, consolidating as we go + for (row, weight) in &delta.changes { + zset.insert(row.clone(), *weight); + } + + zset + } + + /// Seek to find ALL entries for the best matching rowid + /// For GT/GE: returns all entries for the smallest rowid that satisfies the condition + /// For LT/LE: returns all entries for the largest rowid that satisfies the condition + /// Returns empty vec if no match found + pub fn seek(&self, target: i64, op: crate::types::SeekOp) -> Vec<(HashableRow, isize)> { + use crate::types::SeekOp; + + // First find the best matching rowid + let best_rowid = match op { + SeekOp::GT => { + // Find smallest rowid > target + self.data + .iter() + .filter(|(row, _)| row.rowid > target) + .map(|(row, _)| row.rowid) + .min() + } + SeekOp::GE { eq_only: false } => { + // Find smallest rowid >= target + self.data + .iter() + .filter(|(row, _)| row.rowid >= target) + .map(|(row, _)| row.rowid) + .min() + } + SeekOp::GE { eq_only: true } | SeekOp::LE { eq_only: true } => { + // Need exact match + if self.data.iter().any(|(row, _)| row.rowid == target) { + Some(target) + } else { + None + } + } + SeekOp::LT => { + // Find largest rowid < target + self.data + .iter() + .filter(|(row, _)| row.rowid < target) + .map(|(row, _)| row.rowid) + .max() + } + SeekOp::LE { eq_only: false } => { + // Find largest rowid <= target + self.data + .iter() + .filter(|(row, _)| row.rowid <= target) + .map(|(row, _)| row.rowid) + .max() + } + }; + + // Now get ALL entries with that rowid + match best_rowid { + Some(rowid) => self + .data + .iter() + .filter(|(row, _)| row.rowid == rowid) + .map(|(k, &v)| (k.clone(), v)) + .collect(), + None => Vec::new(), + } + } +} #[cfg(test)] mod tests { diff --git a/core/incremental/hashable_row.rs b/core/incremental/hashable_row.rs index 46be59bde..799f88e87 100644 --- a/core/incremental/hashable_row.rs +++ b/core/incremental/hashable_row.rs @@ -78,3 +78,23 @@ impl Hash for HashableRow { self.cached_hash.hash(state); } } + +impl PartialOrd for HashableRow { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for HashableRow { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // First compare by rowid, then by values if rowids are equal + // This ensures Ord is consistent with Eq (which compares all fields) + match self.rowid.cmp(&other.rowid) { + std::cmp::Ordering::Equal => { + // If rowids are equal, compare values to maintain consistency with Eq + self.values.cmp(&other.values) + } + other => other, + } + } +} diff --git a/core/incremental/mod.rs b/core/incremental/mod.rs index 4c26b91ba..755a27351 100644 --- a/core/incremental/mod.rs +++ b/core/incremental/mod.rs @@ -1,4 +1,5 @@ pub mod compiler; +pub mod cursor; pub mod dbsp; pub mod expr_compiler; pub mod hashable_row; diff --git a/core/incremental/operator.rs b/core/incremental/operator.rs index 4f148e943..46a933b87 100644 --- a/core/incremental/operator.rs +++ b/core/incremental/operator.rs @@ -2,15 +2,321 @@ // Operator DAG for DBSP-style incremental computation // Based on Feldera DBSP design but adapted for Turso's architecture +use crate::function::{AggFunc, Func}; +use crate::incremental::dbsp::Delta; use crate::incremental::expr_compiler::CompiledExpression; use crate::incremental::hashable_row::HashableRow; -use crate::types::Text; -use crate::{Connection, Database, SymbolTable, Value}; -use std::collections::{HashMap, HashSet}; +use 
crate::storage::btree::{BTreeCursor, BTreeKey}; +use crate::types::{IOResult, SeekKey, SeekOp, SeekResult, Text}; +use crate::{ + return_and_restore_if_io, return_if_io, Connection, Database, Result, SymbolTable, Value, +}; +use std::collections::{BTreeMap, HashMap}; use std::fmt::{self, Debug, Display}; -use std::sync::Arc; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; use turso_macros::match_ignore_ascii_case; +use turso_parser::ast::{As, Expr, Literal, Name, OneSelect, Operator, ResultColumn}; + +#[derive(Debug)] +pub enum ReadRecord { + GetRecord, + Done { state: Option }, +} + +impl ReadRecord { + fn new() -> Self { + ReadRecord::GetRecord + } + + fn read_record( + &mut self, + key: SeekKey, + aggregates: &[AggregateFunction], + cursor: &mut BTreeCursor, + ) -> Result>> { + loop { + match self { + ReadRecord::GetRecord => { + let res = return_if_io!(cursor.seek(key.clone(), SeekOp::GE { eq_only: true })); + if !matches!(res, SeekResult::Found) { + *self = ReadRecord::Done { state: None }; + } else { + let record = return_if_io!(cursor.record()); + let r = record.ok_or_else(|| { + crate::LimboError::InternalError(format!( + "Found key {key:?} in aggregate storage but could not read record" + )) + })?; + let values = r.get_values(); + let blob = values[1].to_owned(); + + let (state, _group_key) = match blob { + Value::Blob(blob) => AggregateState::from_blob(&blob, aggregates) + .ok_or_else(|| { + crate::LimboError::InternalError(format!( + "Cannot deserialize aggregate state {blob:?}", + )) + }), + _ => Err(crate::LimboError::ParseError( + "Value in aggregator not blob".to_string(), + )), + }?; + *self = ReadRecord::Done { state: Some(state) } + } + } + ReadRecord::Done { state } => return Ok(IOResult::Done(state.clone())), + } + } + } +} + +#[derive(Debug)] +pub(crate) enum WriteRecord { + GetRecord, + Delete { final_weight: isize }, + Insert { final_weight: isize }, + Done, +} +impl WriteRecord { + fn new() -> Self { + WriteRecord::GetRecord + } + + fn write_record( + &mut self, + key: SeekKey, + record: HashableRow, + weight: isize, + cursor: &mut BTreeCursor, + ) -> Result> { + loop { + match self { + WriteRecord::GetRecord => { + let res = return_if_io!(cursor.seek(key.clone(), SeekOp::GE { eq_only: true })); + if !matches!(res, SeekResult::Found) { + *self = WriteRecord::Insert { + final_weight: weight, + }; + } else { + let existing_record = return_if_io!(cursor.record()); + let r = existing_record.ok_or_else(|| { + crate::LimboError::InternalError(format!( + "Found key {key:?} in aggregate storage but could not read record" + )) + })?; + let values = r.get_values(); + // values[2] should contain the weight + let existing_weight = match values[2].to_owned() { + Value::Integer(w) => w as isize, + _ => { + return Err(crate::LimboError::InternalError(format!( + "Invalid weight value in aggregate storage for key {key:?}" + ))) + } + }; + let final_weight = existing_weight + weight; + if final_weight <= 0 { + *self = WriteRecord::Delete { final_weight } + } else { + *self = WriteRecord::Insert { final_weight } + } + } + } + WriteRecord::Delete { final_weight: _ } => { + let res = return_if_io!(cursor.seek(key.clone(), SeekOp::GE { eq_only: true })); + if !matches!(res, SeekResult::Found) { + return Err(crate::LimboError::InternalError(format!( + "record not found for {key:?}, but we had just GetRecord! Should not be possible" + ))); + } + // Done - row was deleted and weights cancel out. 
+                    // If we initiated the delete, we will complete it, so Done has to be set
+                    // before the call below so we don't come back here.
+                    *self = WriteRecord::Done;
+                    return_if_io!(cursor.delete());
+                }
+                WriteRecord::Insert { final_weight } => {
+                    return_if_io!(cursor.seek(key.clone(), SeekOp::GE { eq_only: true }));
+                    // Build the key and insert the record
+                    let key_i64 = match key {
+                        SeekKey::TableRowId(id) => id,
+                        _ => {
+                            return Err(crate::LimboError::InternalError(
+                                "Expected TableRowId for aggregate storage".to_string(),
+                            ))
+                        }
+                    };
+                    // Create the record values: key, blob, weight
+                    let record_values = vec![
+                        Value::Integer(key_i64),
+                        record.values[0].clone(), // The blob with serialized state
+                        Value::Integer(*final_weight as i64),
+                    ];
+
+                    // Create an ImmutableRecord from the values
+                    let immutable_record = crate::types::ImmutableRecord::from_values(
+                        &record_values,
+                        record_values.len(),
+                    );
+                    let btree_key = BTreeKey::new_table_rowid(key_i64, Some(&immutable_record));
+
+                    *self = WriteRecord::Done;
+                    return_if_io!(cursor.insert(&btree_key));
+                }
+                WriteRecord::Done => {
+                    return Ok(IOResult::Done(()));
+                }
+            }
+        }
+    }
+}
+
+type ComputedStates = HashMap<String, (Vec<Value>, AggregateState)>; // group_key_str -> (group_key, state)
+#[derive(Debug)]
+enum AggregateCommitState {
+    Idle,
+    Eval {
+        eval_state: EvalState,
+    },
+    PersistDelta {
+        delta: Delta,
+        computed_states: ComputedStates,
+        current_idx: usize,
+        write_record: WriteRecord,
+    },
+    Done {
+        delta: Delta,
+    },
+    Invalid,
+}
+
+// eval() operates on uncommitted data, so its state can't be a member attribute of the
+// Operator. The state has to be kept by the caller.
+#[derive(Debug)]
+pub enum EvalState {
+    Uninitialized,
+    Init {
+        delta: Delta,
+    },
+    FetchData {
+        delta: Delta, // Keep original delta for merge operation
+        current_idx: usize,
+        groups_to_read: Vec<(String, Vec<Value>)>, // A Vec rather than a map, for index-based access
+        existing_groups: HashMap<String, AggregateState>,
+        old_values: HashMap<String, Vec<Value>>,
+        read_record_state: Box<ReadRecord>,
+    },
+    Done,
+}
+
+impl From<Delta> for EvalState {
+    fn from(delta: Delta) -> Self {
+        EvalState::Init { delta }
+    }
+}
+
+impl EvalState {
+    fn from_delta(delta: Delta) -> Self {
+        Self::Init { delta }
+    }
+
+    fn delta_ref(&self) -> &Delta {
+        match self {
+            EvalState::Init { delta } => delta,
+            _ => panic!("delta_ref() can only be called when in Init state"),
+        }
+    }
+
+    pub fn extract_delta(&mut self) -> Delta {
+        match self {
+            EvalState::Init { delta } => {
+                let extracted = std::mem::take(delta);
+                *self = EvalState::Uninitialized;
+                extracted
+            }
+            _ => panic!("extract_delta() can only be called when in Init state"),
+        }
+    }
+
+    fn advance(&mut self, groups_to_read: BTreeMap<String, Vec<Value>>) {
+        let delta = match self {
+            EvalState::Init { delta } => std::mem::take(delta),
+            _ => panic!("advance() can only be called when in Init state, current state: {self:?}"),
+        };
+
+        let _ = std::mem::replace(
+            self,
+            EvalState::FetchData {
+                delta,
+                current_idx: 0,
+                groups_to_read: groups_to_read.into_iter().collect(), // Convert BTreeMap to Vec
+                existing_groups: HashMap::new(),
+                old_values: HashMap::new(),
+                read_record_state: Box::new(ReadRecord::new()),
+            },
+        );
+    }
+
+    fn process_delta(
+        &mut self,
+        operator: &mut AggregateOperator,
+        cursor: &mut BTreeCursor,
+    ) -> Result<IOResult<(Delta, ComputedStates)>> {
+        loop {
+            match self {
+                EvalState::Uninitialized => {
+                    panic!("Cannot process_delta with Uninitialized state");
+                }
+                EvalState::Init { .. } => {
+                    panic!("State machine not supposed to reach the init state! 
advance() should have been called"); + } + EvalState::FetchData { + delta, + current_idx, + groups_to_read, + existing_groups, + old_values, + read_record_state, + } => { + if *current_idx >= groups_to_read.len() { + // All groups processed, compute final output + let result = + operator.merge_delta_with_existing(delta, existing_groups, old_values); + *self = EvalState::Done; + return Ok(IOResult::Done(result)); + } else { + // Get the current group to read + let (group_key_str, group_key) = &groups_to_read[*current_idx]; + + let seek_key = operator.generate_storage_key(group_key_str); + let key = SeekKey::TableRowId(seek_key); + + let state = return_if_io!(read_record_state.read_record( + key, + &operator.aggregates, + cursor + )); + + // Anything that mutates state has to happen after return_if_io! + // Unfortunately there's no good way to enforce that without turning + // this into a hot mess of mem::takes. + if let Some(state) = state { + let mut old_row = group_key.clone(); + old_row.extend(state.to_values(&operator.aggregates)); + old_values.insert(group_key_str.clone(), old_row); + existing_groups.insert(group_key_str.clone(), state.clone()); + } + + // All attributes mutated in place. + *current_idx += 1; + *read_record_state = Box::new(ReadRecord::new()); + } + } + EvalState::Done => { + return Ok(IOResult::Done((Delta::new(), HashMap::new()))); + } + } + } + } +} /// Tracks computation counts to verify incremental behavior (for tests now), and in the future /// should be used to provide statistics. @@ -56,69 +362,6 @@ impl ComputationTracker { } } -/// A delta represents ordered changes to data -#[derive(Debug, Clone, Default)] -pub struct Delta { - /// Ordered list of changes: (row, weight) where weight is +1 for insert, -1 for delete - /// It is crucial that this is ordered. Imagine the case of an update, which becomes a delete + - /// insert. If this is not ordered, it would be applied in arbitrary order and break the view. - pub changes: Vec<(HashableRow, isize)>, -} - -impl Delta { - pub fn new() -> Self { - Self { - changes: Vec::new(), - } - } - - pub fn insert(&mut self, row_key: i64, values: Vec) { - let row = HashableRow::new(row_key, values); - self.changes.push((row, 1)); - } - - pub fn delete(&mut self, row_key: i64, values: Vec) { - let row = HashableRow::new(row_key, values); - self.changes.push((row, -1)); - } - - pub fn is_empty(&self) -> bool { - self.changes.is_empty() - } - - pub fn len(&self) -> usize { - self.changes.len() - } - - /// Merge another delta into this one - /// This preserves the order of operations - no consolidation is done - /// to maintain the full history of changes - pub fn merge(&mut self, other: &Delta) { - // Simply append all changes from other, preserving order - self.changes.extend(other.changes.iter().cloned()); - } - - /// Consolidate changes by combining entries with the same HashableRow - pub fn consolidate(&mut self) { - if self.changes.is_empty() { - return; - } - - // Use a HashMap to accumulate weights - let mut consolidated: HashMap = HashMap::new(); - - for (row, weight) in self.changes.drain(..) 
{ - *consolidated.entry(row).or_insert(0) += weight; - } - - // Convert back to vec, filtering out zero weights - self.changes = consolidated - .into_iter() - .filter(|(_, weight)| *weight != 0) - .collect(); - } -} - #[cfg(test)] mod hashable_row_tests { use super::*; @@ -240,8 +483,6 @@ impl FilterPredicate { /// Parse a SQL AST expression into a FilterPredicate /// This centralizes all SQL-to-predicate parsing logic pub fn from_sql_expr(expr: &turso_parser::ast::Expr) -> crate::Result { - use turso_parser::ast::*; - let Expr::Binary(lhs, op, rhs) = expr else { return Err(crate::LimboError::ParseError( "Unsupported WHERE clause for incremental views: not a binary expression" @@ -323,8 +564,6 @@ impl FilterPredicate { /// Parse a WHERE clause from a SELECT statement pub fn from_select(select: &turso_parser::ast::Select) -> crate::Result { - use turso_parser::ast::*; - if let OneSelect::Select { ref where_clause, .. } = select.body.select @@ -391,8 +630,6 @@ impl AggregateFunction { func: &crate::function::Func, input_column: Option, ) -> Option { - use crate::function::{AggFunc, Func}; - match func { Func::Agg(agg_func) => { match agg_func { @@ -412,34 +649,77 @@ impl AggregateFunction { /// Operator DAG (Directed Acyclic Graph) /// Base trait for incremental operators pub trait IncrementalOperator: Debug { - /// Initialize with base data - fn initialize(&mut self, data: Delta); - - /// Evaluate the operator with a delta, without modifying internal state - /// This is used during query execution to compute results including uncommitted changes + /// Evaluate the operator with a state, without modifying internal state + /// This is used during query execution to compute results + /// May need to read from storage to get current state (e.g., for aggregates) /// /// # Arguments - /// * `delta` - The committed delta to process - /// * `uncommitted` - Optional uncommitted changes from the current transaction - fn eval(&self, delta: Delta, uncommitted: Option) -> Delta; + /// * `state` - The evaluation state (may be in progress from a previous I/O operation) + /// * `cursor` - Cursor for reading operator state from storage + /// + /// # Returns + /// The output delta from the evaluation + fn eval(&mut self, state: &mut EvalState, cursor: &mut BTreeCursor) -> Result>; /// Commit a delta to the operator's internal state and return the output /// This is called when a transaction commits, making changes permanent /// Returns the output delta (what downstream operators should see) - fn commit(&mut self, delta: Delta) -> Delta; - - /// Get current accumulated state - fn get_current_state(&self) -> Delta; + /// The cursor parameter is for operators that need to persist state + fn commit(&mut self, delta: Delta, cursor: &mut BTreeCursor) -> Result>; /// Set computation tracker fn set_tracker(&mut self, tracker: Arc>); } +/// Input operator - passes through input data unchanged +/// This operator is used for input nodes in the circuit to provide a uniform interface +#[derive(Debug)] +pub struct InputOperator { + name: String, +} + +impl InputOperator { + pub fn new(name: String) -> Self { + Self { name } + } + + pub fn name(&self) -> &str { + &self.name + } +} + +impl IncrementalOperator for InputOperator { + fn eval( + &mut self, + state: &mut EvalState, + _cursor: &mut BTreeCursor, + ) -> Result> { + match state { + EvalState::Init { delta } => { + let output = std::mem::take(delta); + *state = EvalState::Done; + Ok(IOResult::Done(output)) + } + _ => unreachable!( + "InputOperator doesn't execute the 
state machine. Should be in Init state" + ), + } + } + + fn commit(&mut self, delta: Delta, _cursor: &mut BTreeCursor) -> Result> { + // Input operator passes through the delta unchanged during commit + Ok(IOResult::Done(delta)) + } + + fn set_tracker(&mut self, _tracker: Arc>) { + // Input operator doesn't need tracking + } +} + /// Filter operator - filters rows based on predicate #[derive(Debug)] pub struct FilterOperator { predicate: FilterPredicate, - current_state: Delta, column_names: Vec, tracker: Option>>, } @@ -448,7 +728,6 @@ impl FilterOperator { pub fn new(predicate: FilterPredicate, column_names: Vec) -> Self { Self { predicate, - current_state: Delta::new(), column_names, tracker: None, } @@ -549,33 +828,22 @@ impl FilterOperator { } impl IncrementalOperator for FilterOperator { - fn initialize(&mut self, data: Delta) { - // Process initial data through filter - for (row, weight) in data.changes { - if let Some(tracker) = &self.tracker { - tracker.lock().unwrap().record_filter(); - } - - if self.evaluate_predicate(&row.values) { - self.current_state.changes.push((row, weight)); - } - } - } - - fn eval(&self, delta: Delta, uncommitted: Option) -> Delta { - let mut output_delta = Delta::new(); - - // Merge delta with uncommitted if present - let combined_delta = if let Some(uncommitted) = uncommitted { - let mut combined = delta; - combined.merge(&uncommitted); - combined - } else { - delta + fn eval( + &mut self, + state: &mut EvalState, + _cursor: &mut BTreeCursor, + ) -> Result> { + let delta = match state { + EvalState::Init { delta } => std::mem::take(delta), + _ => unreachable!( + "FilterOperator doesn't execute the state machine. Should be in Init state" + ), }; - // Process the combined delta through the filter - for (row, weight) in combined_delta.changes { + let mut output_delta = Delta::new(); + + // Process the delta through the filter + for (row, weight) in delta.changes { if let Some(tracker) = &self.tracker { tracker.lock().unwrap().record_filter(); } @@ -588,10 +856,11 @@ impl IncrementalOperator for FilterOperator { } } - output_delta + *state = EvalState::Done; + Ok(IOResult::Done(output_delta)) } - fn commit(&mut self, delta: Delta) -> Delta { + fn commit(&mut self, delta: Delta, _cursor: &mut BTreeCursor) -> Result> { let mut output_delta = Delta::new(); // Commit the delta to our internal state @@ -605,19 +874,11 @@ impl IncrementalOperator for FilterOperator { // For deletes, this means the row was in the view (its values pass the filter) // For inserts, this means the row should be in the view if self.evaluate_predicate(&row.values) { - self.current_state.changes.push((row.clone(), weight)); output_delta.changes.push((row, weight)); } } - output_delta - } - - fn get_current_state(&self) -> Delta { - // Return a consolidated view of the current state - let mut consolidated = self.current_state.clone(); - consolidated.consolidate(); - consolidated + Ok(IOResult::Done(output_delta)) } fn set_tracker(&mut self, tracker: Arc>) { @@ -631,7 +892,6 @@ pub struct ProjectOperator { columns: Vec, input_column_names: Vec, output_column_names: Vec, - current_state: Delta, tracker: Option>>, // Internal in-memory connection for expression evaluation // Programs are very dependent on having a connection, so give it one. 
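A minimal sketch of how a caller might drive one of these stateless operators through the
new EvalState protocol, assuming a pager and BTree root set up as in the
create_test_pager() helper further down; `predicate` and `column_names` are placeholder
values, not names from this patch:

    // Stateless operators (Input/Filter/Project) resolve in a single step:
    // EvalState::Init { delta } -> EvalState::Done, with no I/O on the cursor.
    let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10);
    let mut filter = FilterOperator::new(predicate, column_names);

    let mut delta = Delta::new();
    delta.insert(1, vec![Value::Integer(1), Value::Integer(42)]);

    let mut state = EvalState::from(delta);
    let output = pager.io.block(|| filter.eval(&mut state, &mut cursor)).unwrap();

Only AggregateOperator actually touches the cursor during eval(); for the stateless
operators it is passed purely for interface uniformity.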
@@ -652,7 +912,6 @@ impl std::fmt::Debug for ProjectOperator { .field("columns", &self.columns) .field("input_column_names", &self.input_column_names) .field("output_column_names", &self.output_column_names) - .field("current_state", &self.current_state) .field("tracker", &self.tracker) .finish_non_exhaustive() } @@ -665,8 +924,6 @@ impl ProjectOperator { input_column_names: Vec, schema: &crate::schema::Schema, ) -> crate::Result { - use turso_parser::ast::*; - // Set up internal connection for expression evaluation let io = Arc::new(crate::MemoryIO::new()); let db = Database::open_file( @@ -769,7 +1026,6 @@ impl ProjectOperator { columns, input_column_names, output_column_names, - current_state: Delta::new(), tracker: None, internal_conn, }) @@ -809,7 +1065,6 @@ impl ProjectOperator { columns, input_column_names, output_column_names, - current_state: Delta::new(), tracker: None, internal_conn, }) @@ -839,7 +1094,6 @@ impl ProjectOperator { } fn evaluate_expression(&self, expr: &turso_parser::ast::Expr, values: &[Value]) -> Value { - use turso_parser::ast::*; match expr { Expr::Id(name) => { if let Some(idx) = self @@ -970,44 +1224,35 @@ impl ProjectOperator { } impl IncrementalOperator for ProjectOperator { - fn initialize(&mut self, data: Delta) { - for (row, weight) in &data.changes { - if let Some(tracker) = &self.tracker { - tracker.lock().unwrap().record_project(); - } - - let projected = self.project_values(&row.values); - let projected_row = HashableRow::new(row.rowid, projected); - self.current_state.changes.push((projected_row, *weight)); - } - } - - fn eval(&self, delta: Delta, uncommitted: Option) -> Delta { - let mut output_delta = Delta::new(); - - // Merge delta with uncommitted if present - let combined_delta = if let Some(uncommitted) = uncommitted { - let mut combined = delta; - combined.merge(&uncommitted); - combined - } else { - delta + fn eval( + &mut self, + state: &mut EvalState, + _cursor: &mut BTreeCursor, + ) -> Result> { + let delta = match state { + EvalState::Init { delta } => std::mem::take(delta), + _ => unreachable!( + "ProjectOperator doesn't execute the state machine. 
Should be in Init state" + ), }; - for (row, weight) in &combined_delta.changes { + let mut output_delta = Delta::new(); + + for (row, weight) in delta.changes { if let Some(tracker) = &self.tracker { tracker.lock().unwrap().record_project(); } let projected = self.project_values(&row.values); let projected_row = HashableRow::new(row.rowid, projected); - output_delta.changes.push((projected_row, *weight)); + output_delta.changes.push((projected_row, weight)); } - output_delta + *state = EvalState::Done; + Ok(IOResult::Done(output_delta)) } - fn commit(&mut self, delta: Delta) -> Delta { + fn commit(&mut self, delta: Delta, _cursor: &mut BTreeCursor) -> Result> { let mut output_delta = Delta::new(); // Commit the delta to our internal state and build output @@ -1017,20 +1262,10 @@ impl IncrementalOperator for ProjectOperator { } let projected = self.project_values(&row.values); let projected_row = HashableRow::new(row.rowid, projected); - self.current_state - .changes - .push((projected_row.clone(), *weight)); output_delta.changes.push((projected_row, *weight)); } - output_delta - } - - fn get_current_state(&self) -> Delta { - // Return a consolidated view of the current state - let mut consolidated = self.current_state.clone(); - consolidated.consolidate(); - consolidated + Ok(crate::types::IOResult::Done(output_delta)) } fn set_tracker(&mut self, tracker: Arc>) { @@ -1040,28 +1275,29 @@ impl IncrementalOperator for ProjectOperator { /// Aggregate operator - performs incremental aggregation with GROUP BY /// Maintains running totals/counts that are updated incrementally -#[derive(Debug, Clone)] +/// +/// Note that the AggregateOperator essentially implements a ZSet, even +/// though the ZSet structure is never used explicitly. The on-disk btree +/// plays the role of the set! +#[derive(Debug)] pub struct AggregateOperator { + // Unique operator ID for indexing in persistent storage + operator_id: usize, // GROUP BY columns group_by: Vec, // Aggregate functions to compute aggregates: Vec, // Column names from input pub input_column_names: Vec, - // Aggregation state: group_key_str -> aggregate values - // For each group, we store the aggregate results - // We use String representation of group keys since Value doesn't implement Hash - group_states: HashMap, - // Map to keep track of actual group key values for output - group_key_values: HashMap>, - // Current output state as a Delta - current_state: Delta, tracker: Option>>, + + // State machine for commit operation + commit_state: AggregateCommitState, } /// State for a single group's aggregates #[derive(Debug, Clone)] -struct AggregateState { +pub struct AggregateState { // For COUNT: just the count count: i64, // For SUM: column_name -> sum value @@ -1081,6 +1317,158 @@ impl AggregateState { } } + // Serialize the aggregate state to a binary blob including group key values + // The reason we serialize it like this, instead of just writing the actual values, is that + // The same table may have different aggregators in the circuit. They will all have different + // columns. 
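+    // A sketch of the resulting layout (following the tags and field order used by
+    // to_blob()/from_blob() below):
+    //
+    //   [u8  version = 1]
+    //   [u32 LE: number of group key values]
+    //   for each group key value:
+    //       [u8 tag: 0=Null, 1=Integer, 2=Float, 3=Text, 4=Blob]
+    //       [payload: i64/f64 LE for tags 1-2; u32 LE length + raw bytes for tags 3-4]
+    //   [i64 LE: count]
+    //   for each aggregate, in declaration order:
+    //       Sum   -> [f64 LE running sum]
+    //       Avg   -> [f64 LE running sum][i64 LE running count]
+    //       Count -> nothing extra (the count is stored once, above)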
+ fn to_blob(&self, aggregates: &[AggregateFunction], group_key: &[Value]) -> Vec { + let mut blob = Vec::new(); + + // Write version byte for future compatibility + blob.push(1u8); + + // Write number of group key values + blob.extend_from_slice(&(group_key.len() as u32).to_le_bytes()); + + // Write each group key value + for value in group_key { + // Write value type tag + match value { + Value::Null => blob.push(0u8), + Value::Integer(i) => { + blob.push(1u8); + blob.extend_from_slice(&i.to_le_bytes()); + } + Value::Float(f) => { + blob.push(2u8); + blob.extend_from_slice(&f.to_le_bytes()); + } + Value::Text(s) => { + blob.push(3u8); + let text_str = s.as_str(); + let bytes = text_str.as_bytes(); + blob.extend_from_slice(&(bytes.len() as u32).to_le_bytes()); + blob.extend_from_slice(bytes); + } + Value::Blob(b) => { + blob.push(4u8); + blob.extend_from_slice(&(b.len() as u32).to_le_bytes()); + blob.extend_from_slice(b); + } + } + } + + // Write count as 8 bytes (little-endian) + blob.extend_from_slice(&self.count.to_le_bytes()); + + // Write each aggregate's state + for agg in aggregates { + match agg { + AggregateFunction::Sum(col_name) => { + let sum = self.sums.get(col_name).copied().unwrap_or(0.0); + blob.extend_from_slice(&sum.to_le_bytes()); + } + AggregateFunction::Avg(col_name) => { + let (sum, count) = self.avgs.get(col_name).copied().unwrap_or((0.0, 0)); + blob.extend_from_slice(&sum.to_le_bytes()); + blob.extend_from_slice(&count.to_le_bytes()); + } + AggregateFunction::Count => { + // Count is already written above + } + } + } + + blob + } + + /// Deserialize aggregate state from a binary blob + /// Returns the aggregate state and the group key values + fn from_blob(blob: &[u8], aggregates: &[AggregateFunction]) -> Option<(Self, Vec)> { + let mut cursor = 0; + + // Check version byte + if blob.get(cursor) != Some(&1u8) { + return None; + } + cursor += 1; + + // Read number of group key values + let num_group_keys = + u32::from_le_bytes(blob.get(cursor..cursor + 4)?.try_into().ok()?) as usize; + cursor += 4; + + // Read group key values + let mut group_key = Vec::new(); + for _ in 0..num_group_keys { + let value_type = *blob.get(cursor)?; + cursor += 1; + + let value = match value_type { + 0 => Value::Null, + 1 => { + let i = i64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + Value::Integer(i) + } + 2 => { + let f = f64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + Value::Float(f) + } + 3 => { + let len = + u32::from_le_bytes(blob.get(cursor..cursor + 4)?.try_into().ok()?) as usize; + cursor += 4; + let bytes = blob.get(cursor..cursor + len)?; + cursor += len; + let text_str = std::str::from_utf8(bytes).ok()?; + Value::Text(text_str.to_string().into()) + } + 4 => { + let len = + u32::from_le_bytes(blob.get(cursor..cursor + 4)?.try_into().ok()?) 
as usize; + cursor += 4; + let bytes = blob.get(cursor..cursor + len)?; + cursor += len; + Value::Blob(bytes.to_vec()) + } + _ => return None, + }; + group_key.push(value); + } + + // Read count + let count = i64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + + let mut state = Self::new(); + state.count = count; + + // Read each aggregate's state + for agg in aggregates { + match agg { + AggregateFunction::Sum(col_name) => { + let sum = f64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + state.sums.insert(col_name.clone(), sum); + } + AggregateFunction::Avg(col_name) => { + let sum = f64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + let count = i64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + state.avgs.insert(col_name.clone(), (sum, count)); + } + AggregateFunction::Count => { + // Count was already read above + } + } + } + + Some((state, group_key)) + } + /// Apply a delta to this aggregate state fn apply_delta( &mut self, @@ -1168,25 +1556,146 @@ impl AggregateState { impl AggregateOperator { pub fn new( + operator_id: usize, group_by: Vec, aggregates: Vec, input_column_names: Vec, ) -> Self { Self { + operator_id, group_by, aggregates, input_column_names, - group_states: HashMap::new(), - group_key_values: HashMap::new(), - current_state: Delta::new(), tracker: None, + commit_state: AggregateCommitState::Idle, } } + fn eval_internal( + &mut self, + state: &mut EvalState, + cursor: &mut BTreeCursor, + ) -> Result> { + match state { + EvalState::Uninitialized => { + panic!("Cannot eval AggregateOperator with Uninitialized state"); + } + EvalState::Init { delta } => { + if delta.changes.is_empty() { + *state = EvalState::Done; + return Ok(IOResult::Done((Delta::new(), HashMap::new()))); + } + + let mut groups_to_read = BTreeMap::new(); + for (row, _weight) in &delta.changes { + // Extract group key using cloned fields + let group_key = self.extract_group_key(&row.values); + let group_key_str = Self::group_key_to_string(&group_key); + groups_to_read.insert(group_key_str, group_key); + } + state.advance(groups_to_read); + } + EvalState::FetchData { .. } => { + // Already in progress, continue processing on process_delta below. + } + EvalState::Done => { + panic!("unreachable state! 
should have returned"); + } + } + + // Process the delta through the state machine + let result = return_if_io!(state.process_delta(self, cursor)); + Ok(IOResult::Done(result)) + } + + fn merge_delta_with_existing( + &mut self, + delta: &Delta, + existing_groups: &mut HashMap, + old_values: &mut HashMap>, + ) -> (Delta, HashMap, AggregateState)>) { + let mut output_delta = Delta::new(); + let mut temp_keys: HashMap> = HashMap::new(); + + // Process each change in the delta + for (row, weight) in &delta.changes { + if let Some(tracker) = &self.tracker { + tracker.lock().unwrap().record_aggregation(); + } + + // Extract group key + let group_key = self.extract_group_key(&row.values); + let group_key_str = Self::group_key_to_string(&group_key); + + let state = existing_groups + .entry(group_key_str.clone()) + .or_insert_with(AggregateState::new); + + temp_keys.insert(group_key_str.clone(), group_key.clone()); + + // Apply the delta to the temporary state + state.apply_delta( + &row.values, + *weight, + &self.aggregates, + &self.input_column_names, + ); + } + + // Generate output delta from temporary states and collect final states + let mut final_states = HashMap::new(); + + for (group_key_str, state) in existing_groups { + let group_key = temp_keys.get(group_key_str).cloned().unwrap_or_default(); + + // Generate a unique rowid for this group + let result_key = self.generate_group_rowid(group_key_str); + + if let Some(old_row_values) = old_values.get(group_key_str) { + let old_row = HashableRow::new(result_key, old_row_values.clone()); + output_delta.changes.push((old_row, -1)); + } + + // Always store the state for persistence (even if count=0, we need to delete it) + final_states.insert(group_key_str.clone(), (group_key.clone(), state.clone())); + + // Only include groups with count > 0 in the output delta + if state.count > 0 { + // Build output row: group_by columns + aggregate values + let mut output_values = group_key.clone(); + output_values.extend(state.to_values(&self.aggregates)); + + let output_row = HashableRow::new(result_key, output_values); + output_delta.changes.push((output_row, 1)); + } + } + (output_delta, final_states) + } + pub fn set_tracker(&mut self, tracker: Arc>) { self.tracker = Some(tracker); } + /// Generate a rowid for a group + /// For no GROUP BY: always returns 0 + /// For GROUP BY: returns a hash of the group key string + fn generate_group_rowid(&self, group_key_str: &str) -> i64 { + if self.group_by.is_empty() { + 0 + } else { + group_key_str + .bytes() + .fold(0i64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as i64)) + } + } + + /// Generate the composite key for BTree storage + /// Combines operator_id and group hash + fn generate_storage_key(&self, group_key_str: &str) -> i64 { + let group_hash = self.generate_group_rowid(group_key_str); + (self.operator_id as i64) << 32 | (group_hash & 0xFFFFFFFF) + } + /// Extract group key values from a row fn extract_group_key(&self, values: &[Value]) -> Vec { let mut key = Vec::new(); @@ -1214,215 +1723,102 @@ impl AggregateOperator { .join(",") } - /// Process a delta and update aggregate state incrementally - pub fn process_delta(&mut self, delta: Delta) -> Delta { - let mut output_delta = Delta::new(); - - // Track which groups were modified and their old values - let mut modified_groups = HashSet::new(); - let mut old_values: HashMap> = HashMap::new(); - - // Process each change in the delta - for (row, weight) in &delta.changes { - if let Some(tracker) = &self.tracker { - 
tracker.lock().unwrap().record_aggregation(); - } - - // Extract group key - let group_key = self.extract_group_key(&row.values); - let group_key_str = Self::group_key_to_string(&group_key); - - // Store old aggregate values BEFORE applying the delta - // (only for the first time we see this group in this batch) - if !modified_groups.contains(&group_key_str) { - if let Some(state) = self.group_states.get(&group_key_str) { - let mut old_row = group_key.clone(); - old_row.extend(state.to_values(&self.aggregates)); - old_values.insert(group_key_str.clone(), old_row); - } - } - - modified_groups.insert(group_key_str.clone()); - - // Store the actual group key values - self.group_key_values - .insert(group_key_str.clone(), group_key.clone()); - - // Get or create aggregate state for this group - let state = self - .group_states - .entry(group_key_str.clone()) - .or_insert_with(AggregateState::new); - - // Apply the delta to the aggregate state - state.apply_delta( - &row.values, - *weight, - &self.aggregates, - &self.input_column_names, - ); - } - - // Generate output delta for modified groups - for group_key_str in modified_groups { - // Get the actual group key values - let group_key = self - .group_key_values - .get(&group_key_str) - .cloned() - .unwrap_or_default(); - - // Generate a unique key for this group - // We use a hash of the group key to ensure consistency - let result_key = group_key_str - .bytes() - .fold(0i64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as i64)); - - // Emit retraction for old value if it existed - if let Some(old_row_values) = old_values.get(&group_key_str) { - let old_row = HashableRow::new(result_key, old_row_values.clone()); - output_delta.changes.push((old_row.clone(), -1)); - // Also remove from current state - self.current_state.changes.push((old_row, -1)); - } - - if let Some(state) = self.group_states.get(&group_key_str) { - // Build output row: group_by columns + aggregate values - let mut output_values = group_key.clone(); - output_values.extend(state.to_values(&self.aggregates)); - - // Check if group should be removed (count is 0) - if state.count > 0 { - // Add to output delta with positive weight - let output_row = HashableRow::new(result_key, output_values.clone()); - output_delta.changes.push((output_row.clone(), 1)); - - // Update current state - self.current_state.changes.push((output_row, 1)); - } else { - // Group has count=0, remove from state - // (we already emitted the retraction above if needed) - self.group_states.remove(&group_key_str); - self.group_key_values.remove(&group_key_str); - } - } - } - - // Consolidate current state to handle removals - self.current_state.consolidate(); - - output_delta + fn seek_key_from_str(&self, group_key_str: &str) -> SeekKey { + // Calculate the composite key for seeking + let key_i64 = self.generate_storage_key(group_key_str); + SeekKey::TableRowId(key_i64) } - pub fn get_current_state(&self) -> &Delta { - &self.current_state + fn seek_key(&self, row: HashableRow) -> SeekKey { + // Extract group key for first row + let group_key = self.extract_group_key(&row.values); + let group_key_str = Self::group_key_to_string(&group_key); + self.seek_key_from_str(&group_key_str) } } impl IncrementalOperator for AggregateOperator { - fn initialize(&mut self, data: Delta) { - // Process all initial data - this modifies state during initialization - let _ = self.process_delta(data); + fn eval(&mut self, state: &mut EvalState, cursor: &mut BTreeCursor) -> Result> { + let (delta, _) = 
return_if_io!(self.eval_internal(state, cursor));
+        Ok(IOResult::Done(delta))
     }
 
-    fn eval(&self, delta: Delta, uncommitted: Option<Delta>) -> Delta {
-        // Clone the current state to work with temporarily
-        let mut temp_group_states = self.group_states.clone();
-        let mut temp_group_key_values = self.group_key_values.clone();
-
-        // Merge delta with uncommitted if present
-        let combined_delta = if let Some(uncommitted) = uncommitted {
-            let mut combined = delta;
-            combined.merge(&uncommitted);
-            combined
-        } else {
-            delta
-        };
-
-        let mut output_delta = Delta::new();
-        let mut modified_groups = HashSet::new();
-        let mut old_values: HashMap<String, Vec<Value>> = HashMap::new();
-
-        // Process each change in the combined delta using temporary state
-        for (row, weight) in &combined_delta.changes {
-            if let Some(tracker) = &self.tracker {
-                tracker.lock().unwrap().record_aggregation();
-            }
-
-            // Extract group key
-            let group_key = self.extract_group_key(&row.values);
-            let group_key_str = Self::group_key_to_string(&group_key);
-
-            // Store old aggregate values BEFORE applying the delta
-            if !modified_groups.contains(&group_key_str) {
-                if let Some(state) = temp_group_states.get(&group_key_str) {
-                    let mut old_row = group_key.clone();
-                    old_row.extend(state.to_values(&self.aggregates));
-                    old_values.insert(group_key_str.clone(), old_row);
+    fn commit(&mut self, delta: Delta, cursor: &mut BTreeCursor) -> Result<IOResult<Delta>> {
+        loop {
+            // Note: because we std::mem::replace here (without it, the borrow checker goes nuts,
+            // because we call self.eval_internal, which requires a mutable borrow), we have to
+            // restore the state if we return I/O. So we can't use return_if_io!
+            let mut state =
+                std::mem::replace(&mut self.commit_state, AggregateCommitState::Invalid);
+            match &mut state {
+                AggregateCommitState::Invalid => {
+                    panic!("Reached invalid state! 
State was replaced, and not replaced back"); } - } + AggregateCommitState::Idle => { + let eval_state = EvalState::from_delta(delta.clone()); + self.commit_state = AggregateCommitState::Eval { eval_state }; + } + AggregateCommitState::Eval { ref mut eval_state } => { + let (output_delta, computed_states) = return_and_restore_if_io!( + &mut self.commit_state, + state, + self.eval_internal(eval_state, cursor) + ); + self.commit_state = AggregateCommitState::PersistDelta { + delta: output_delta, + computed_states, + current_idx: 0, + write_record: WriteRecord::new(), + }; + } + AggregateCommitState::PersistDelta { + delta, + computed_states, + current_idx, + write_record, + } => { + let states_vec: Vec<_> = computed_states.iter().collect(); - modified_groups.insert(group_key_str.clone()); - temp_group_key_values.insert(group_key_str.clone(), group_key.clone()); + if *current_idx >= states_vec.len() { + self.commit_state = AggregateCommitState::Done { + delta: delta.clone(), + }; + } else { + let (group_key_str, (group_key, agg_state)) = states_vec[*current_idx]; - // Get or create aggregate state for this group in temporary state - let state = temp_group_states - .entry(group_key_str.clone()) - .or_insert_with(AggregateState::new); + let seek_key = self.seek_key_from_str(group_key_str); - // Apply the delta to the temporary aggregate state - state.apply_delta( - &row.values, - *weight, - &self.aggregates, - &self.input_column_names, - ); - } + // Determine weight: -1 to delete (cancels existing weight=1), 1 to insert/update + let weight = if agg_state.count == 0 { -1 } else { 1 }; - // Generate output delta for modified groups using temporary state - for group_key_str in modified_groups { - let group_key = temp_group_key_values - .get(&group_key_str) - .cloned() - .unwrap_or_default(); + // Serialize the aggregate state with group key (even for deletion, we need a row) + let state_blob = agg_state.to_blob(&self.aggregates, group_key); + let blob_row = HashableRow::new(0, vec![Value::Blob(state_blob)]); - // Generate a unique key for this group - let result_key = group_key_str - .bytes() - .fold(0i64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as i64)); + return_and_restore_if_io!( + &mut self.commit_state, + state, + write_record.write_record(seek_key, blob_row, weight, cursor) + ); - // Emit retraction for old value if it existed - if let Some(old_row_values) = old_values.get(&group_key_str) { - let old_row = HashableRow::new(result_key, old_row_values.clone()); - output_delta.changes.push((old_row, -1)); - } + let delta = std::mem::take(delta); + let computed_states = std::mem::take(computed_states); - if let Some(state) = temp_group_states.get(&group_key_str) { - // Build output row: group_by columns + aggregate values - let mut output_values = group_key.clone(); - output_values.extend(state.to_values(&self.aggregates)); - - // Check if group should be included (count > 0) - if state.count > 0 { - let output_row = HashableRow::new(result_key, output_values); - output_delta.changes.push((output_row, 1)); + self.commit_state = AggregateCommitState::PersistDelta { + delta, + computed_states, + current_idx: *current_idx + 1, + write_record: WriteRecord::new(), // Reset for next write + }; + } + } + AggregateCommitState::Done { delta } => { + self.commit_state = AggregateCommitState::Idle; + let delta = std::mem::take(delta); + return Ok(IOResult::Done(delta)); } } } - - output_delta - } - - fn commit(&mut self, delta: Delta) -> Delta { - // Actually update the internal state when committing 
and return the output - self.process_delta(delta) - } - - fn get_current_state(&self) -> Delta { - // Return a consolidated view of the current state - let mut consolidated = self.current_state.clone(); - consolidated.consolidate(); - consolidated } fn set_tracker(&mut self, tracker: Arc>) { @@ -1433,10 +1829,101 @@ impl IncrementalOperator for AggregateOperator { #[cfg(test)] mod tests { use super::*; + use crate::storage::pager::CreateBTreeFlags; use crate::types::Text; + use crate::util::IOExt; use crate::Value; + use crate::{Database, MemoryIO, IO}; use std::sync::{Arc, Mutex}; + /// Create a test pager for operator tests + fn create_test_pager() -> (std::rc::Rc, usize) { + let io: Arc = Arc::new(MemoryIO::new()); + let db = Database::open_file(io.clone(), ":memory:", false, false).unwrap(); + let conn = db.connect().unwrap(); + + let pager = conn.pager.borrow().clone(); + + // Allocate page 1 first (database header) + let _ = pager.io.block(|| pager.allocate_page1()); + + // Properly create a BTree for aggregate state using the pager API + let root_page_id = pager + .io + .block(|| pager.btree_create(&CreateBTreeFlags::new_table())) + .expect("Failed to create BTree for aggregate state") + as usize; + + (pager, root_page_id) + } + + /// Read the current state from the BTree (for testing) + /// Returns a Delta with all the current aggregate values + fn get_current_state_from_btree( + agg: &AggregateOperator, + pager: &std::rc::Rc, + cursor: &mut BTreeCursor, + ) -> Delta { + let mut result = Delta::new(); + + // Rewind to start of table + pager.io.block(|| cursor.rewind()).unwrap(); + + loop { + // Check if cursor is empty (no more rows) + if cursor.is_empty() { + break; + } + + // Get the record at this position + let record = pager + .io + .block(|| cursor.record()) + .unwrap() + .unwrap() + .to_owned(); + + let values_ref = record.get_values(); + let values: Vec = values_ref.into_iter().map(|x| x.to_owned()).collect(); + + // Check if this record belongs to our operator + if let Some(Value::Integer(key)) = values.first() { + let operator_part = (key >> 32) as usize; + + // Skip if not our operator + if operator_part != agg.operator_id { + pager.io.block(|| cursor.next()).unwrap(); + continue; + } + + // Get the blob data + if let Some(Value::Blob(blob)) = values.get(1) { + // Deserialize the state + if let Some((state, group_key)) = + AggregateState::from_blob(blob, &agg.aggregates) + { + // Should not have made it this far. + assert!(state.count != 0); + // Build output row: group_by columns + aggregate values + let mut output_values = group_key.clone(); + output_values.extend(state.to_values(&agg.aggregates)); + + let group_key_str = AggregateOperator::group_key_to_string(&group_key); + let rowid = agg.generate_group_rowid(&group_key_str); + + let output_row = HashableRow::new(rowid, output_values); + result.changes.push((output_row, 1)); + } + } + } + + pager.io.block(|| cursor.next()).unwrap(); + } + + result.consolidate(); + result + } + /// Assert that we're doing incremental work, not full recomputation fn assert_incremental(tracker: &ComputationTracker, expected_ops: usize, data_size: usize) { assert!( @@ -1464,8 +1951,13 @@ mod tests { // the operator emits both a retraction (-1) of the old value // and an insertion (+1) of the new value. 
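+        // As a concrete sketch of that shape (using this test's data, where SUM(age)
+        // moves from 90 to 92), the committed output delta should look like:
+        //   [ (aggregate row [90], weight -1),   // retraction of the old sum
+        //     (aggregate row [92], weight +1) ]  // insertion of the new sum
+        // Both rows share the same rowid, since generate_group_rowid() returns 0
+        // when there is no GROUP BY.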
+ // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + // Create an aggregate operator for SUM(age) with no GROUP BY let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec![], // No GROUP BY vec![AggregateFunction::Sum("age".to_string())], vec!["id".to_string(), "name".to_string(), "age".to_string()], @@ -1499,10 +1991,13 @@ mod tests { ); // Initialize with initial data - agg.initialize(initial_delta); + pager + .io + .block(|| agg.commit(initial_delta.clone(), &mut cursor)) + .unwrap(); // Verify initial state: SUM(age) = 25 + 30 + 35 = 90 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 1, "Should have one aggregate row"); let (row, weight) = &state.changes[0]; assert_eq!(*weight, 1, "Aggregate row should have weight 1"); @@ -1520,8 +2015,10 @@ mod tests { ); // Process the incremental update - let output_delta = agg.eval(update_delta.clone(), None); - agg.commit(update_delta); + let output_delta = pager + .io + .block(|| agg.commit(update_delta.clone(), &mut cursor)) + .unwrap(); // CRITICAL: The output delta should contain TWO changes: // 1. Retraction of old aggregate value (90) with weight -1 @@ -1568,7 +2065,12 @@ mod tests { // the operator emits both retractions and insertions correctly for each group. // Create an aggregate operator for SUM(score) GROUP BY team + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["team".to_string()], // GROUP BY team vec![AggregateFunction::Sum("score".to_string())], vec![ @@ -1610,10 +2112,13 @@ mod tests { ); // Initialize with initial data - agg.initialize(initial_delta); + pager + .io + .block(|| agg.commit(initial_delta.clone(), &mut cursor)) + .unwrap(); // Verify initial state: red team = 30, blue team = 15 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 2, "Should have two groups"); // Find the red and blue team aggregates @@ -1653,8 +2158,10 @@ mod tests { ); // Process the incremental update - let output_delta = agg.eval(update_delta.clone(), None); - agg.commit(update_delta); + let output_delta = pager + .io + .block(|| agg.commit(update_delta.clone(), &mut cursor)) + .unwrap(); // Should have 2 changes: retraction of old red team sum, insertion of new red team sum // Blue team should NOT be affected @@ -1703,8 +2210,13 @@ mod tests { fn test_count_increments_not_recounts() { let tracker = Arc::new(Mutex::new(ComputationTracker::new())); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + // Create COUNT(*) GROUP BY category let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["category".to_string()], vec![AggregateFunction::Count], vec![ @@ -1728,7 +2240,10 @@ mod tests { ], ); } - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Reset tracker for delta processing tracker.lock().unwrap().aggregation_updates = 0; @@ -1744,15 +2259,15 @@ mod tests { ], ); - let _output = agg.eval(delta.clone(), None); - 
agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); - // Should update one group (cat_0) twice - once in eval, once in commit - // This is still incremental - we're not recounting all groups - assert_eq!(tracker.lock().unwrap().aggregation_updates, 2); + assert_eq!(tracker.lock().unwrap().aggregation_updates, 1); // Check the final state - cat_0 should now have count 11 - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let cat_0 = final_state .changes .iter() @@ -1770,7 +2285,12 @@ mod tests { let tracker = Arc::new(Mutex::new(ComputationTracker::new())); // Create SUM(amount) GROUP BY product + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["product".to_string()], vec![AggregateFunction::Sum("amount".to_string())], vec![ @@ -1807,10 +2327,13 @@ mod tests { Value::Integer(150), ], ); - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Check initial state: Widget=250, Gadget=200 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let widget_sum = state .changes .iter() @@ -1833,14 +2356,15 @@ mod tests { ], ); - let _output = agg.eval(delta.clone(), None); - agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); - // Should update Widget group twice (once in eval, once in commit) - assert_eq!(tracker.lock().unwrap().aggregation_updates, 2); + assert_eq!(tracker.lock().unwrap().aggregation_updates, 1); // Check final state - Widget should now be 300 (250 + 50) - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let widget = final_state .changes .iter() @@ -1852,7 +2376,12 @@ mod tests { #[test] fn test_count_and_sum_together() { // Test the example from DBSP_ROADMAP: COUNT(*) and SUM(amount) GROUP BY user_id + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["user_id".to_string()], vec![ AggregateFunction::Count, @@ -1879,12 +2408,15 @@ mod tests { 3, vec![Value::Integer(3), Value::Integer(2), Value::Integer(150)], ); - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Check initial state // User 1: count=2, sum=300 // User 2: count=1, sum=150 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 2); let user1 = state @@ -1911,11 +2443,13 @@ mod tests { 4, vec![Value::Integer(4), Value::Integer(1), Value::Integer(50)], ); - let _output = agg.eval(delta.clone(), None); - agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); // Check final state - user 1 should have updated count and sum - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let user1 = final_state .changes .iter() @@ -1928,7 +2462,12 @@ mod tests { #[test] fn test_avg_maintains_sum_and_count() { // Test AVG aggregation + // Create 
a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["category".to_string()], vec![AggregateFunction::Avg("value".to_string())], vec![ @@ -1964,12 +2503,15 @@ mod tests { Value::Integer(30), ], ); - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Check initial averages // Category A: avg = (10 + 20) / 2 = 15 // Category B: avg = 30 / 1 = 30 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let cat_a = state .changes .iter() @@ -1996,11 +2538,13 @@ mod tests { Value::Integer(30), ], ); - let _output = agg.eval(delta.clone(), None); - agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); // Check final state - Category A avg should now be (10 + 20 + 30) / 3 = 20 - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let cat_a = final_state .changes .iter() @@ -2012,7 +2556,12 @@ mod tests { #[test] fn test_delete_updates_aggregates() { // Test that deletes (negative weights) properly update aggregates + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["category".to_string()], vec![ AggregateFunction::Count, @@ -2043,10 +2592,13 @@ mod tests { Value::Integer(200), ], ); - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Check initial state: count=2, sum=300 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert!(!state.changes.is_empty()); let (row, _weight) = &state.changes[0]; assert_eq!(row.values[1], Value::Integer(2)); // count @@ -2063,11 +2615,13 @@ mod tests { ], ); - let _output = agg.eval(delta.clone(), None); - agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); // Check final state - should update to count=1, sum=200 - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let cat_a = final_state .changes .iter() @@ -2083,17 +2637,29 @@ mod tests { let group_by = vec!["category".to_string()]; let input_columns = vec!["category".to_string(), "value".to_string()]; - let mut agg = AggregateOperator::new(group_by, aggregates.clone(), input_columns); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + + let mut agg = AggregateOperator::new( + 1, // operator_id for testing + group_by, + aggregates.clone(), + input_columns, + ); // Initialize with data let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Text("A".into()), Value::Integer(10)]); init_data.insert(2, vec![Value::Text("A".into()), Value::Integer(20)]); init_data.insert(3, vec![Value::Text("B".into()), Value::Integer(30)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial counts - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, 
&pager, &mut cursor); assert_eq!(state.changes.len(), 2); // Find group A and B @@ -2115,14 +2681,16 @@ mod tests { let mut delete_delta = Delta::new(); delete_delta.delete(1, vec![Value::Text("A".into()), Value::Integer(10)]); - let output = agg.eval(delete_delta.clone(), None); - agg.commit(delete_delta); + let output = pager + .io + .block(|| agg.commit(delete_delta.clone(), &mut cursor)) + .unwrap(); // Should emit retraction for old count and insertion for new count assert_eq!(output.changes.len(), 2); // Check final state - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a_final = final_state .changes .iter() @@ -2134,13 +2702,15 @@ mod tests { let mut delete_all_b = Delta::new(); delete_all_b.delete(3, vec![Value::Text("B".into()), Value::Integer(30)]); - let output_b = agg.eval(delete_all_b.clone(), None); - agg.commit(delete_all_b); + let output_b = pager + .io + .block(|| agg.commit(delete_all_b.clone(), &mut cursor)) + .unwrap(); assert_eq!(output_b.changes.len(), 1); // Only retraction, no new row assert_eq!(output_b.changes[0].1, -1); // Retraction // Final state should not have group B - let final_state2 = agg.get_current_state(); + let final_state2 = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(final_state2.changes.len(), 1); // Only group A remains assert_eq!(final_state2.changes[0].0.values[0], Value::Text("A".into())); } @@ -2151,7 +2721,16 @@ mod tests { let group_by = vec!["category".to_string()]; let input_columns = vec!["category".to_string(), "value".to_string()]; - let mut agg = AggregateOperator::new(group_by, aggregates.clone(), input_columns); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + + let mut agg = AggregateOperator::new( + 1, // operator_id for testing + group_by, + aggregates.clone(), + input_columns, + ); // Initialize with data let mut init_data = Delta::new(); @@ -2159,10 +2738,13 @@ mod tests { init_data.insert(2, vec![Value::Text("A".into()), Value::Integer(20)]); init_data.insert(3, vec![Value::Text("B".into()), Value::Integer(30)]); init_data.insert(4, vec![Value::Text("B".into()), Value::Integer(15)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial sums - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2181,11 +2763,13 @@ mod tests { let mut delete_delta = Delta::new(); delete_delta.delete(2, vec![Value::Text("A".into()), Value::Integer(20)]); - let _ = agg.eval(delete_delta.clone(), None); - agg.commit(delete_delta); + pager + .io + .block(|| agg.commit(delete_delta.clone(), &mut cursor)) + .unwrap(); // Check updated sum - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2198,11 +2782,13 @@ mod tests { delete_all_b.delete(3, vec![Value::Text("B".into()), Value::Integer(30)]); delete_all_b.delete(4, vec![Value::Text("B".into()), Value::Integer(15)]); - let _ = agg.eval(delete_all_b.clone(), None); - agg.commit(delete_all_b); + pager + .io + .block(|| agg.commit(delete_all_b.clone(), &mut cursor)) + .unwrap(); // Group B should be gone - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, 
&pager, &mut cursor); assert_eq!(final_state.changes.len(), 1); // Only group A remains assert_eq!(final_state.changes[0].0.values[0], Value::Text("A".into())); } @@ -2213,17 +2799,29 @@ mod tests { let group_by = vec!["category".to_string()]; let input_columns = vec!["category".to_string(), "value".to_string()]; - let mut agg = AggregateOperator::new(group_by, aggregates.clone(), input_columns); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + + let mut agg = AggregateOperator::new( + 1, // operator_id for testing + group_by, + aggregates.clone(), + input_columns, + ); // Initialize with data let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Text("A".into()), Value::Integer(10)]); init_data.insert(2, vec![Value::Text("A".into()), Value::Integer(20)]); init_data.insert(3, vec![Value::Text("A".into()), Value::Integer(30)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial average - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.values[1], Value::Float(20.0)); // AVG = (10+20+30)/3 = 20 @@ -2231,21 +2829,25 @@ mod tests { let mut delete_delta = Delta::new(); delete_delta.delete(2, vec![Value::Text("A".into()), Value::Integer(20)]); - let _ = agg.eval(delete_delta.clone(), None); - agg.commit(delete_delta); + pager + .io + .block(|| agg.commit(delete_delta.clone(), &mut cursor)) + .unwrap(); // Check updated average - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes[0].0.values[1], Value::Float(20.0)); // AVG = (10+30)/2 = 20 (same!) 
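+        // (Deleting a value equal to the current mean leaves the mean unchanged:
+        // (10 + 30) / 2 == (10 + 20 + 30) / 3 == 20.)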
// Delete another to change the average let mut delete_another = Delta::new(); delete_another.delete(3, vec![Value::Text("A".into()), Value::Integer(30)]); - let _ = agg.eval(delete_another.clone(), None); - agg.commit(delete_another); + pager + .io + .block(|| agg.commit(delete_another.clone(), &mut cursor)) + .unwrap(); - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes[0].0.values[1], Value::Float(10.0)); // AVG = 10/1 = 10 } @@ -2260,17 +2862,29 @@ mod tests { let group_by = vec!["category".to_string()]; let input_columns = vec!["category".to_string(), "value".to_string()]; - let mut agg = AggregateOperator::new(group_by, aggregates.clone(), input_columns); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + + let mut agg = AggregateOperator::new( + 1, // operator_id for testing + group_by, + aggregates.clone(), + input_columns, + ); // Initialize with data let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Text("A".into()), Value::Integer(100)]); init_data.insert(2, vec![Value::Text("A".into()), Value::Integer(200)]); init_data.insert(3, vec![Value::Text("B".into()), Value::Integer(50)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial state - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2285,11 +2899,13 @@ mod tests { let mut delete_delta = Delta::new(); delete_delta.delete(1, vec![Value::Text("A".into()), Value::Integer(100)]); - let _ = agg.eval(delete_delta.clone(), None); - agg.commit(delete_delta); + pager + .io + .block(|| agg.commit(delete_delta.clone(), &mut cursor)) + .unwrap(); // Check all aggregates updated correctly - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2304,10 +2920,12 @@ mod tests { let mut insert_delta = Delta::new(); insert_delta.insert(4, vec![Value::Text("A".into()), Value::Float(50.5)]); - let _ = agg.eval(insert_delta.clone(), None); - agg.commit(insert_delta); + pager + .io + .block(|| agg.commit(insert_delta.clone(), &mut cursor)) + .unwrap(); - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2324,6 +2942,10 @@ mod tests { // When a row's rowid changes (e.g., UPDATE t SET a=1 WHERE a=3 on INTEGER PRIMARY KEY), // the operator should properly consolidate the state + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut filter = FilterOperator::new( FilterPredicate::GreaterThan { column: "b".to_string(), @@ -2335,10 +2957,12 @@ mod tests { // Initialize with a row (rowid=3, values=[3, 3]) let mut init_data = Delta::new(); init_data.insert(3, vec![Value::Integer(3), Value::Integer(3)]); - filter.initialize(init_data); + let state = pager + .io + .block(|| filter.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial state - let state = filter.get_current_state(); assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.rowid, 3); assert_eq!( @@ -2352,29 +2976,15 @@ mod tests { update_delta.delete(3, 
vec![Value::Integer(3), Value::Integer(3)]);
        update_delta.insert(1, vec![Value::Integer(1), Value::Integer(3)]);

-        let output = filter.eval(update_delta.clone(), None);
-        filter.commit(update_delta);
+        let output = pager
+            .io
+            .block(|| filter.commit(update_delta.clone(), &mut cursor))
+            .unwrap();

         // The output delta should have both changes (both pass the filter b > 2)
         assert_eq!(output.changes.len(), 2);
         assert_eq!(output.changes[0].1, -1); // delete weight
         assert_eq!(output.changes[1].1, 1); // insert weight
-
-        // The current state should be consolidated to only have rows with positive weight
-        let final_state = filter.get_current_state();
-
-        // After consolidation, we should have only one row with rowid=1
-        assert_eq!(
-            final_state.changes.len(),
-            1,
-            "State should be consolidated to have only one row"
-        );
-        assert_eq!(final_state.changes[0].0.rowid, 1);
-        assert_eq!(
-            final_state.changes[0].0.values,
-            vec![Value::Integer(1), Value::Integer(3)]
-        );
-        assert_eq!(final_state.changes[0].1, 1); // positive weight
     }

     // ============================================================================
@@ -2388,6 +2998,10 @@ mod tests {

     #[test]
     fn test_filter_eval_with_uncommitted() {
+        // Create a persistent pager for the test
+        let (pager, root_page_id) = create_test_pager();
+        let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10);
+
         let mut filter = FilterOperator::new(
             FilterPredicate::GreaterThan {
                 column: "age".to_string(),
@@ -2414,10 +3028,12 @@ mod tests {
                 Value::Integer(20),
             ],
         );
-        filter.initialize(init_data);
+        let state = pager
+            .io
+            .block(|| filter.commit(init_data.clone(), &mut cursor))
+            .unwrap();

         // Verify initial state (only Alice passes filter)
-        let state = filter.get_current_state();
         assert_eq!(state.changes.len(), 1);
         assert_eq!(state.changes[0].0.rowid, 1);

@@ -2441,7 +3057,11 @@ mod tests {
         );

         // Eval with uncommitted - should return filtered uncommitted rows
-        let result = filter.eval(Delta::new(), Some(uncommitted.clone()));
+        let mut eval_state = uncommitted.clone().into();
+        let result = pager
+            .io
+            .block(|| filter.eval(&mut eval_state, &mut cursor))
+            .unwrap();
         assert_eq!(
             result.changes.len(),
             1,
@@ -2449,23 +3069,16 @@ mod tests {
         );
         assert_eq!(result.changes[0].0.rowid, 3);

-        // Verify state hasn't changed
-        let state_after_eval = filter.get_current_state();
-        assert_eq!(
-            state_after_eval.changes.len(),
-            1,
-            "State should still only have Alice"
-        );
-        assert_eq!(state_after_eval.changes[0].0.rowid, 1);
-
         // Now commit the changes
-        filter.commit(uncommitted);
+        let state = pager
+            .io
+            .block(|| filter.commit(uncommitted.clone(), &mut cursor))
+            .unwrap();

         // State should now include Charlie (who passes filter)
-        let final_state = filter.get_current_state();
         assert_eq!(
-            final_state.changes.len(),
-            2,
+            state.changes.len(),
+            1,
-            "State should now have Alice and Charlie"
+            "Commit delta should contain only Charlie"
         );
     }

@@ -2473,7 +3086,12 @@ mod tests {
     #[test]
     fn test_aggregate_eval_with_uncommitted_preserves_state() {
         // This is the critical test - aggregations must not modify internal state during eval
+        // Create a persistent pager for the test
+        let (pager, root_page_id) = create_test_pager();
+        let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10);
+
         let mut agg = AggregateOperator::new(
+            1, // operator_id for testing
             vec!["category".to_string()],
             vec![
                 AggregateFunction::Count,
@@ -2512,10 +3130,13 @@ mod tests {
                 Value::Integer(150),
             ],
         );
-        agg.initialize(init_data);
+        pager
+            .io
+            .block(|| agg.commit(init_data.clone(),
&mut cursor)) + .unwrap(); // Check initial state: A -> (count=2, sum=300), B -> (count=1, sum=150) - let initial_state = agg.get_current_state(); + let initial_state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(initial_state.changes.len(), 2); // Store initial state for comparison @@ -2547,7 +3168,11 @@ mod tests { ); // Eval with uncommitted should return the delta (changes to aggregates) - let result = agg.eval(Delta::new(), Some(uncommitted.clone())); + let mut eval_state = uncommitted.clone().into(); + let result = pager + .io + .block(|| agg.eval(&mut eval_state, &mut cursor)) + .unwrap(); // Result should contain updates for A and new group C // For A: retraction of old (2, 300) and insertion of new (3, 350) @@ -2555,7 +3180,7 @@ mod tests { assert!(!result.changes.is_empty(), "Should have aggregate changes"); // CRITICAL: Verify internal state hasn't changed - let state_after_eval = agg.get_current_state(); + let state_after_eval = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!( state_after_eval.changes.len(), 2, @@ -2579,10 +3204,13 @@ mod tests { ); // Now commit the changes - agg.commit(uncommitted); + pager + .io + .block(|| agg.commit(uncommitted.clone(), &mut cursor)) + .unwrap(); // State should now be updated - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(final_state.changes.len(), 3, "Should now have A, B, and C"); let a_final = final_state @@ -2622,7 +3250,12 @@ mod tests { fn test_aggregate_eval_multiple_times_without_commit() { // Test that calling eval multiple times with different uncommitted data // doesn't pollute the internal state + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec![], // No GROUP BY vec![ AggregateFunction::Count, @@ -2635,10 +3268,13 @@ mod tests { let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Integer(1), Value::Integer(100)]); init_data.insert(2, vec![Value::Integer(2), Value::Integer(200)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Initial state: count=2, sum=300 - let initial_state = agg.get_current_state(); + let initial_state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(initial_state.changes.len(), 1); assert_eq!(initial_state.changes[0].0.values[0], Value::Integer(2)); assert_eq!(initial_state.changes[0].0.values[1], Value::Float(300.0)); @@ -2646,10 +3282,14 @@ mod tests { // First eval with uncommitted let mut uncommitted1 = Delta::new(); uncommitted1.insert(3, vec![Value::Integer(3), Value::Integer(50)]); - let _ = agg.eval(Delta::new(), Some(uncommitted1)); + let mut eval_state1 = uncommitted1.clone().into(); + let _ = pager + .io + .block(|| agg.eval(&mut eval_state1, &mut cursor)) + .unwrap(); // State should be unchanged - let state1 = agg.get_current_state(); + let state1 = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state1.changes[0].0.values[0], Value::Integer(2)); assert_eq!(state1.changes[0].0.values[1], Value::Float(300.0)); @@ -2657,20 +3297,28 @@ mod tests { let mut uncommitted2 = Delta::new(); uncommitted2.insert(4, vec![Value::Integer(4), Value::Integer(75)]); uncommitted2.insert(5, vec![Value::Integer(5), Value::Integer(25)]); - let _ = 
agg.eval(Delta::new(), Some(uncommitted2)); + let mut eval_state2 = uncommitted2.clone().into(); + let _ = pager + .io + .block(|| agg.eval(&mut eval_state2, &mut cursor)) + .unwrap(); // State should STILL be unchanged - let state2 = agg.get_current_state(); + let state2 = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state2.changes[0].0.values[0], Value::Integer(2)); assert_eq!(state2.changes[0].0.values[1], Value::Float(300.0)); // Third eval with deletion as uncommitted let mut uncommitted3 = Delta::new(); uncommitted3.delete(1, vec![Value::Integer(1), Value::Integer(100)]); - let _ = agg.eval(Delta::new(), Some(uncommitted3)); + let mut eval_state3 = uncommitted3.clone().into(); + let _ = pager + .io + .block(|| agg.eval(&mut eval_state3, &mut cursor)) + .unwrap(); // State should STILL be unchanged - let state3 = agg.get_current_state(); + let state3 = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state3.changes[0].0.values[0], Value::Integer(2)); assert_eq!(state3.changes[0].0.values[1], Value::Float(300.0)); } @@ -2678,7 +3326,12 @@ mod tests { #[test] fn test_aggregate_eval_with_mixed_committed_and_uncommitted() { // Test eval with both committed delta and uncommitted changes + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["type".to_string()], vec![AggregateFunction::Count], vec!["id".to_string(), "type".to_string()], @@ -2688,7 +3341,10 @@ mod tests { let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Integer(1), Value::Text("X".into())]); init_data.insert(2, vec![Value::Integer(2), Value::Text("Y".into())]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Create a committed delta (to be processed) let mut committed_delta = Delta::new(); @@ -2700,20 +3356,76 @@ mod tests { uncommitted.insert(5, vec![Value::Integer(5), Value::Text("Z".into())]); // Eval with both - should process both but not commit - let result = agg.eval(committed_delta.clone(), Some(uncommitted)); + let mut combined = committed_delta.clone(); + combined.merge(&uncommitted); + let mut eval_state = combined.clone().into(); + let result = pager + .io + .block(|| agg.eval(&mut eval_state, &mut cursor)) + .unwrap(); // Result should reflect changes from both - assert!(!result.changes.is_empty()); + assert!(!result.changes.is_empty(), "Result should not be empty"); + + // Verify the DBSP pattern: retraction (-1) followed by insertion (1) for updates, + // and just insertion (1) for new groups + + // We expect exactly 5 changes: + // - X: retraction + insertion (was 1, now 2) + // - Y: retraction + insertion (was 1, now 2) + // - Z: insertion only (new group with count 1) + assert_eq!( + result.changes.len(), + 5, + "Should have 5 changes (2 retractions + 3 insertions)" + ); + + // Sort by group name then by weight to get predictable order + let mut sorted_changes: Vec<_> = result.changes.iter().collect(); + sorted_changes.sort_by(|a, b| { + let a_group = &a.0.values[0]; + let b_group = &b.0.values[0]; + match a_group.partial_cmp(b_group).unwrap() { + std::cmp::Ordering::Equal => a.1.cmp(&b.1), // Sort by weight if same group + other => other, + } + }); + + // Check X group: should have retraction (-1) for count=1, then insertion (1) for count=2 + assert_eq!(sorted_changes[0].0.values[0], 
Value::Text("X".into())); + assert_eq!(sorted_changes[0].0.values[1], Value::Integer(1)); // old count + assert_eq!(sorted_changes[0].1, -1); // retraction + + assert_eq!(sorted_changes[1].0.values[0], Value::Text("X".into())); + assert_eq!(sorted_changes[1].0.values[1], Value::Integer(2)); // new count + assert_eq!(sorted_changes[1].1, 1); // insertion + + // Check Y group: should have retraction (-1) for count=1, then insertion (1) for count=2 + assert_eq!(sorted_changes[2].0.values[0], Value::Text("Y".into())); + assert_eq!(sorted_changes[2].0.values[1], Value::Integer(1)); // old count + assert_eq!(sorted_changes[2].1, -1); // retraction + + assert_eq!(sorted_changes[3].0.values[0], Value::Text("Y".into())); + assert_eq!(sorted_changes[3].0.values[1], Value::Integer(2)); // new count + assert_eq!(sorted_changes[3].1, 1); // insertion + + // Check Z group: should only have insertion (1) for count=1 (new group) + assert_eq!(sorted_changes[4].0.values[0], Value::Text("Z".into())); + assert_eq!(sorted_changes[4].0.values[1], Value::Integer(1)); // new count + assert_eq!(sorted_changes[4].1, 1); // insertion only (no retraction as it's new); // But internal state should be unchanged - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 2, "Should still have only X and Y"); // Now commit only the committed_delta - agg.commit(committed_delta); + pager + .io + .block(|| agg.commit(committed_delta.clone(), &mut cursor)) + .unwrap(); // State should now have X count=2, Y count=1 - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let x = final_state .changes .iter() diff --git a/core/incremental/view.rs b/core/incremental/view.rs index e7ba76980..b15faf847 100644 --- a/core/incremental/view.rs +++ b/core/incremental/view.rs @@ -1,13 +1,16 @@ use super::compiler::{DbspCircuit, DbspCompiler, DeltaSet}; -use super::dbsp::{RowKeyStream, RowKeyZSet}; -use super::operator::{ComputationTracker, Delta, FilterPredicate}; +use super::dbsp::Delta; +use super::operator::{ComputationTracker, FilterPredicate}; use crate::schema::{BTreeTable, Column, Schema}; +use crate::storage::btree::BTreeCursor; use crate::translate::logical::LogicalPlanBuilder; -use crate::types::{IOCompletions, IOResult, Value}; +use crate::types::{IOResult, Value}; use crate::util::extract_view_columns; -use crate::{io_yield_one, Completion, LimboError, Result, Statement}; -use std::collections::{BTreeMap, HashMap}; +use crate::{return_if_io, LimboError, Pager, Result, Statement}; +use std::cell::RefCell; +use std::collections::HashMap; use std::fmt; +use std::rc::Rc; use std::sync::{Arc, Mutex}; use turso_parser::ast; use turso_parser::{ @@ -23,18 +26,26 @@ pub enum PopulateState { Processing { stmt: Box, rows_processed: usize, + /// If we're in the middle of processing a row (merge_delta returned I/O) + pending_row: Option<(i64, Vec)>, // (rowid, values) }, /// Population complete Done, } +/// State machine for merge_delta to handle I/O operations impl fmt::Debug for PopulateState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { PopulateState::Start => write!(f, "Start"), - PopulateState::Processing { rows_processed, .. } => f + PopulateState::Processing { + rows_processed, + pending_row, + .. 
+            } => f
                 .debug_struct("Processing")
                 .field("rows_processed", rows_processed)
+                .field("has_pending", &pending_row.is_some())
                 .finish(),
             PopulateState::Done => write!(f, "Done"),
         }
     }
 }
@@ -45,11 +56,95 @@ impl fmt::Debug for PopulateState {
 #[derive(Debug, Clone, Default)]
 pub struct ViewTransactionState {
     // Per-connection delta for uncommitted changes (contains both weights and values)
-    pub delta: Delta,
+    // Using RefCell for interior mutability
+    delta: RefCell<Delta>,
 }

-/// Incremental view that maintains a stream of row keys using DBSP-style computation
-/// The actual row data is stored as transformed Values
+impl ViewTransactionState {
+    /// Create a new transaction state
+    pub fn new() -> Self {
+        Self {
+            delta: RefCell::new(Delta::new()),
+        }
+    }
+
+    /// Insert a row into the delta
+    pub fn insert(&self, key: i64, values: Vec<Value>) {
+        self.delta.borrow_mut().insert(key, values);
+    }
+
+    /// Delete a row from the delta
+    pub fn delete(&self, key: i64, values: Vec<Value>) {
+        self.delta.borrow_mut().delete(key, values);
+    }
+
+    /// Clear all changes in the delta
+    pub fn clear(&self) {
+        self.delta.borrow_mut().changes.clear();
+    }
+
+    /// Get a clone of the current delta
+    pub fn get_delta(&self) -> Delta {
+        self.delta.borrow().clone()
+    }
+
+    /// Check if the delta is empty
+    pub fn is_empty(&self) -> bool {
+        self.delta.borrow().is_empty()
+    }
+
+    /// Returns how many elements exist in the delta.
+    pub fn len(&self) -> usize {
+        self.delta.borrow().len()
+    }
+}
+
+/// Container for all view transaction states within a connection
+/// Provides interior mutability for the map of view states
+#[derive(Debug, Clone, Default)]
+pub struct AllViewsTxState {
+    states: Rc<RefCell<HashMap<String, Rc<ViewTransactionState>>>>,
+}
+
+impl AllViewsTxState {
+    /// Create a new container for view transaction states
+    pub fn new() -> Self {
+        Self {
+            states: Rc::new(RefCell::new(HashMap::new())),
+        }
+    }
+
+    /// Get or create a transaction state for a view
+    pub fn get_or_create(&self, view_name: &str) -> Rc<ViewTransactionState> {
+        let mut states = self.states.borrow_mut();
+        states
+            .entry(view_name.to_string())
+            .or_insert_with(|| Rc::new(ViewTransactionState::new()))
+            .clone()
+    }
+
+    /// Get a transaction state for a view if it exists
+    pub fn get(&self, view_name: &str) -> Option<Rc<ViewTransactionState>> {
+        self.states.borrow().get(view_name).cloned()
+    }
+
+    /// Clear all transaction states
+    pub fn clear(&self) {
+        self.states.borrow_mut().clear();
+    }
+
+    /// Check if there are no transaction states
+    pub fn is_empty(&self) -> bool {
+        self.states.borrow().is_empty()
+    }
+
+    /// Get all view names that have transaction states
+    pub fn get_view_names(&self) -> Vec<String> {
+        self.states.borrow().keys().cloned().collect()
+    }
+}
+
+/// Incremental view that maintains its state through a DBSP circuit
 ///
 /// This version keeps everything in-memory. This is acceptable for small views, since DBSP
 /// doesn't have to track the history of changes. Still for very large views (think of the result
 /// Uses DBSP circuits for incremental computation.
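+///
+/// Illustrative sketch of the intended flow (hypothetical bindings, not the
+/// exact call sites in this crate):
+///
+/// ```ignore
+/// // Uncommitted writes are staged per connection in AllViewsTxState...
+/// let tx = all_views_tx_state.get_or_create("my_view");
+/// tx.insert(rowid, values);
+/// // ...and only folded into the view's btree-backed state on COMMIT:
+/// view.merge_delta(&tx.get_delta(), pager.clone())?;
+/// ```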
#[derive(Debug)]
 pub struct IncrementalView {
-    // Stream of row keys for this view
-    stream: RowKeyStream,
     name: String,
-    // Store the actual row data as Values, keyed by row_key
-    // Using BTreeMap for ordered iteration
-    pub records: BTreeMap<i64, Vec<Value>>,
     // WHERE clause predicate for filtering (kept for compatibility)
     pub where_predicate: FilterPredicate,
     // The SELECT statement that defines how to transform input data
@@ -75,8 +165,6 @@ pub struct IncrementalView {
     // DBSP circuit that encapsulates the computation
     circuit: DbspCircuit,
-    // Track whether circuit has been initialized with data
-    circuit_initialized: bool,

     // Tables referenced by this view (extracted from FROM clause and JOINs)
     base_table: Arc<BTreeTable>,
@@ -88,6 +176,8 @@ pub struct IncrementalView {
     // We will use this one day to export rows_read, but for now, will just test that we're doing the expected amount of compute
     #[cfg_attr(not(test), allow(dead_code))]
     pub tracker: Arc<Mutex<ComputationTracker>>,
+    // Root page of the btree storing the materialized state (0 for unmaterialized)
+    root_page: usize,
 }

 impl IncrementalView {
@@ -110,6 +200,8 @@ impl IncrementalView {
         select: &ast::Select,
         schema: &Schema,
         _base_table: &Arc<BTreeTable>,
+        main_data_root: usize,
+        internal_state_root: usize,
     ) -> Result<DbspCircuit> {
         // Build the logical plan from the SELECT statement
         let mut builder = LogicalPlanBuilder::new(schema);
@@ -117,8 +209,8 @@ impl IncrementalView {
         let stmt = ast::Stmt::Select(select.clone());
         let logical_plan = builder.build_statement(&stmt)?;

-        // Compile the logical plan to a DBSP circuit
-        let compiler = DbspCompiler::new();
+        // Compile the logical plan to a DBSP circuit with the storage roots
+        let compiler = DbspCompiler::new(main_data_root, internal_state_root);
         let circuit = compiler.compile(&logical_plan)?;

         Ok(circuit)
@@ -145,7 +237,37 @@ impl IncrementalView {
         false
     }

-    pub fn from_sql(sql: &str, schema: &Schema) -> Result<Self> {
+    /// Validate a SELECT statement and extract the columns it would produce
+    /// This is used during CREATE MATERIALIZED VIEW to validate the view before storing it
+    pub fn validate_and_extract_columns(
+        select: &ast::Select,
+        schema: &Schema,
+    ) -> Result<Vec<Column>> {
+        // For now, just extract columns from a simple select
+        // This will need to be expanded to handle joins, aggregates, etc.
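+        // For example (illustrative): given
+        //   CREATE MATERIALIZED VIEW v AS SELECT a, b FROM t;
+        // this currently returns all of t's columns, not just (a, b).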
+
+        // Get the base table name
+        let base_table_name = Self::extract_base_table(select).ok_or_else(|| {
+            LimboError::ParseError("Cannot extract base table from SELECT".to_string())
+        })?;
+
+        // Get the table from schema
+        let table = schema
+            .get_table(&base_table_name)
+            .and_then(|t| t.btree())
+            .ok_or_else(|| LimboError::ParseError(format!("Table {base_table_name} not found")))?;
+
+        // For now, return all columns from the base table
+        // In the future, this should parse the select list and handle projections
+        Ok(table.columns.clone())
+    }
+
+    pub fn from_sql(
+        sql: &str,
+        schema: &Schema,
+        main_data_root: usize,
+        internal_state_root: usize,
+    ) -> Result<Self> {
         let mut parser = Parser::new(sql.as_bytes());
         let cmd = parser.next_cmd()?;
         let cmd = cmd.expect("View is an empty statement");
@@ -155,7 +277,13 @@ impl IncrementalView {
                 view_name,
                 columns: _,
                 select,
-            }) => IncrementalView::from_stmt(view_name, select, schema),
+            }) => IncrementalView::from_stmt(
+                view_name,
+                select,
+                schema,
+                main_data_root,
+                internal_state_root,
+            ),
             _ => Err(LimboError::ParseError(format!(
                 "View is not a CREATE MATERIALIZED VIEW statement: {sql}"
             ))),
@@ -166,6 +294,8 @@ impl IncrementalView {
         view_name: ast::QualifiedName,
         select: ast::Select,
         schema: &Schema,
+        main_data_root: usize,
+        internal_state_root: usize,
     ) -> Result<Self> {
         let name = view_name.name.as_str().to_string();
@@ -203,9 +333,12 @@ impl IncrementalView {
             base_table,
             view_columns,
             schema,
+            main_data_root,
+            internal_state_root,
         )
     }

+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         name: String,
         where_predicate: FilterPredicate,
@@ -213,30 +346,31 @@ impl IncrementalView {
         base_table: Arc<BTreeTable>,
         columns: Vec<Column>,
         schema: &Schema,
+        main_data_root: usize,
+        internal_state_root: usize,
     ) -> Result<Self> {
-        let records = BTreeMap::new();
-
         // Create the tracker that will be shared by all operators
         let tracker = Arc::new(Mutex::new(ComputationTracker::new()));

         // Compile the SELECT statement into a DBSP circuit
-        let circuit = Self::try_compile_circuit(&select_stmt, schema, &base_table)?;
-
-        // Circuit will be initialized when we first call merge_delta
-        let circuit_initialized = false;
+        let circuit = Self::try_compile_circuit(
+            &select_stmt,
+            schema,
+            &base_table,
+            main_data_root,
+            internal_state_root,
+        )?;

         Ok(Self {
-            stream: RowKeyStream::from_zset(RowKeyZSet::new()),
             name,
-            records,
             where_predicate,
             select_stmt,
             circuit,
-            circuit_initialized,
             base_table,
             columns,
             populate_state: PopulateState::Start,
             tracker,
+            root_page: main_data_root,
         })
     }

@@ -244,6 +378,29 @@ impl IncrementalView {
         &self.name
     }

+    pub fn base_table(&self) -> &Arc<BTreeTable> {
+        &self.base_table
+    }
+
+    /// Execute the circuit with uncommitted changes to get processed delta
+    pub fn execute_with_uncommitted(
+        &mut self,
+        uncommitted: DeltaSet,
+        pager: Rc<Pager>,
+        execute_state: &mut crate::incremental::compiler::ExecuteState,
+    ) -> crate::Result<IOResult<Delta>> {
+        // Initialize execute_state with the input data
+        *execute_state = crate::incremental::compiler::ExecuteState::Init {
+            input_data: uncommitted,
+        };
+        self.circuit.execute(pager, execute_state)
+    }
+
+    /// Get the root page for this materialized view's btree
+    pub fn get_root_page(&self) -> usize {
+        self.root_page
+    }
+
     /// Get all table names referenced by this view
     pub fn get_referenced_table_names(&self) -> Vec<String> {
         vec![self.base_table.name.clone()]
@@ -348,132 +505,189 @@ impl IncrementalView {
     /// Populate the view by scanning the source table using a state machine
     /// This can be called multiple times and will resume from where it left off
+    /// This method is only for materialized views and will persist data to the btree
     pub fn populate_from_table(
         &mut self,
         conn: &std::sync::Arc<Connection>,
+        pager: &std::rc::Rc<Pager>,
+        _btree_cursor: &mut BTreeCursor,
     ) -> crate::Result<IOResult<()>> {
         // If already populated, return immediately
         if matches!(self.populate_state, PopulateState::Done) {
             return Ok(IOResult::Done(()));
         }

-        const BATCH_SIZE: usize = 100; // Process 100 rows at a time before yielding
+        // Assert that this is a materialized view with a root page
+        assert!(
+            self.root_page != 0,
+            "populate_from_table should only be called for materialized views with root_page"
+        );

         loop {
-            match &mut self.populate_state {
-                PopulateState::Start => {
-                    // Generate the SQL query for populating the view
-                    // It is best to use a standard query than a cursor for two reasons:
-                    // 1) Using a sql query will allow us to be much more efficient in cases where we only want
-                    //    some rows, in particular for indexed filters
-                    // 2) There are two types of cursors: index and table. In some situations (like for example
-                    //    if the table has an integer primary key), the key will be exclusively in the index
-                    //    btree and not in the table btree. Using cursors would force us to be aware of this
-                    //    distinction (and others), and ultimately lead to reimplementing the whole query
-                    //    machinery (next step is which index is best to use, etc)
-                    let query = self.sql_for_populate()?;
+            // To avoid borrow checker issues, we need to handle state transitions carefully
+            let needs_start = matches!(self.populate_state, PopulateState::Start);

-                    // Prepare the statement
-                    let stmt = conn.prepare(&query)?;
+            if needs_start {
+                // Generate the SQL query for populating the view
+                // It is better to use a standard query than a cursor for two reasons:
+                // 1) Using a sql query will allow us to be much more efficient in cases where we only want
+                //    some rows, in particular for indexed filters
+                // 2) There are two types of cursors: index and table. In some situations (like for example
+                //    if the table has an integer primary key), the key will be exclusively in the index
+                //    btree and not in the table btree. Using cursors would force us to be aware of this
+                //    distinction (and others), and ultimately lead to reimplementing the whole query
+                //    machinery (next step is which index is best to use, etc)
+                let query = self.sql_for_populate()?;

-                    self.populate_state = PopulateState::Processing {
-                        stmt: Box::new(stmt),
-                        rows_processed: 0,
-                    };
-                    // Continue to next state
+                // Prepare the statement
+                let stmt = conn.prepare(&query)?;
+
+                self.populate_state = PopulateState::Processing {
+                    stmt: Box::new(stmt),
+                    rows_processed: 0,
+                    pending_row: None,
+                };
+                // Continue to next state
+                continue;
+            }
+
+            // Handle Done state
+            if matches!(self.populate_state, PopulateState::Done) {
+                return Ok(IOResult::Done(()));
+            }
+
+            // Handle Processing state - extract state to avoid borrow issues
+            let (mut stmt, mut rows_processed, pending_row) =
+                match std::mem::replace(&mut self.populate_state, PopulateState::Done) {
+                    PopulateState::Processing {
+                        stmt,
+                        rows_processed,
+                        pending_row,
+                    } => (stmt, rows_processed, pending_row),
+                    _ => unreachable!("We already handled Start and Done states"),
+                };
+
+            // If we have a pending row from a previous I/O interruption, process it first
+            if let Some((rowid, values)) = pending_row {
+                // Create a single-row delta for the pending row
+                let mut single_row_delta = Delta::new();
+                single_row_delta.insert(rowid, values.clone());
+
+                // Process the pending row with the pager
+                match self.merge_delta(&single_row_delta, pager.clone())? {
+                    IOResult::Done(_) => {
+                        // Row processed successfully, continue to next row
+                        rows_processed += 1;
+                        // Continue to fetch next row from statement
+                    }
+                    IOResult::IO(io) => {
+                        // Still not done, save state with pending row
+                        self.populate_state = PopulateState::Processing {
+                            stmt,
+                            rows_processed,
+                            pending_row: Some((rowid, values)), // Keep the pending row
+                        };
+                        return Ok(IOResult::IO(io));
+                    }
+                }
+            }

-                PopulateState::Processing {
-                    stmt,
-                    rows_processed,
-                } => {
-                    // Collect rows into a delta batch
-                    let mut batch_delta = Delta::new();
-                    let mut batch_count = 0;
+            // Process rows one at a time - no batching
+            loop {
+                // This step() call resumes from where the statement left off
+                match stmt.step()? {
+                    crate::vdbe::StepResult::Row => {
+                        // Get the row
+                        let row = stmt.row().unwrap();

-                    loop {
-                        if batch_count >= BATCH_SIZE {
-                            // Process this batch through the standard pipeline
-                            self.merge_delta(&batch_delta);
-                            // Yield control after processing a batch
-                            // TODO: currently this inner statement is the one that is tracking completions
-                            // so as a stop gap we can just return a dummy completion here
-                            io_yield_one!(Completion::new_dummy());
-                        }
+                        // Extract values from the row
+                        let all_values: Vec<Value> =
+                            row.get_values().cloned().collect();

-                        // This step() call resumes from where the statement left off
-                        match stmt.step()? {
-                            crate::vdbe::StepResult::Row => {
-                                // Get the row
-                                let row = stmt.row().unwrap();
-
-                                // Extract values from the row
-                                let all_values: Vec<Value> =
-                                    row.get_values().cloned().collect();
-
-                                // Determine how to extract the rowid
-                                // If there's a rowid alias (INTEGER PRIMARY KEY), the rowid is one of the columns
-                                // Otherwise, it's the last value we explicitly selected
-                                let (rowid, values) = if let Some((idx, _)) =
-                                    self.base_table.get_rowid_alias_column()
-                                {
-                                    // The rowid is the value at the rowid alias column index
-                                    let rowid = match all_values.get(idx) {
-                                        Some(crate::types::Value::Integer(id)) => *id,
-                                        _ => {
-                                            // This shouldn't happen - rowid alias must be an integer
-                                            *rows_processed += 1;
-                                            batch_count += 1;
-                                            continue;
-                                        }
-                                    };
-                                    // All values are table columns (no separate rowid was selected)
-                                    (rowid, all_values)
-                                } else {
-                                    // The last value is the explicitly selected rowid
-                                    let rowid = match all_values.last() {
-                                        Some(crate::types::Value::Integer(id)) => *id,
-                                        _ => {
-                                            // This shouldn't happen - rowid must be an integer
-                                            *rows_processed += 1;
-                                            batch_count += 1;
-                                            continue;
-                                        }
-                                    };
-                                    // Get all values except the rowid
-                                    let values = all_values[..all_values.len() - 1].to_vec();
-                                    (rowid, values)
+                        // Determine how to extract the rowid
+                        // If there's a rowid alias (INTEGER PRIMARY KEY), the rowid is one of the columns
+                        // Otherwise, it's the last value we explicitly selected
+                        let (rowid, values) =
+                            if let Some((idx, _)) = self.base_table.get_rowid_alias_column() {
+                                // The rowid is the value at the rowid alias column index
+                                let rowid = match all_values.get(idx) {
+                                    Some(crate::types::Value::Integer(id)) => *id,
+                                    _ => {
+                                        // This shouldn't happen - rowid alias must be an integer
+                                        rows_processed += 1;
+                                        continue;
+                                    }
                                 };
+                                // All values are table columns (no separate rowid was selected)
+                                (rowid, all_values)
+                            } else {
+                                // The last value is the explicitly selected rowid
+                                let rowid = match all_values.last() {
+                                    Some(crate::types::Value::Integer(id)) => *id,
+                                    _ => {
+                                        // This shouldn't happen - rowid must be an integer
+                                        rows_processed += 1;
+                                        continue;
+                                    }
+                                };
+                                // Get all values except the rowid
+                                let values = all_values[..all_values.len() - 1].to_vec();
+                                (rowid, values)
+                            };

-                                // Add to batch delta - let merge_delta handle filtering and aggregation
-                                batch_delta.insert(rowid, values);
+                        // Create a single-row delta and process it immediately
+                        let mut single_row_delta = Delta::new();
+                        single_row_delta.insert(rowid, values.clone());

-                                *rows_processed += 1;
-                                batch_count += 1;
+                        // Process this single row through merge_delta with the pager
+                        match self.merge_delta(&single_row_delta, pager.clone())? {
+                            IOResult::Done(_) => {
+                                // Row processed successfully, continue to next row
+                                rows_processed += 1;
                             }
-                            crate::vdbe::StepResult::Done => {
-                                // Process any remaining rows in the batch
-                                self.merge_delta(&batch_delta);
-                                // All rows processed, move to Done state
-                                self.populate_state = PopulateState::Done;
-                                return Ok(IOResult::Done(()));
-                            }
-                            crate::vdbe::StepResult::Interrupt | crate::vdbe::StepResult::Busy => {
-                                return Err(LimboError::Busy);
-                            }
-                            crate::vdbe::StepResult::IO => {
-                                // Process current batch before yielding
-                                self.merge_delta(&batch_delta);
-                                // The Statement needs to wait for IO
-                                io_yield_one!(Completion::new_dummy());
+                            IOResult::IO(io) => {
+                                // Save state and return I/O
+                                // We'll resume at the SAME row when called again (don't increment rows_processed)
+                                // The circuit still has unfinished work for this row
+                                self.populate_state = PopulateState::Processing {
+                                    stmt,
+                                    rows_processed, // Don't increment - row not done yet!
+                                    pending_row: Some((rowid, values)), // Save the row for resumption
+                                };
+                                return Ok(IOResult::IO(io));
                             }
                         }
                     }
-                }
-                PopulateState::Done => {
-                    // Already populated
-                    return Ok(IOResult::Done(()));
+                    crate::vdbe::StepResult::Done => {
+                        // All rows processed, we're done
+                        self.populate_state = PopulateState::Done;
+                        return Ok(IOResult::Done(()));
+                    }
+
+                    crate::vdbe::StepResult::Interrupt | crate::vdbe::StepResult::Busy => {
+                        // Save state before returning error
+                        self.populate_state = PopulateState::Processing {
+                            stmt,
+                            rows_processed,
+                            pending_row: None, // No pending row when interrupted between rows
+                        };
+                        return Err(LimboError::Busy);
+                    }
+
+                    crate::vdbe::StepResult::IO => {
+                        // Statement needs I/O - save state and return
+                        self.populate_state = PopulateState::Processing {
+                            stmt,
+                            rows_processed,
+                            pending_row: None, // No pending row when interrupted between rows
+                        };
+                        // TODO: Get the actual I/O completion from the statement
+                        let completion = crate::io::Completion::new_dummy();
+                        return Ok(IOResult::IO(crate::types::IOCompletions::Single(
+                            completion,
+                        )));
+                    }
                 }
             }
         }
     }
@@ -555,95 +769,23 @@ impl IncrementalView {
         None
     }

-    /// Get the current records as an iterator - for cursor-based access
-    pub fn iter(&self) -> impl Iterator<Item = (i64, Vec<Value>)> + '_ {
-        self.stream.to_vec().into_iter().filter_map(move |row| {
-            self.records
-                .get(&row.rowid)
-                .map(|values| (row.rowid, values.clone()))
-        })
-    }
-
-    /// Get current data merged with transaction state
-    pub fn current_data(&self, tx_state: Option<&ViewTransactionState>) -> Vec<(i64, Vec<Value>)> {
-        if let Some(tx_state) = tx_state {
-            // Use circuit to process uncommitted changes
-            let mut uncommitted = DeltaSet::new();
-            uncommitted.insert(self.base_table.name.clone(), tx_state.delta.clone());
-
-            // Execute with uncommitted changes (won't affect circuit state)
-            match self.circuit.execute(HashMap::new(), uncommitted) {
-                Ok(processed_delta) => {
-                    // Merge processed delta with committed records
-                    let mut result_map: BTreeMap<i64, Vec<Value>> = self.records.clone();
-                    for (row, weight) in &processed_delta.changes {
-                        if *weight > 0 {
-                            result_map.insert(row.rowid, row.values.clone());
-                        } else if *weight < 0 {
-                            result_map.remove(&row.rowid);
-                        }
-                    }
-                    result_map.into_iter().collect()
-                }
-                Err(e) => {
-                    // Return error or panic - no fallback
-                    panic!("Failed to execute circuit with uncommitted data: {e:?}");
-                }
-            }
-        } else {
-            // No transaction state: return committed records
-            self.records.clone().into_iter().collect()
-        }
-    }
-
     /// Merge a delta of changes into the view's current state
-    pub fn merge_delta(&mut self, delta: &Delta) {
+    pub fn merge_delta(
+        &mut self,
+        delta: &Delta,
+        pager: std::rc::Rc<Pager>,
+    ) -> crate::Result<IOResult<()>> {
         // Early return if delta is empty
         if delta.is_empty() {
-            return;
+            return Ok(IOResult::Done(()));
         }

-        // Use the circuit to process the delta
+        // Use the circuit to process the delta and write to btree
         let mut input_data = HashMap::new();
         input_data.insert(self.base_table.name.clone(), delta.clone());

-        // If circuit hasn't been initialized yet, initialize it first
-        // This happens during populate_from_table
-        if !self.circuit_initialized {
-            // Initialize the circuit with empty state
-            self.circuit
-                .initialize(HashMap::new())
-                .expect("Failed to initialize circuit");
-            self.circuit_initialized = true;
-        }
-
-        // Execute the circuit to process the delta
-        let current_delta = match self.circuit.execute(input_data.clone(), DeltaSet::empty()) {
-            Ok(output) => {
-                // Commit the changes to the circuit's internal state
-                self.circuit
-                    .commit(input_data)
-                    .expect("Failed to commit to circuit");
-                output
-            }
-            Err(e) => {
-                panic!("Failed to execute circuit: {e:?}");
-            }
-        };
-
-        // Update records and stream with the processed delta
-        let mut zset_delta = RowKeyZSet::new();
-
-        for (row, weight) in &current_delta.changes {
-            if *weight > 0 {
-                self.records.insert(row.rowid, row.values.clone());
-                zset_delta.insert(row.clone(), 1);
-            } else if *weight < 0 {
-                self.records.remove(&row.rowid);
-                zset_delta.insert(row.clone(), -1);
-            }
-        }
-
-        self.stream.apply_delta(&zset_delta);
+        // The circuit now handles all btree I/O internally with the provided pager
+        let _delta = return_if_io!(self.circuit.commit(input_data, pager));
+        Ok(IOResult::Done(()))
     }
 }
diff --git a/core/lib.rs b/core/lib.rs
index 2ca949e76..2f32774df 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -32,7 +32,6 @@ mod uuid;
 mod vdbe;
 mod vector;
 mod vtab;
-mod vtab_view;

 #[cfg(feature = "fuzz")]
 pub mod numeric;
@@ -40,7 +39,7 @@ pub mod numeric;
 #[cfg(not(feature = "fuzz"))]
 mod numeric;

-use crate::incremental::view::ViewTransactionState;
+use crate::incremental::view::AllViewsTxState;
 use crate::storage::encryption::CipherMode;
 use crate::translate::optimizer::optimize_plan;
 use crate::translate::pragma::TURSO_CDC_DEFAULT_TABLE_NAME;
@@ -441,13 +440,6 @@ impl Database {
                 Ok(())
             })?;
         }
-        // FIXME: the correct way to do this is to just materialize the view.
-        // But this will allow us to keep going.
-        let conn = db.connect()?;
-        let pager = conn.pager.borrow().clone();
-        pager
-            .io
-            .block(|| conn.schema.borrow().populate_materialized_views(&conn))?;
         Ok(db)
     }

@@ -489,7 +481,7 @@ impl Database {
             attached_databases: RefCell::new(DatabaseCatalog::new()),
             query_only: Cell::new(false),
             mv_tx_id: Cell::new(None),
-            view_transaction_states: RefCell::new(HashMap::new()),
+            view_transaction_states: AllViewsTxState::new(),
             metrics: RefCell::new(ConnectionMetrics::new()),
             is_nested_stmt: Cell::new(false),
             encryption_key: RefCell::new(None),
@@ -926,7 +918,7 @@ pub struct Connection {

     /// Per-connection view transaction states for uncommitted changes. This represents
     /// one entry per view that was touched in the transaction.
-    view_transaction_states: RefCell<HashMap<String, ViewTransactionState>>,
+    view_transaction_states: AllViewsTxState,
     /// Connection-level metrics aggregation
     pub metrics: RefCell<ConnectionMetrics>,
     /// Whether the connection is executing a statement initiated by another statement.
@@ -1072,7 +1064,7 @@ impl Connection {

         // Preserve existing views to avoid expensive repopulation.
         // TODO: We may not need to do this if we materialize our views.
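+        // (The incremental_views map holds the live DBSP circuits; dropping them
+        // here would force every materialized view to be repopulated from its
+        // base table.)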
-        let existing_views = self.schema.borrow().materialized_views.clone();
+        let existing_views = self.schema.borrow().incremental_views.clone();

         // TODO: this is hack to avoid a cyclical problem with schema reprepare
         // The problem here is that we prepare a statement here, but when the statement tries
@@ -1096,13 +1088,6 @@ impl Connection {
         self.with_schema_mut(|schema| {
             *schema = fresh;
         });
-
-        {
-            let schema = self.schema.borrow();
-            pager
-                .io
-                .block(|| schema.populate_materialized_views(self))?;
-        }

         Result::Ok(())
     }
@@ -1716,7 +1701,7 @@ impl Connection {
             .expect("query must be parsed to statement");
         let syms = self.syms.borrow();
         self.with_schema_mut(|schema| {
-            let existing_views = schema.materialized_views.clone();
+            let existing_views = schema.incremental_views.clone();
             if let Err(LimboError::ExtensionError(e)) =
                 parse_schema_rows(rows, schema, &syms, None, existing_views)
             {
diff --git a/core/schema.rs b/core/schema.rs
index 53a2a55e0..142101966 100644
--- a/core/schema.rs
+++ b/core/schema.rs
@@ -1,8 +1,4 @@
 use crate::incremental::view::IncrementalView;
-use crate::types::IOResult;
-
-/// Type alias for the materialized views collection
-pub type MaterializedViewsMap = HashMap<String, Arc<Mutex<IncrementalView>>>;

 /// Simple view structure for non-materialized views
 #[derive(Debug, Clone)]
@@ -23,12 +19,12 @@ use crate::translate::plan::SelectPlan;
 use crate::util::{
     module_args_from_sql, module_name_from_sql, type_from_name, IOExt, UnparsedFromSqlIndex,
 };
-use crate::{return_if_io, LimboError, MvCursor, Pager, RefValue, SymbolTable, VirtualTable};
 use crate::{util::normalize_ident, Result};
+use crate::{LimboError, MvCursor, Pager, RefValue, SymbolTable, VirtualTable};
 use core::fmt;
 use std::cell::RefCell;
 use std::collections::hash_map::Entry;
-use std::collections::{BTreeSet, HashMap};
+use std::collections::{BTreeSet, HashMap, HashSet};
 use std::ops::Deref;
 use std::rc::Rc;
 use std::sync::Arc;
@@ -42,6 +38,7 @@ use turso_parser::{

 const SCHEMA_TABLE_NAME: &str = "sqlite_schema";
 const SCHEMA_TABLE_NAME_ALT: &str = "sqlite_master";
+pub const DBSP_TABLE_PREFIX: &str = "__turso_internal_dbsp_state_";

 /// Check if a table name refers to a system table that should be protected from direct writes
 pub fn is_system_table(table_name: &str) -> bool {
@@ -52,7 +49,14 @@ pub fn is_system_table(table_name: &str) -> bool {
 #[derive(Debug)]
 pub struct Schema {
     pub tables: HashMap<String, Arc<Table>>,
-    pub materialized_views: MaterializedViewsMap,
+
+    /// Track which tables are actually materialized views
+    pub materialized_view_names: HashSet<String>,
+    /// Store original SQL for materialized views (for .schema command)
+    pub materialized_view_sql: HashMap<String, String>,
+    /// The incremental view objects (DBSP circuits)
+    pub incremental_views: HashMap<String, Arc<Mutex<IncrementalView>>>,
+
     pub views: ViewsMap,

     /// table_name to list of indexes for the table
@@ -81,12 +85,16 @@ impl Schema {
                 Arc::new(Table::Virtual(Arc::new((*function).clone()))),
             );
         }
-        let materialized_views: MaterializedViewsMap = HashMap::new();
+        let materialized_view_names = HashSet::new();
+        let materialized_view_sql = HashMap::new();
+        let incremental_views = HashMap::new();
         let views: ViewsMap = HashMap::new();
         let table_to_materialized_views: HashMap<String, Vec<String>> = HashMap::new();
         Self {
             tables,
-            materialized_views,
+            materialized_view_names,
+            materialized_view_sql,
+            incremental_views,
             views,
             indexes,
             has_indexes,
@@ -102,41 +110,51 @@ impl Schema {
             .iter()
             .any(|idx| idx.1.iter().any(|i| i.name == name))
     }
-    pub fn add_materialized_view(&mut self, view: IncrementalView) {
+    pub fn add_materialized_view(&mut self, view: IncrementalView, table: Arc<Table>, sql: String) {
         let name = normalize_ident(view.name());
-        self.materialized_views
+
+        // Add to tables (so it appears as a regular table)
+        self.tables.insert(name.clone(), table);
+
+        // Track that this is a materialized view
+        self.materialized_view_names.insert(name.clone());
+        self.materialized_view_sql.insert(name.clone(), sql);
+
+        // Store the incremental view (DBSP circuit)
+        self.incremental_views
             .insert(name, Arc::new(Mutex::new(view)));
     }

     pub fn get_materialized_view(&self, name: &str) -> Option<Arc<Mutex<IncrementalView>>> {
         let name = normalize_ident(name);
-        self.materialized_views.get(&name).cloned()
+        self.incremental_views.get(&name).cloned()
+    }
+
+    pub fn is_materialized_view(&self, name: &str) -> bool {
+        let name = normalize_ident(name);
+        self.materialized_view_names.contains(&name)
     }

     pub fn remove_view(&mut self, name: &str) -> Result<()> {
         let name = normalize_ident(name);
-        // Check if we have both a regular view and a materialized view with the same name
-        // It should be impossible to have both
-        let has_regular_view = self.views.contains_key(&name);
-        let has_materialized_view = self.materialized_views.contains_key(&name);
-
-        assert!(
-            !(has_regular_view && has_materialized_view),
-            "Found both regular view and materialized view with name: {name}"
-        );
-
-        if has_regular_view {
+        if self.views.contains_key(&name) {
             self.views.remove(&name);
             Ok(())
-        } else if has_materialized_view {
+        } else if self.materialized_view_names.contains(&name) {
+            // Remove from tables
+            self.tables.remove(&name);
+
+            // Remove from materialized view tracking
+            self.materialized_view_names.remove(&name);
+            self.materialized_view_sql.remove(&name);
+            self.incremental_views.remove(&name);
+
             // Remove from table_to_materialized_views dependencies
             for views in self.table_to_materialized_views.values_mut() {
                 views.retain(|v| v != &name);
             }
-            // Remove the materialized view itself
-            self.materialized_views.remove(&name);
             Ok(())
         } else {
             Err(crate::LimboError::ParseError(format!(
@@ -165,30 +183,6 @@ impl Schema {
             .unwrap_or_default()
     }

-    /// Get all materialized views that depend on a given table, skip normalizing ident.
-    /// We are basically assuming we already normalized the ident.
-    pub fn get_dependent_materialized_views_unnormalized(
-        &self,
-        table_name: &str,
-    ) -> Option<&Vec<String>> {
-        self.table_to_materialized_views.get(table_name)
-    }
-
-    /// Populate all materialized views by scanning their source tables
-    /// Returns IOResult to support async execution
-    pub fn populate_materialized_views(
-        &self,
-        conn: &Arc<Connection>,
-    ) -> Result<IOResult<()>> {
-        for view in self.materialized_views.values() {
-            let mut view = view
-                .lock()
-                .map_err(|_| LimboError::InternalError("Failed to lock view".to_string()))?;
-            return_if_io!(view.populate_from_table(conn));
-        }
-        Ok(IOResult::Done(()))
-    }
-
     /// Add a regular (non-materialized) view
     pub fn add_view(&mut self, view: View) {
         let name = normalize_ident(&view.name);
@@ -224,6 +218,12 @@ impl Schema {
     pub fn remove_table(&mut self, table_name: &str) {
         let name = normalize_ident(table_name);
         self.tables.remove(&name);
+
+        // If this was a materialized view, also clean up the metadata
+        if self.materialized_view_names.remove(&name) {
+            self.incremental_views.remove(&name);
+            self.materialized_view_sql.remove(&name);
+        }
     }

     pub fn get_btree_table(&self, name: &str) -> Option<Arc<BTreeTable>> {
@@ -297,8 +297,10 @@ impl Schema {
         let mut automatic_indices: HashMap> = HashMap::with_capacity(10);

-        // Collect materialized views for second pass to populate table_to_materialized_views mapping
-        let mut materialized_views_to_process: Vec<(String, Vec<String>)> = Vec::new();
+        // Store DBSP state table root pages: view_name -> dbsp_state_root_page
+        let mut dbsp_state_roots: HashMap<String, usize> = HashMap::new();
+        // Store materialized view info (SQL and root page) for later creation
+        let mut materialized_view_info: HashMap<String, (String, usize)> = HashMap::new();

         if matches!(pager.begin_read_tx()?, LimboResult::Busy) {
             return Err(LimboError::Busy);
         }
@@ -357,6 +359,18 @@
                     }
                     let table = BTreeTable::from_sql(sql, root_page as usize)?;
+
+                    // Check if this is a DBSP state table
+                    if table.name.starts_with(DBSP_TABLE_PREFIX) {
+                        // Extract the view name by stripping the DBSP state table prefix
+                        let view_name = table
+                            .name
+                            .strip_prefix(DBSP_TABLE_PREFIX)
+                            .unwrap()
+                            .to_string();
+                        dbsp_state_roots.insert(view_name, root_page as usize);
+                    }
+
                     self.add_btree_table(Arc::new(table));
                 }
                 "index" => {
@@ -418,6 +432,14 @@
                     };
                     let name = name_text.as_str();

+                    // Get the root page (column 3) to determine if this is a materialized view
+                    // Regular views have rootpage = 0, materialized views have rootpage != 0
+                    let root_page_value = record_cursor.get_value(&row, 3)?;
+                    let RefValue::Integer(root_page_int) = root_page_value else {
+                        return Err(LimboError::ConversionError("Expected integer value".into()));
+                    };
+                    let root_page = root_page_int as usize;
+
                     let sql_value = record_cursor.get_value(&row, 4)?;
                     let RefValue::Text(sql_text) = sql_value else {
                         return Err(LimboError::ConversionError("Expected text value".into()));
@@ -429,15 +451,12 @@
                     if let Ok(Some(Cmd::Stmt(stmt))) = parser.next_cmd() {
                         match stmt {
                             Stmt::CreateMaterializedView { ..
} => { - // Create IncrementalView for materialized views - if let Ok(incremental_view) = IncrementalView::from_sql(sql, self) { - let referenced_tables = - incremental_view.get_referenced_table_names(); - let view_name = name.to_string(); - self.add_materialized_view(incremental_view); - materialized_views_to_process - .push((view_name, referenced_tables)); - } + // Store materialized view info for later creation + // We'll create the actual IncrementalView in a later pass + // when we have both the main root page and DBSP state root + let view_name = name.to_string(); + materialized_view_info + .insert(view_name, (sql.to_string(), root_page)); } Stmt::CreateView { view_name: _, @@ -481,14 +500,6 @@ impl Schema { pager.end_read_tx()?; - // Second pass: populate table_to_materialized_views mapping - for (view_name, referenced_tables) in materialized_views_to_process { - // Register this view as dependent on each referenced table - for table_name in referenced_tables { - self.add_materialized_view_dependency(&table_name, &view_name); - } - } - for unparsed_sql_from_index in from_sql_indexes { if !self.indexes_enabled() { self.table_set_has_index(&unparsed_sql_from_index.table_name); @@ -520,6 +531,39 @@ impl Schema { } } + // Third pass: Create materialized views now that we have both root pages + for (view_name, (sql, main_root)) in materialized_view_info { + // Look up the DBSP state root for this view - must exist for materialized views + let dbsp_state_root = dbsp_state_roots.get(&view_name).ok_or_else(|| { + LimboError::InternalError(format!( + "Materialized view {view_name} is missing its DBSP state table" + )) + })?; + + // Create the IncrementalView with both root pages + let incremental_view = + IncrementalView::from_sql(&sql, self, main_root, *dbsp_state_root)?; + let referenced_tables = incremental_view.get_referenced_table_names(); + + // Create a BTreeTable for the materialized view + let table = Arc::new(Table::BTree(Arc::new(BTreeTable { + name: view_name.clone(), + root_page: main_root, + columns: incremental_view.columns.clone(), + primary_key_columns: Vec::new(), + has_rowid: true, + is_strict: false, + unique_sets: None, + }))); + + self.add_materialized_view(incremental_view, table, sql); + + // Register dependencies + for table_name in referenced_tables { + self.add_materialized_view_dependency(&table_name, &view_name); + } + } + Ok(()) } } @@ -565,15 +609,19 @@ impl Clone for Schema { (name.clone(), indexes) }) .collect(); - let materialized_views = self - .materialized_views + let materialized_view_names = self.materialized_view_names.clone(); + let materialized_view_sql = self.materialized_view_sql.clone(); + let incremental_views = self + .incremental_views .iter() .map(|(name, view)| (name.clone(), view.clone())) .collect(); let views = self.views.clone(); Self { tables, - materialized_views, + materialized_view_names, + materialized_view_sql, + incremental_views, views, indexes, has_indexes: self.has_indexes.clone(), diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 4a534b25e..dee30b2af 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -82,6 +82,12 @@ pub fn prepare_delete_plan( Some(table) => table, None => crate::bail_parse_error!("no such table: {}", tbl_name), }; + + // Check if this is a materialized view + if schema.is_materialized_view(&tbl_name) { + crate::bail_parse_error!("cannot modify materialized view {}", tbl_name); + } + let table = if let Some(table) = table.virtual_table() { Table::Virtual(table.clone()) 
} else if let Some(table) = table.btree() {
diff --git a/core/translate/insert.rs b/core/translate/insert.rs
index ae21991a6..04ce1ef4f 100644
--- a/core/translate/insert.rs
+++ b/core/translate/insert.rs
@@ -63,6 +63,7 @@ pub fn translate_insert(
     if with.is_some() {
         crate::bail_parse_error!("WITH clause is not supported");
     }
+
     if on_conflict.is_some() {
         crate::bail_parse_error!("ON CONFLICT clause is not supported");
     }
@@ -86,6 +87,11 @@ pub fn translate_insert(
         None => crate::bail_parse_error!("no such table: {}", table_name),
     };
 
+    // Check if this is a materialized view
+    if schema.is_materialized_view(table_name.as_str()) {
+        crate::bail_parse_error!("cannot modify materialized view {}", table_name);
+    }
+
     let resolver = Resolver::new(schema, syms);
 
     if let Some(virtual_table) = &table.virtual_table() {
diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs
index 6774c04d2..3e44ff0bd 100644
--- a/core/translate/main_loop.rs
+++ b/core/translate/main_loop.rs
@@ -196,7 +196,8 @@ pub fn init_loop(
             t_ctx.meta_left_joins[table_index] = Some(lj_metadata);
         }
     }
-    let (table_cursor_id, index_cursor_id) = table.open_cursors(program, mode)?;
+    let (table_cursor_id, index_cursor_id) =
+        table.open_cursors(program, mode, t_ctx.resolver.schema)?;
     match &table.op {
         Operation::Scan(Scan::BTreeTable { index, .. }) => match (mode, &table.table) {
             (OperationMode::SELECT, Table::BTree(btree)) => {
diff --git a/core/translate/plan.rs b/core/translate/plan.rs
index 082e39f96..e96dc4a1a 100644
--- a/core/translate/plan.rs
+++ b/core/translate/plan.rs
@@ -3,7 +3,7 @@ use turso_parser::ast::{self, SortOrder};
 
 use crate::{
     function::AggFunc,
-    schema::{BTreeTable, Column, FromClauseSubquery, Index, Table},
+    schema::{BTreeTable, Column, FromClauseSubquery, Index, Schema, Table},
     vdbe::{
         builder::{CursorKey, CursorType, ProgramBuilder},
         insn::{IdxInsertFlags, Insn},
@@ -852,6 +852,7 @@ impl JoinedTable {
         &self,
         program: &mut ProgramBuilder,
         mode: OperationMode,
+        schema: &Schema,
     ) -> Result<(Option<CursorID>, Option<CursorID>)> {
         let index = self.op.index();
         match &self.table {
@@ -863,10 +864,17 @@ impl JoinedTable {
                 let table_cursor_id = if table_not_required {
                     None
                 } else {
-                    Some(program.alloc_cursor_id_keyed(
-                        CursorKey::table(self.internal_id),
-                        CursorType::BTreeTable(btree.clone()),
-                    ))
+                    // Check if this is a materialized view
+                    let cursor_type =
+                        if let Some(view_mutex) = schema.get_materialized_view(&btree.name) {
+                            CursorType::MaterializedView(btree.clone(), view_mutex)
+                        } else {
+                            CursorType::BTreeTable(btree.clone())
+                        };
+                    Some(
+                        program
+                            .alloc_cursor_id_keyed(CursorKey::table(self.internal_id), cursor_type),
+                    )
                 };
                 let index_cursor_id = index.map(|index| {
                     program.alloc_cursor_id_keyed(
diff --git a/core/translate/planner.rs b/core/translate/planner.rs
index cf5a4314d..ea4cc8f53 100644
--- a/core/translate/planner.rs
+++ b/core/translate/planner.rs
@@ -3,9 +3,9 @@ use std::sync::Arc;
 use super::{
     expr::walk_expr,
     plan::{
-        Aggregate, ColumnUsedMask, Distinctness, EvalAt, JoinInfo, JoinOrderMember, JoinedTable,
-        Operation, OuterQueryReference, Plan, QueryDestination, ResultSetColumn, Scan,
-        TableReferences, WhereTerm,
+        Aggregate, ColumnUsedMask, Distinctness, EvalAt, IterationDirection, JoinInfo,
+        JoinOrderMember, JoinedTable, Operation, OuterQueryReference, Plan, QueryDestination,
+        ResultSetColumn, Scan, TableReferences, WhereTerm,
     },
     select::prepare_select_plan,
     SymbolTable,
@@ -529,12 +529,29 @@ fn parse_table(
            schema.get_materialized_view(table_name.as_str())
        });
        if let Some(view) =
view { - // Create a virtual table wrapper for the view - // We'll use the view's columns from the schema - let vtab = crate::vtab_view::create_view_virtual_table( - normalize_ident(table_name.as_str()).as_str(), - view.clone(), - )?; + // Check if this materialized view has persistent storage + let view_guard = view.lock().unwrap(); + let root_page = view_guard.get_root_page(); + + if root_page == 0 { + drop(view_guard); + return Err(crate::LimboError::InternalError( + "Materialized view has no storage allocated".to_string(), + )); + } + + // This is a materialized view with storage - treat it as a regular BTree table + // Create a BTreeTable from the view's metadata + let btree_table = Arc::new(crate::schema::BTreeTable { + name: view_guard.name().to_string(), + root_page, + columns: view_guard.columns.clone(), + primary_key_columns: Vec::new(), + has_rowid: true, + is_strict: false, + unique_sets: None, + }); + drop(view_guard); let alias = maybe_alias .map(|a| match a { @@ -544,12 +561,11 @@ fn parse_table( .map(|a| normalize_ident(a.as_str())); table_references.add_joined_table(JoinedTable { - op: Operation::Scan(Scan::VirtualTable { - idx_num: -1, - idx_str: None, - constraints: Vec::new(), + op: Operation::Scan(Scan::BTreeTable { + iter_dir: IterationDirection::Forwards, + index: None, }), - table: Table::Virtual(vtab), + table: Table::BTree(btree_table), identifier: alias.unwrap_or(normalized_qualified_name), internal_id: table_ref_counter.next(), join_info: None, diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 03025651c..5356e5a8a 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -690,6 +690,14 @@ pub fn translate_drop_table( } let table = table.unwrap(); // safe since we just checked for None + + // Check if this is a materialized view - if so, refuse to drop it with DROP TABLE + if schema.is_materialized_view(tbl_name.name.as_str()) { + bail_parse_error!( + "Cannot DROP TABLE on materialized view {}. 
Use DROP VIEW instead.", + tbl_name.name.as_str() + ); + } let cdc_table = prepare_cdc_if_necessary(&mut program, schema, SQLITE_TABLEID)?; let null_reg = program.alloc_register(); // r1 diff --git a/core/translate/update.rs b/core/translate/update.rs index 6f9c5af23..961046eea 100644 --- a/core/translate/update.rs +++ b/core/translate/update.rs @@ -140,6 +140,12 @@ pub fn prepare_update_plan( Some(table) => table, None => bail_parse_error!("Parse error: no such table: {}", table_name), }; + + // Check if this is a materialized view + if schema.is_materialized_view(table_name.as_str()) { + bail_parse_error!("cannot modify materialized view {}", table_name); + } + let table_name = table.get_name(); let iter_dir = body .order_by diff --git a/core/translate/view.rs b/core/translate/view.rs index fcb12df01..78e9c6a63 100644 --- a/core/translate/view.rs +++ b/core/translate/view.rs @@ -1,69 +1,14 @@ -use crate::schema::Schema; +use crate::schema::{Schema, DBSP_TABLE_PREFIX}; +use crate::storage::pager::CreateBTreeFlags; use crate::translate::emitter::Resolver; use crate::translate::schema::{emit_schema_entry, SchemaEntryType, SQLITE_TABLEID}; use crate::util::normalize_ident; use crate::vdbe::builder::{CursorType, ProgramBuilder}; -use crate::vdbe::insn::{CmpInsFlags, Cookie, Insn}; +use crate::vdbe::insn::{CmpInsFlags, Cookie, Insn, RegisterOrLiteral}; use crate::{Connection, Result, SymbolTable}; use std::sync::Arc; use turso_parser::ast; -/// Common logic for creating views (both regular and materialized) -fn emit_create_view_program( - schema: &Schema, - view_name: &str, - sql: String, - syms: &SymbolTable, - program: &mut ProgramBuilder, - populate_materialized: bool, -) -> Result<()> { - let normalized_view_name = normalize_ident(view_name); - - // Open cursor to sqlite_schema table - let table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); - let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone())); - program.emit_insn(Insn::OpenWrite { - cursor_id: sqlite_schema_cursor_id, - root_page: 1usize.into(), - db: 0, - }); - - // Add the view entry to sqlite_schema - let resolver = Resolver::new(schema, syms); - emit_schema_entry( - program, - &resolver, - sqlite_schema_cursor_id, - None, // cdc_table_cursor_id, no cdc for views - SchemaEntryType::View, - &normalized_view_name, - &normalized_view_name, // for views, tbl_name is same as name - 0, // views don't have a root page - Some(sql), - )?; - - // Parse schema to load the new view - program.emit_insn(Insn::ParseSchema { - db: sqlite_schema_cursor_id, - where_clause: Some(format!("name = '{normalized_view_name}'")), - }); - - program.emit_insn(Insn::SetCookie { - db: 0, - cookie: Cookie::SchemaVersion, - value: (schema.schema_version + 1) as i32, - p5: 0, - }); - - // Populate materialized views if needed - // Note: This must come after SetCookie since it may do I/O operations - if populate_materialized { - program.emit_insn(Insn::PopulateMaterializedViews); - } - - Ok(()) -} - pub fn translate_create_materialized_view( schema: &Schema, view_name: &str, @@ -92,17 +37,144 @@ pub fn translate_create_materialized_view( ))); } - // Validate that this view can be created as an IncrementalView + // Validate the view can be created and extract its columns // This validation happens before updating sqlite_master to prevent // storing invalid view definitions use crate::incremental::view::IncrementalView; - IncrementalView::can_create_view(select_stmt)?; + use crate::schema::BTreeTable; + let view_columns = 
IncrementalView::validate_and_extract_columns(select_stmt, schema)?;
 
-    // Reconstruct the SQL string
+    // Reconstruct the SQL string for storage
     let sql = create_materialized_view_to_str(view_name, select_stmt);
 
-    // Use common logic to emit the view creation program
-    emit_create_view_program(schema, view_name, sql, syms, &mut program, true)?;
+    // Create a btree for storing the materialized view state
+    // This btree will hold the materialized rows (row_id -> values)
+    let view_root_reg = program.alloc_register();
+
+    program.emit_insn(Insn::CreateBtree {
+        db: 0,
+        root: view_root_reg,
+        flags: CreateBTreeFlags::new_table(),
+    });
+
+    // Create a second btree for DBSP operator state (e.g., aggregate state)
+    // This is stored as a hidden table: __turso_internal_dbsp_state_<view_name>
+    let dbsp_state_root_reg = program.alloc_register();
+
+    program.emit_insn(Insn::CreateBtree {
+        db: 0,
+        root: dbsp_state_root_reg,
+        flags: CreateBTreeFlags::new_table(),
+    });
+
+    // Create a proper BTreeTable for the cursor with the actual view columns
+    let view_table = Arc::new(BTreeTable {
+        root_page: 0, // Will be set to actual root page after creation
+        name: normalized_view_name.clone(),
+        columns: view_columns.clone(),
+        primary_key_columns: vec![], // Materialized views use implicit rowid
+        has_rowid: true,
+        is_strict: false,
+        unique_sets: None,
+    });
+
+    // Allocate a cursor for writing to the view's btree during population
+    let view_cursor_id = program.alloc_cursor_id(crate::vdbe::builder::CursorType::BTreeTable(
+        view_table.clone(),
+    ));
+
+    // Open the cursor to the view's btree
+    program.emit_insn(Insn::OpenWrite {
+        cursor_id: view_cursor_id,
+        root_page: RegisterOrLiteral::Register(view_root_reg),
+        db: 0,
+    });
+
+    // Clear any existing data in the btree
+    // This is important because if we're reusing a page that previously held
+    // a materialized view, there might be old data still there
+    // We need to start with a clean slate
+    let clear_loop_label = program.allocate_label();
+    let clear_done_label = program.allocate_label();
+
+    // Rewind to the beginning of the btree
+    program.emit_insn(Insn::Rewind {
+        cursor_id: view_cursor_id,
+        pc_if_empty: clear_done_label,
+    });
+
+    // Loop to delete all rows
+    program.preassign_label_to_next_insn(clear_loop_label);
+    program.emit_insn(Insn::Delete {
+        cursor_id: view_cursor_id,
+        table_name: normalized_view_name.clone(),
+    });
+    program.emit_insn(Insn::Next {
+        cursor_id: view_cursor_id,
+        pc_if_next: clear_loop_label,
+    });
+
+    program.preassign_label_to_next_insn(clear_done_label);
+
+    // Open cursor to sqlite_schema table
+    let table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
+    let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone()));
+    program.emit_insn(Insn::OpenWrite {
+        cursor_id: sqlite_schema_cursor_id,
+        root_page: 1usize.into(),
+        db: 0,
+    });
+
+    // Add the materialized view entry to sqlite_schema
+    let resolver = Resolver::new(schema, syms);
+    emit_schema_entry(
+        &mut program,
+        &resolver,
+        sqlite_schema_cursor_id,
+        None, // cdc_table_cursor_id, no cdc for views
+        SchemaEntryType::View,
+        &normalized_view_name,
+        &normalized_view_name,
+        view_root_reg, // btree root for materialized view data
+        Some(sql),
+    )?;
+
+    // Add the DBSP state table to sqlite_master (required for materialized views)
+    let dbsp_table_name = format!("{DBSP_TABLE_PREFIX}{normalized_view_name}");
+    let dbsp_sql = format!("CREATE TABLE {dbsp_table_name} (key INTEGER PRIMARY KEY, state BLOB)");
+
+    
emit_schema_entry( + &mut program, + &resolver, + sqlite_schema_cursor_id, + None, // cdc_table_cursor_id + SchemaEntryType::Table, + &dbsp_table_name, + &dbsp_table_name, + dbsp_state_root_reg, // Root for DBSP state table + Some(dbsp_sql), + )?; + + // Parse schema to load the new view and DBSP state table + program.emit_insn(Insn::ParseSchema { + db: sqlite_schema_cursor_id, + where_clause: Some(format!( + "name = '{normalized_view_name}' OR name = '{dbsp_table_name}'" + )), + }); + + program.emit_insn(Insn::SetCookie { + db: 0, + cookie: Cookie::SchemaVersion, + value: (schema.schema_version + 1) as i32, + p5: 0, + }); + + // Populate the materialized view + let cursor_info = vec![(normalized_view_name.clone(), view_cursor_id)]; + program.emit_insn(Insn::PopulateMaterializedViews { + cursors: cursor_info, + }); program.epilogue(schema); Ok(program) @@ -137,8 +209,41 @@ pub fn translate_create_view( // Reconstruct the SQL string let sql = create_view_to_str(view_name, select_stmt); - // Use common logic to emit the view creation program - emit_create_view_program(schema, view_name, sql, syms, &mut program, false)?; + // Open cursor to sqlite_schema table + let table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); + let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone())); + program.emit_insn(Insn::OpenWrite { + cursor_id: sqlite_schema_cursor_id, + root_page: 1usize.into(), + db: 0, + }); + + // Add the view entry to sqlite_schema + let resolver = Resolver::new(schema, syms); + emit_schema_entry( + &mut program, + &resolver, + sqlite_schema_cursor_id, + None, // cdc_table_cursor_id, no cdc for views + SchemaEntryType::View, + &normalized_view_name, + &normalized_view_name, + 0, // Regular views don't have a btree + Some(sql), + )?; + + // Parse schema to load the new view + program.emit_insn(Insn::ParseSchema { + db: sqlite_schema_cursor_id, + where_clause: Some(format!("name = '{normalized_view_name}'")), + }); + + program.emit_insn(Insn::SetCookie { + db: 0, + cookie: Cookie::SchemaVersion, + value: (schema.schema_version + 1) as i32, + p5: 0, + }); Ok(program) } @@ -156,10 +261,9 @@ pub fn translate_drop_view( let normalized_view_name = normalize_ident(view_name); // Check if view exists (either regular or materialized) - let view_exists = schema.get_view(&normalized_view_name).is_some() - || schema - .get_materialized_view(&normalized_view_name) - .is_some(); + let is_regular_view = schema.get_view(&normalized_view_name).is_some(); + let is_materialized_view = schema.is_materialized_view(&normalized_view_name); + let view_exists = is_regular_view || is_materialized_view; if !view_exists && !if_exists { return Err(crate::LimboError::ParseError(format!( @@ -172,6 +276,20 @@ pub fn translate_drop_view( return Ok(program); } + // If this is a materialized view, we need to destroy its btree as well + if is_materialized_view { + if let Some(table) = schema.get_table(&normalized_view_name) { + if let Some(btree_table) = table.btree() { + // Destroy the btree for the materialized view + program.emit_insn(Insn::Destroy { + root: btree_table.root_page, + former_root_reg: 0, // No autovacuum + is_temp: 0, + }); + } + } + } + // Open cursor to sqlite_schema table let schema_table = schema.get_btree_table(SQLITE_TABLEID).unwrap(); let sqlite_schema_cursor_id = @@ -217,6 +335,8 @@ pub fn translate_drop_view( // Check if type == 'view' and name == view_name let skip_delete_label = program.allocate_label(); + + // Both regular and materialized views are stored 
as type='view' in sqlite_schema
     program.emit_insn(Insn::Ne {
         lhs: col0_reg,
         rhs: type_reg,
         target_pc: skip_delete_label,
         flags: CmpInsFlags::default(),
         collation: program.curr_collation(),
     });
+
     program.emit_insn(Insn::Ne {
         lhs: col1_reg,
         rhs: view_name_reg,
diff --git a/core/types.rs b/core/types.rs
index 25ad4655e..597f20cdd 100644
--- a/core/types.rs
+++ b/core/types.rs
@@ -2430,6 +2430,7 @@ pub enum Cursor {
     Pseudo(PseudoCursor),
     Sorter(Sorter),
     Virtual(VirtualTableCursor),
+    MaterializedView(Box<crate::incremental::cursor::MaterializedViewCursor>),
 }
 
 impl Cursor {
@@ -2445,6 +2446,12 @@ impl Cursor {
         Self::Sorter(cursor)
     }
 
+    pub fn new_materialized_view(
+        cursor: crate::incremental::cursor::MaterializedViewCursor,
+    ) -> Self {
+        Self::MaterializedView(Box::new(cursor))
+    }
+
     pub fn as_btree_mut(&mut self) -> &mut BTreeCursor {
         match self {
             Self::BTree(cursor) => cursor,
@@ -2472,6 +2479,15 @@ impl Cursor {
             _ => panic!("Cursor is not a virtual cursor"),
         }
     }
+
+    pub fn as_materialized_view_mut(
+        &mut self,
+    ) -> &mut crate::incremental::cursor::MaterializedViewCursor {
+        match self {
+            Self::MaterializedView(cursor) => cursor,
+            _ => panic!("Cursor is not a materialized view cursor"),
+        }
+    }
 }
 
 #[derive(Debug)]
@@ -2549,6 +2565,23 @@ macro_rules! return_if_io {
     };
 }
 
+#[macro_export]
+macro_rules! return_and_restore_if_io {
+    ($field:expr, $saved_state:expr, $e:expr) => {
+        match $e {
+            Ok(IOResult::Done(v)) => v,
+            Ok(IOResult::IO(io)) => {
+                let _ = std::mem::replace($field, $saved_state);
+                return Ok(IOResult::IO(io));
+            }
+            Err(e) => {
+                let _ = std::mem::replace($field, $saved_state);
+                return Err(e);
+            }
+        }
+    };
+}
+
 #[derive(Debug, PartialEq)]
 pub enum SeekResult {
     /// Record matching the [SeekOp] found in the B-tree and cursor was positioned to point onto that record
diff --git a/core/util.rs b/core/util.rs
index 097139d18..b259a90f3 100644
--- a/core/util.rs
+++ b/core/util.rs
@@ -1,14 +1,16 @@
 #![allow(unused)]
+use crate::incremental::view::IncrementalView;
 use crate::translate::expr::WalkControl;
 use crate::types::IOResult;
 use crate::{
-    schema::{self, Column, MaterializedViewsMap, Schema, Type},
+    schema::{self, BTreeTable, Column, Schema, Table, Type, DBSP_TABLE_PREFIX},
     translate::{collate::CollationSeq, expr::walk_expr, plan::JoinOrderMember},
     types::{Value, ValueType},
     LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable,
 };
 use crate::{Connection, IO};
 use std::{
+    collections::HashMap,
     rc::Rc,
     sync::{Arc, Mutex},
 };
@@ -148,7 +150,7 @@ pub fn parse_schema_rows(
     schema: &mut Schema,
     syms: &SymbolTable,
     mv_tx_id: Option<u64>,
-    mut existing_views: MaterializedViewsMap,
+    mut existing_views: HashMap<String, Arc<Mutex<IncrementalView>>>,
 ) -> Result<()> {
     rows.set_mv_tx_id(mv_tx_id);
     // TODO: if we IO, this unparsed indexes is lost. Will probably need some state between
     let mut from_sql_indexes = Vec::with_capacity(10);
     let mut automatic_indices = std::collections::HashMap::with_capacity(10);
 
-    // Collect views for second pass to populate table_to_views mapping
-    let mut views_to_process: Vec<(String, Vec<String>)> = Vec::new();
+    // Store DBSP state table root pages: view_name -> dbsp_state_root_page
+    let mut dbsp_state_roots: std::collections::HashMap<String, usize> =
+        std::collections::HashMap::new();
+    // Store materialized view info (SQL and root page) for later creation
+    let mut materialized_view_info: std::collections::HashMap<String, (String, usize)> =
+        std::collections::HashMap::new();
     loop {
         match rows.step()? {
             StepResult::Row => {
@@ -189,6 +195,18 @@ pub fn parse_schema_rows(
                             schema.add_virtual_table(vtab);
                         } else {
                             let table = schema::BTreeTable::from_sql(sql, root_page as usize)?;
+
+                            // Check if this is a DBSP state table
+                            if table.name.starts_with(DBSP_TABLE_PREFIX) {
+                                // Extract the view name from __turso_internal_dbsp_state_<view_name>
+                                let view_name = table
+                                    .name
+                                    .strip_prefix(DBSP_TABLE_PREFIX)
+                                    .unwrap()
+                                    .to_string();
+                                dbsp_state_roots.insert(view_name, root_page as usize);
+                            }
+
                             schema.add_btree_table(Arc::new(table));
                         }
                     }
@@ -228,6 +246,7 @@ pub fn parse_schema_rows(
                         use turso_parser::parser::Parser;
 
                         let name: &str = row.get::<&str>(1)?;
+                        let root_page = row.get::<i64>(3)?;
                         let sql: &str = row.get::<&str>(4)?;
                         let view_name = name.to_string();
@@ -236,52 +255,17 @@ pub fn parse_schema_rows(
                         if let Ok(Some(Cmd::Stmt(stmt))) = parser.next_cmd() {
                             match stmt {
                                 Stmt::CreateMaterializedView { .. } => {
-                                    // Handle materialized view with potential reuse
-                                    let should_create_new = if let Some(existing_view) =
-                                        existing_views.remove(&view_name)
-                                    {
-                                        // Check if we can reuse this view (same SQL definition)
-                                        let can_reuse = if let Ok(view_guard) = existing_view.lock()
-                                        {
-                                            view_guard.has_same_sql(sql)
-                                        } else {
-                                            false
-                                        };
+                                    // Store materialized view info for later creation
+                                    // We'll handle reuse logic and create the actual IncrementalView
+                                    // in a later pass when we have both the main root page and DBSP state root
+                                    materialized_view_info.insert(
+                                        view_name.clone(),
+                                        (sql.to_string(), root_page as usize),
+                                    );
 
-                                        if can_reuse {
-                                            // Reuse the existing view - it's already populated!
-                                            let referenced_tables =
-                                                if let Ok(view_guard) = existing_view.lock() {
-                                                    view_guard.get_referenced_table_names()
-                                                } else {
-                                                    vec![]
-                                                };
-
-                                            // Add the existing view to the new schema
-                                            schema
-                                                .materialized_views
-                                                .insert(view_name.clone(), existing_view);
-
-                                            // Store for second pass processing
-                                            views_to_process
-                                                .push((view_name.clone(), referenced_tables));
-                                            false // Don't create new
-                                        } else {
-                                            true // SQL changed, need to create new
-                                        }
-                                    } else {
-                                        true // No existing view, need to create new
-                                    };
-
-                                    if should_create_new {
-                                        // Create a new IncrementalView
-                                        // If this fails, we should propagate the error so the transaction rolls back
-                                        let incremental_view =
-                                            IncrementalView::from_sql(sql, schema)?;
-                                        let referenced_tables =
-                                            incremental_view.get_referenced_table_names();
-                                        schema.add_materialized_view(incremental_view);
-                                        views_to_process.push((view_name, referenced_tables));
+                                    // Mark the existing view for potential reuse
+                                    if existing_views.contains_key(&view_name) {
+                                        // We'll check for reuse in the third pass
                                     }
                                 }
                                 Stmt::CreateView {
@@ -359,11 +343,56 @@ pub fn parse_schema_rows(
         }
     }
 
-    // Second pass: populate table_to_views mapping
-    for (view_name, referenced_tables) in views_to_process {
-        // Register this view as dependent on each referenced table
-        for table_name in referenced_tables {
-            schema.add_materialized_view_dependency(&table_name, &view_name);
+    // Third pass: Create materialized views now that we have both root pages
+    for (view_name, (sql, main_root)) in materialized_view_info {
+        // Look up the DBSP state root for this view - must exist for materialized views
+        let dbsp_state_root = dbsp_state_roots.get(&view_name).ok_or_else(|| {
+            LimboError::InternalError(format!(
+                "Materialized view {view_name} is missing its DBSP state table"
+            ))
+        })?;
+
+        // Check if we can reuse the existing view
+        let mut reuse_view = false;
+        if let Some(existing_view_mutex) = schema.get_materialized_view(&view_name) {
+            let existing_view = existing_view_mutex.lock().unwrap();
+            if let Some(existing_sql) = schema.materialized_view_sql.get(&view_name) {
+                if existing_sql == &sql {
+                    reuse_view = true;
+                }
+            }
+        }
+
+        if reuse_view {
+            // View already exists with same SQL, just update dependencies
+            let existing_view_mutex = schema.get_materialized_view(&view_name).unwrap();
+            let existing_view = existing_view_mutex.lock().unwrap();
+            let referenced_tables = existing_view.get_referenced_table_names();
+            drop(existing_view); // Release lock before modifying schema
+            for table_name in referenced_tables {
+                schema.add_materialized_view_dependency(&table_name, &view_name);
+            }
+        } else {
+            // Create new IncrementalView with both root pages
+            let incremental_view =
+                IncrementalView::from_sql(&sql, schema, main_root, *dbsp_state_root)?;
+            let referenced_tables = incremental_view.get_referenced_table_names();
+
+            // Create a Table for the materialized view
+            let table = Arc::new(schema::Table::BTree(Arc::new(schema::BTreeTable {
+                root_page: main_root,
+                name: view_name.clone(),
+                columns: incremental_view.columns.clone(), // Use the view's columns, not the base table's
+                primary_key_columns: vec![],
+                has_rowid: true,
+                is_strict: false,
+                unique_sets: None,
+            })));
+
+            schema.add_materialized_view(incremental_view, table, sql.clone());
+            for table_name in referenced_tables {
+                schema.add_materialized_view_dependency(&table_name, &view_name);
+            }
         }
     }
diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs
index f661d6826..ffa26c03d 100644
--- a/core/vdbe/builder.rs
+++ b/core/vdbe/builder.rs
@@ -123,6 +123,10 @@ pub enum CursorType {
     Pseudo(PseudoCursorType),
     Sorter,
     VirtualTable(Arc<VirtualTable>),
+    MaterializedView(
+        Arc<BTreeTable>,
+        Arc<Mutex<IncrementalView>>,
+    ),
 }
 
 impl CursorType {
@@ -865,6 +869,7 @@ impl ProgramBuilder {
             let default = match cursor_type {
                 CursorType::BTreeTable(btree) => &btree.columns[column].default,
                 CursorType::BTreeIndex(index) => &index.columns[column].default,
+                CursorType::MaterializedView(btree, _) => &btree.columns[column].default,
                 _ => break 'value None,
             };
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 7a989d0c5..36d0f88f4 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -953,11 +953,43 @@ pub fn op_open_read(
     let num_columns = match cursor_type {
         CursorType::BTreeTable(table_rc) => table_rc.columns.len(),
         CursorType::BTreeIndex(index_arc) => index_arc.columns.len(),
+        CursorType::MaterializedView(table_rc, _) => table_rc.columns.len(),
         _ => unreachable!("This should not have happened"),
     };
     match cursor_type {
+        CursorType::MaterializedView(_, view_mutex) => {
+            // This is a materialized view with storage
+            // Create btree cursor for reading the persistent data
+            let btree_cursor = Box::new(BTreeCursor::new_table(
+                mv_cursor,
+                pager.clone(),
+                *root_page,
+                num_columns,
+            ));
+
+            // Get the view name and look up or create its transaction state
+            let view_name = view_mutex.lock().unwrap().name().to_string();
+            let tx_state = program
+                .connection
+                .view_transaction_states
+                .get_or_create(&view_name);
+
+            // Create materialized view cursor with this view's transaction state
+            let mv_cursor = crate::incremental::cursor::MaterializedViewCursor::new(
+                btree_cursor,
+                view_mutex.clone(),
+                pager.clone(),
+                tx_state,
+            )?;
+
+            cursors
+                .get_mut(*cursor_id)
+                .unwrap()
+                .replace(Cursor::new_materialized_view(mv_cursor));
+        }
         CursorType::BTreeTable(_) => {
+            // Regular table
             let cursor =
                 BTreeCursor::new_table(mv_cursor, pager.clone(), *root_page,
num_columns); cursors .get_mut(*cursor_id) @@ -1282,10 +1314,18 @@ pub fn op_rewind( ); assert!(pc_if_empty.is_offset()); let is_empty = { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Rewind"); - let cursor = cursor.as_btree_mut(); - return_if_io!(cursor.rewind()); - cursor.is_empty() + let mut cursor = state.get_cursor(*cursor_id); + match &mut *cursor { + Cursor::BTree(btree_cursor) => { + return_if_io!(btree_cursor.rewind()); + btree_cursor.is_empty() + } + Cursor::MaterializedView(mv_cursor) => { + return_if_io!(mv_cursor.rewind()); + !mv_cursor.is_valid()? + } + _ => panic!("Rewind on non-btree/materialized-view cursor"), + } }; if is_empty { state.pc = pc_if_empty.as_offset_int(); @@ -1430,17 +1470,43 @@ pub fn op_column( } => { { let mut table_cursor = state.get_cursor(table_cursor_id); - let table_cursor = table_cursor.as_btree_mut(); - return_if_io!( - table_cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true }) - ); + // MaterializedView cursors shouldn't go through deferred seek logic + // but if we somehow get here, handle it appropriately + match &mut *table_cursor { + Cursor::MaterializedView(mv_cursor) => { + // Seek to the rowid in the materialized view + return_if_io!(mv_cursor + .seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true })); + } + _ => { + // Regular btree cursor + let table_cursor = table_cursor.as_btree_mut(); + return_if_io!(table_cursor + .seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true })); + } + } } state.op_column_state = OpColumnState::GetColumn; } OpColumnState::GetColumn => { + // First check if this is a MaterializedViewCursor + { + let mut cursor = state.get_cursor(*cursor_id); + if let Cursor::MaterializedView(mv_cursor) = &mut *cursor { + // Handle materialized view column access + let value = return_if_io!(mv_cursor.column(*column)); + drop(cursor); + state.registers[*dest] = Register::Value(value); + break 'outer; + } + // Fall back to normal handling + } + let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap(); match cursor_type { - CursorType::BTreeTable(_) | CursorType::BTreeIndex(_) => { + CursorType::BTreeTable(_) + | CursorType::BTreeIndex(_) + | CursorType::MaterializedView(_, _) => { 'ifnull: { let mut cursor_ref = must_be_btree_cursor!( *cursor_id, @@ -1843,12 +1909,19 @@ pub fn op_next( ); assert!(pc_if_next.is_offset()); let is_empty = { - let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Next"); - let cursor = cursor.as_btree_mut(); - cursor.set_null_flag(false); - return_if_io!(cursor.next()); - - cursor.is_empty() + let mut cursor = state.get_cursor(*cursor_id); + match &mut *cursor { + Cursor::BTree(btree_cursor) => { + btree_cursor.set_null_flag(false); + return_if_io!(btree_cursor.next()); + btree_cursor.is_empty() + } + Cursor::MaterializedView(mv_cursor) => { + let has_more = return_if_io!(mv_cursor.next()); + !has_more + } + _ => panic!("Next on non-btree/materialized-view cursor"), + } }; if !is_empty { // Increment metrics for row read @@ -2444,9 +2517,18 @@ pub fn op_row_id( } else { state.registers[*dest] = Register::Value(Value::Null); } + } else if let Some(Cursor::MaterializedView(mv_cursor)) = + cursors.get_mut(*cursor_id).unwrap() + { + if let Some(rowid) = return_if_io!(mv_cursor.rowid()) { + state.registers[*dest] = Register::Value(Value::Integer(rowid)); + } else { + state.registers[*dest] = Register::Value(Value::Null); + } } else { return Err(LimboError::InternalError( - "RowId: cursor is not a table 
or virtual cursor".to_string(), + "RowId: cursor is not a table, virtual, or materialized view cursor" + .to_string(), )); } break; @@ -2497,40 +2579,67 @@ pub fn op_seek_rowid( assert!(target_pc.is_offset()); let (pc, did_seek) = { let mut cursor = state.get_cursor(*cursor_id); - let cursor = cursor.as_btree_mut(); - let rowid = match state.registers[*src_reg].get_value() { - Value::Integer(rowid) => Some(*rowid), - Value::Null => None, - // For non-integer values try to apply affinity and convert them to integer. - other => { - let mut temp_reg = Register::Value(other.clone()); - let converted = apply_affinity_char(&mut temp_reg, Affinity::Numeric); - if converted { - match temp_reg.get_value() { - Value::Integer(i) => Some(*i), - Value::Float(f) => Some(*f as i64), - _ => unreachable!("apply_affinity_char with Numeric should produce an integer if it returns true"), + + // Handle MaterializedView cursor + let (pc, did_seek) = match &mut *cursor { + Cursor::MaterializedView(mv_cursor) => { + let rowid = match state.registers[*src_reg].get_value() { + Value::Integer(rowid) => Some(*rowid), + Value::Null => None, + _ => None, + }; + + match rowid { + Some(rowid) => { + let seek_result = return_if_io!(mv_cursor + .seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true })); + let pc = if !matches!(seek_result, SeekResult::Found) { + target_pc.as_offset_int() + } else { + state.pc + 1 + }; + (pc, true) } - } else { - None + None => (target_pc.as_offset_int(), false), } } - }; - - match rowid { - Some(rowid) => { - let seek_result = return_if_io!( - cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true }) - ); - let pc = if !matches!(seek_result, SeekResult::Found) { - target_pc.as_offset_int() - } else { - state.pc + 1 + Cursor::BTree(btree_cursor) => { + let rowid = match state.registers[*src_reg].get_value() { + Value::Integer(rowid) => Some(*rowid), + Value::Null => None, + // For non-integer values try to apply affinity and convert them to integer. + other => { + let mut temp_reg = Register::Value(other.clone()); + let converted = apply_affinity_char(&mut temp_reg, Affinity::Numeric); + if converted { + match temp_reg.get_value() { + Value::Integer(i) => Some(*i), + Value::Float(f) => Some(*f as i64), + _ => unreachable!("apply_affinity_char with Numeric should produce an integer if it returns true"), + } + } else { + None + } + } }; - (pc, true) + + match rowid { + Some(rowid) => { + let seek_result = return_if_io!(btree_cursor + .seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true })); + let pc = if !matches!(seek_result, SeekResult::Found) { + target_pc.as_offset_int() + } else { + state.pc + 1 + }; + (pc, true) + } + None => (target_pc.as_offset_int(), false), + } } - None => (target_pc.as_offset_int(), false), - } + _ => panic!("SeekRowid on non-btree/materialized-view cursor"), + }; + (pc, did_seek) }; // Increment btree_seeks metric for SeekRowid operation after cursor is dropped if did_seek { @@ -5192,12 +5301,11 @@ pub fn op_insert( match &state.op_insert_state.sub_state { OpInsertSubState::MaybeCaptureRecord => { let schema = program.connection.schema.borrow(); - let dependent_views = - schema.get_dependent_materialized_views_unnormalized(table_name); + let dependent_views = schema.get_dependent_materialized_views(table_name); // If there are no dependent views, we don't need to capture the old record. 
// We also don't need to do it if the rowid of the UPDATEd row was changed, because that means // we deleted it earlier and `op_delete` already captured the change. - if dependent_views.is_none() || flag.has(InsertFlags::UPDATE_ROWID_CHANGE) { + if dependent_views.is_empty() || flag.has(InsertFlags::UPDATE_ROWID_CHANGE) { if flag.has(InsertFlags::REQUIRE_SEEK) { state.op_insert_state.sub_state = OpInsertSubState::Seek; } else { @@ -5303,9 +5411,8 @@ pub fn op_insert( state.op_insert_state.sub_state = OpInsertSubState::UpdateLastRowid; } else { let schema = program.connection.schema.borrow(); - let dependent_views = - schema.get_dependent_materialized_views_unnormalized(table_name); - if dependent_views.is_some() { + let dependent_views = schema.get_dependent_materialized_views(table_name); + if !dependent_views.is_empty() { state.op_insert_state.sub_state = OpInsertSubState::ApplyViewChange; } else { break; @@ -5325,9 +5432,8 @@ pub fn op_insert( program.n_change.set(prev_changes + 1); } let schema = program.connection.schema.borrow(); - let dependent_views = - schema.get_dependent_materialized_views_unnormalized(table_name); - if dependent_views.is_some() { + let dependent_views = schema.get_dependent_materialized_views(table_name); + if !dependent_views.is_empty() { state.op_insert_state.sub_state = OpInsertSubState::ApplyViewChange; continue; } @@ -5335,10 +5441,8 @@ pub fn op_insert( } OpInsertSubState::ApplyViewChange => { let schema = program.connection.schema.borrow(); - let dependent_views = - schema.get_dependent_materialized_views_unnormalized(table_name); - assert!(dependent_views.is_some()); - let dependent_views = dependent_views.unwrap(); + let dependent_views = schema.get_dependent_materialized_views(table_name); + assert!(!dependent_views.is_empty()); let (key, values) = { let mut cursor = state.get_cursor(*cursor_id); @@ -5383,17 +5487,24 @@ pub fn op_insert( (key, new_values) }; - let mut tx_states = program.connection.view_transaction_states.borrow_mut(); + for v in dependent_views.iter() {} + if let Some((key, values)) = state.op_insert_state.old_record.take() { for view_name in dependent_views.iter() { - let tx_state = tx_states.entry(view_name.clone()).or_default(); - tx_state.delta.delete(key, values.clone()); + let tx_state = program + .connection + .view_transaction_states + .get_or_create(view_name); + tx_state.delete(key, values.clone()); } } for view_name in dependent_views.iter() { - let tx_state = tx_states.entry(view_name.clone()).or_default(); + let tx_state = program + .connection + .view_transaction_states + .get_or_create(view_name); - tx_state.delta.insert(key, values.clone()); + tx_state.insert(key, values.clone()); } break; @@ -5522,10 +5633,12 @@ pub fn op_delete( assert!(!dependent_views.is_empty()); let maybe_deleted_record = state.op_delete_state.deleted_record.take(); if let Some((key, values)) = maybe_deleted_record { - let mut tx_states = program.connection.view_transaction_states.borrow_mut(); for view_name in dependent_views { - let tx_state = tx_states.entry(view_name.clone()).or_default(); - tx_state.delta.delete(key, values.clone()); + let tx_state = program + .connection + .view_transaction_states + .get_or_create(&view_name); + tx_state.delete(key, values.clone()); } } break; @@ -6232,7 +6345,10 @@ pub fn op_open_write( } else { let num_columns = match cursor_type { CursorType::BTreeTable(table_rc) => table_rc.columns.len(), - _ => unreachable!("Expected BTreeTable. 
This should not have happened."),
+            CursorType::MaterializedView(table_rc, _) => table_rc.columns.len(),
+            _ => unreachable!(
+                "Expected BTreeTable or MaterializedView. This should not have happened."
+            ),
         };
 
         let cursor =
@@ -6453,6 +6569,7 @@ pub fn op_parse_schema(
         },
         insn
     );
+
     let conn = program.connection.clone();
     // set auto commit to false in order for parse schema to not commit changes as transaction state is stored in connection,
     // and we use the same connection for nested query.
@@ -6464,7 +6581,7 @@ pub fn op_parse_schema(
 
         conn.with_schema_mut(|schema| {
             // TODO: This function below is synchronous, make it async
-            let existing_views = schema.materialized_views.clone();
+            let existing_views = schema.incremental_views.clone();
             conn.is_nested_stmt.set(true);
             parse_schema_rows(
                 stmt,
@@ -6479,7 +6596,7 @@ pub fn op_parse_schema(
 
         conn.with_schema_mut(|schema| {
             // TODO: This function below is synchronous, make it async
-            let existing_views = schema.materialized_views.clone();
+            let existing_views = schema.incremental_views.clone();
             conn.is_nested_stmt.set(true);
             parse_schema_rows(
                 stmt,
@@ -6500,14 +6617,75 @@ pub fn op_parse_schema(
 pub fn op_populate_materialized_views(
     program: &Program,
     state: &mut ProgramState,
-    _insn: &Insn,
-    _pager: &Rc<Pager>,
+    insn: &Insn,
+    pager: &Rc<Pager>,
     _mv_store: Option<&Arc<MvStore>>,
 ) -> Result<InsnFunctionStepResult> {
-    let conn = program.connection.clone();
-    let schema = conn.schema.borrow();
+    load_insn!(PopulateMaterializedViews { cursors }, insn);
+
+    let conn = program.connection.clone();
+
+    // For each view, get its cursor and root page
+    let mut view_info = Vec::new();
+    {
+        let cursors_ref = state.cursors.borrow();
+        for (view_name, cursor_id) in cursors {
+            // Get the cursor to find the root page
+            let cursor = cursors_ref
+                .get(*cursor_id)
+                .and_then(|c| c.as_ref())
+                .ok_or_else(|| {
+                    LimboError::InternalError(format!("Cursor {cursor_id} not found"))
+                })?;
+
+            let root_page = match cursor {
+                crate::types::Cursor::BTree(btree_cursor) => btree_cursor.root_page(),
+                _ => {
+                    return Err(LimboError::InternalError(
+                        "Expected BTree cursor for materialized view".into(),
+                    ))
+                }
+            };
+
+            view_info.push((view_name.clone(), root_page, *cursor_id));
+        }
+    }
+
+    // Now populate the views (after releasing the schema borrow)
+    for (view_name, _root_page, cursor_id) in view_info {
+        let schema = conn.schema.borrow();
+        if let Some(view) = schema.get_materialized_view(&view_name) {
+            let mut view = view.lock().unwrap();
+            // Drop the schema borrow before calling populate_from_table
+            drop(schema);
+
+            // Get the cursor for writing
+            // Get a mutable reference to the cursor
+            let mut cursors_ref = state.cursors.borrow_mut();
+            let cursor = cursors_ref
+                .get_mut(cursor_id)
+                .and_then(|c| c.as_mut())
+                .ok_or_else(|| {
+                    LimboError::InternalError(format!(
+                        "Cursor {cursor_id} not found for population"
+                    ))
+                })?;
+
+            // Extract the BTreeCursor
+            let btree_cursor = match cursor {
+                crate::types::Cursor::BTree(btree_cursor) => btree_cursor,
+                _ => {
+                    return Err(LimboError::InternalError(
+                        "Expected BTree cursor for materialized view population".into(),
+                    ))
+                }
+            };
+
+            // Now populate it with the cursor for writing
+            return_if_io!(view.populate_from_table(&conn, pager, btree_cursor.as_mut()));
+        }
+    }
 
-    return_if_io!(schema.populate_materialized_views(&conn));
     // All views populated, advance to next instruction
     state.pc += 1;
     Ok(InsnFunctionStepResult::Step)
@@ -6932,6 +7110,9 @@ pub fn op_open_ephemeral(
         CursorType::VirtualTable(_) => {
             panic!("OpenEphemeral on virtual table cursor, use Insn::VOpen instead");
         }
+        CursorType::MaterializedView(_, _) => {
+            panic!("OpenEphemeral on materialized view cursor");
+        }
     }
 
     state.pc += 1;
diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs
index fe3b23073..5f160c235 100644
--- a/core/vdbe/explain.rs
+++ b/core/vdbe/explain.rs
@@ -19,6 +19,7 @@ pub fn insn_to_str(
             CursorType::BTreeIndex(index) => &index.name,
             CursorType::Pseudo(_) => "pseudo",
             CursorType::VirtualTable(virtual_table) => &virtual_table.name,
+            CursorType::MaterializedView(table, _) => &table.name,
             CursorType::Sorter => "sorter",
         }
     };
@@ -541,6 +542,10 @@ pub fn insn_to_str(
                         let name = &index.columns.get(*column).unwrap().name;
                         Some(name)
                     }
+                    CursorType::MaterializedView(table, _) => {
+                        let name = table.columns.get(*column).and_then(|v| v.name.as_ref());
+                        name
+                    }
                     CursorType::Pseudo(_) => None,
                     CursorType::Sorter => None,
                     CursorType::VirtualTable(v) => v.columns.get(*column).unwrap().name.as_ref(),
@@ -1337,13 +1342,13 @@ pub fn insn_to_str(
             0,
             where_clause.clone().unwrap_or("NULL".to_string()),
         ),
-        Insn::PopulateMaterializedViews => (
+        Insn::PopulateMaterializedViews { cursors } => (
            "PopulateMaterializedViews",
            0,
            0,
            0,
            Value::Null,
-            0,
+            cursors.len() as u16,
            "".to_string(),
        ),
        Insn::Prev {
diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs
index 799eb86b9..80a813caa 100644
--- a/core/vdbe/insn.rs
+++ b/core/vdbe/insn.rs
@@ -898,7 +898,12 @@ pub enum Insn {
     },
 
     /// Populate all materialized views after schema parsing
-    PopulateMaterializedViews,
+    /// The cursors parameter contains a mapping of view names to cursor IDs that have been
+    /// opened to the view's btree for writing the materialized data
+    PopulateMaterializedViews {
+        /// Mapping of view name to cursor_id for writing to the view's btree
+        cursors: Vec<(String, usize)>,
+    },
 
     /// Place the result of lhs >> rhs in dest register.
     ShiftRight {
@@ -1190,7 +1195,7 @@ impl Insn {
             Insn::IsNull { .. } => execute::op_is_null,
             Insn::CollSeq { .. } => execute::op_coll_seq,
             Insn::ParseSchema { .. } => execute::op_parse_schema,
-            Insn::PopulateMaterializedViews => execute::op_populate_materialized_views,
+            Insn::PopulateMaterializedViews { .. } => execute::op_populate_materialized_views,
             Insn::ShiftRight { .. } => execute::op_shift_right,
             Insn::ShiftLeft { .. } => execute::op_shift_left,
             Insn::AddImm { .. } => execute::op_add_imm,
diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs
index 1c844e08a..0a5ea402d 100644
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -69,6 +69,17 @@ use std::{
 };
 use tracing::{instrument, Level};
 
+/// State machine for committing view deltas with I/O handling
+#[derive(Debug, Clone)]
+pub enum ViewDeltaCommitState {
+    NotStarted,
+    Processing {
+        views: Vec<String>, // view names (all materialized views have storage)
+        current_index: usize,
+    },
+    Done,
+}
+
 /// We use labels to indicate that we want to jump to whatever the instruction offset
 /// will be at runtime, because the offset cannot always be determined when the jump
 /// instruction is created.
@@ -284,6 +295,8 @@ pub struct ProgramState {
     current_collation: Option<CollationSeq>,
     op_column_state: OpColumnState,
     op_row_id_state: OpRowIdState,
+    /// State machine for committing view deltas with I/O handling
+    view_delta_state: ViewDeltaCommitState,
 }
 
 impl ProgramState {
@@ -326,6 +339,7 @@ impl ProgramState {
             current_collation: None,
             op_column_state: OpColumnState::Start,
             op_row_id_state: OpRowIdState::Start,
+            view_delta_state: ViewDeltaCommitState::NotStarted,
         }
     }
@@ -413,6 +427,7 @@ macro_rules! must_be_btree_cursor {
         let cursor = match cursor_type {
             CursorType::BTreeTable(_) => $state.get_cursor($cursor_id),
             CursorType::BTreeIndex(_) => $state.get_cursor($cursor_id),
+            CursorType::MaterializedView(_, _) => $state.get_cursor($cursor_id),
             CursorType::Pseudo(_) => panic!("{} on pseudo cursor", $insn_name),
             CursorType::Sorter => panic!("{} on sorter cursor", $insn_name),
             CursorType::VirtualTable(_) => panic!("{} on virtual table cursor", $insn_name),
@@ -518,20 +533,97 @@ impl Program {
     }
 
     #[instrument(skip_all, level = Level::DEBUG)]
-    fn apply_view_deltas(&self, rollback: bool) {
-        if self.connection.view_transaction_states.borrow().is_empty() {
-            return;
-        }
+    fn apply_view_deltas(
+        &self,
+        state: &mut ProgramState,
+        rollback: bool,
+        pager: &Rc<Pager>,
+    ) -> Result<IOResult<()>> {
+        use crate::types::IOResult;
 
-        let tx_states = self.connection.view_transaction_states.take();
+        loop {
+            match &state.view_delta_state {
+                ViewDeltaCommitState::NotStarted => {
+                    if self.connection.view_transaction_states.is_empty() {
+                        return Ok(IOResult::Done(()));
+                    }
 
-        if !rollback {
-            let schema = self.connection.schema.borrow();
+                    if rollback {
+                        // On rollback, just clear and done
+                        self.connection.view_transaction_states.clear();
+                        return Ok(IOResult::Done(()));
+                    }
 
-            for (view_name, tx_state) in tx_states.iter() {
-                if let Some(view_mutex) = schema.get_materialized_view(view_name) {
-                    let mut view = view_mutex.lock().unwrap();
-                    view.merge_delta(&tx_state.delta);
+                    // Not a rollback - proceed with processing
+                    let schema = self.connection.schema.borrow();
+
+                    // Collect materialized views - they should all have storage
+                    let mut views = Vec::new();
+                    for view_name in self.connection.view_transaction_states.get_view_names() {
+                        if let Some(view_mutex) = schema.get_materialized_view(&view_name) {
+                            let view = view_mutex.lock().unwrap();
+                            let root_page = view.get_root_page();
+
+                            // Materialized views should always have storage (root_page != 0)
+                            assert!(
+                                root_page != 0,
+                                "Materialized view '{view_name}' should have a root page"
+                            );
+
+                            views.push(view_name);
+                        }
+                    }
+
+                    state.view_delta_state = ViewDeltaCommitState::Processing {
+                        views,
+                        current_index: 0,
+                    };
+                }
+
+                ViewDeltaCommitState::Processing {
+                    views,
+                    current_index,
+                } => {
+                    // At this point we know it's not a rollback
+                    if *current_index >= views.len() {
+                        // All done, clear the transaction states
+                        self.connection.view_transaction_states.clear();
+                        state.view_delta_state = ViewDeltaCommitState::Done;
+                        return Ok(IOResult::Done(()));
+                    }
+
+                    let view_name = &views[*current_index];
+
+                    let delta = self
+                        .connection
+                        .view_transaction_states
+                        .get(view_name)
+                        .unwrap()
+                        .get_delta();
+
+                    let schema = self.connection.schema.borrow();
+                    if let Some(view_mutex) = schema.get_materialized_view(view_name) {
+                        let mut view = view_mutex.lock().unwrap();
+
+                        // Handle I/O from merge_delta - pass pager, circuit will create its own cursor
+                        match view.merge_delta(&delta, pager.clone())? {
+                            IOResult::Done(_) => {
+                                // Move to next view
+                                state.view_delta_state = ViewDeltaCommitState::Processing {
+                                    views: views.clone(),
+                                    current_index: current_index + 1,
+                                };
+                            }
+                            IOResult::IO(io) => {
+                                // Return I/O, will resume at same index
+                                return Ok(IOResult::IO(io));
+                            }
+                        }
+                    }
+                }
+
+                ViewDeltaCommitState::Done => {
+                    return Ok(IOResult::Done(()));
                 }
             }
         }
     }
@@ -544,7 +636,14 @@ impl Program {
         mv_store: Option<&Arc<MvStore>>,
         rollback: bool,
     ) -> Result<IOResult<()>> {
-        self.apply_view_deltas(rollback);
+        // Apply view deltas with I/O handling
+        match self.apply_view_deltas(program_state, rollback, &pager)? {
+            IOResult::IO(io) => return Ok(IOResult::IO(io)),
+            IOResult::Done(_) => {}
+        }
+
+        // Reset state for next use
+        program_state.view_delta_state = ViewDeltaCommitState::NotStarted;
 
         if self.connection.transaction_state.get() == TransactionState::None && mv_store.is_none() {
             // No need to do any work here if not in tx. Current MVCC logic doesn't work with this assumption,
diff --git a/core/vtab.rs b/core/vtab.rs
index 61db382ba..fc511ba40 100644
--- a/core/vtab.rs
+++ b/core/vtab.rs
@@ -6,7 +6,7 @@ use crate::{Connection, LimboError, SymbolTable, Value};
 use std::ffi::c_void;
 use std::ptr::NonNull;
 use std::rc::Rc;
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
 use turso_ext::{ConstraintInfo, IndexInfo, OrderByInfo, ResultCode, VTabKind, VTabModuleImpl};
 use turso_parser::{ast, parser::Parser};
 
@@ -14,7 +14,6 @@ use turso_parser::{ast, parser::Parser};
 pub(crate) enum VirtualTableType {
     Pragma(PragmaVirtualTable),
     External(ExtVirtualTable),
-    View(crate::vtab_view::ViewVirtualTable),
 }
 
 #[derive(Clone, Debug)]
@@ -30,7 +29,6 @@ impl VirtualTable {
         match &self.vtab_type {
             VirtualTableType::Pragma(_) => true,
             VirtualTableType::External(table) => table.readonly(),
-            VirtualTableType::View(_) => true,
         }
     }
 
@@ -88,21 +86,6 @@ impl VirtualTable {
         Ok(Arc::new(vtab))
     }
 
-    /// Create a virtual table for a view
-    pub(crate) fn view(
-        view_name: &str,
-        columns: Vec<Column>,
-        view: Arc<Mutex<IncrementalView>>,
-    ) -> crate::Result<Arc<VirtualTable>> {
-        let vtab = VirtualTable {
-            name: view_name.to_owned(),
-            columns,
-            kind: VTabKind::VirtualTable,
-            vtab_type: VirtualTableType::View(crate::vtab_view::ViewVirtualTable { view }),
-        };
-        Ok(Arc::new(vtab))
-    }
-
     fn resolve_columns(schema: String) -> crate::Result<Vec<Column>> {
         let mut parser = Parser::new(schema.as_bytes());
         if let ast::Cmd::Stmt(ast::Stmt::CreateTable { body, .. }) = parser.next_cmd()?.ok_or(
@@ -124,9 +107,6 @@ impl VirtualTable {
             VirtualTableType::External(table) => {
                 Ok(VirtualTableCursor::External(table.open(conn.clone())?))
             }
-            VirtualTableType::View(table) => {
-                Ok(VirtualTableCursor::View(Box::new(table.open(conn)?)))
-            }
         }
     }
 
@@ -134,7 +114,6 @@ impl VirtualTable {
         match &self.vtab_type {
             VirtualTableType::Pragma(_) => Err(LimboError::ReadOnly),
             VirtualTableType::External(table) => table.update(args),
-            VirtualTableType::View(_) => Err(LimboError::ReadOnly),
         }
     }
 
@@ -142,7 +121,6 @@ impl VirtualTable {
         match &self.vtab_type {
             VirtualTableType::Pragma(_) => Ok(()),
             VirtualTableType::External(table) => table.destroy(),
-            VirtualTableType::View(_) => Ok(()),
         }
     }
 
@@ -154,7 +132,6 @@ impl VirtualTable {
         match &self.vtab_type {
             VirtualTableType::Pragma(table) => table.best_index(constraints),
             VirtualTableType::External(table) => table.best_index(constraints, order_by),
-            VirtualTableType::View(view) => view.best_index(),
         }
     }
 }
@@ -162,7 +139,6 @@ impl VirtualTable {
 pub enum VirtualTableCursor {
     Pragma(Box<PragmaVirtualTableCursor>),
     External(ExtVirtualTableCursor),
-    View(Box<ViewVirtualTableCursor>),
 }
 
 impl VirtualTableCursor {
@@ -170,7 +146,6 @@ impl VirtualTableCursor {
     match self {
         VirtualTableCursor::Pragma(cursor) => cursor.next(),
         VirtualTableCursor::External(cursor) => cursor.next(),
-        VirtualTableCursor::View(cursor) => cursor.next(),
     }
 }
 
@@ -178,7 +153,6 @@ impl VirtualTableCursor {
     match self {
         VirtualTableCursor::Pragma(cursor) => cursor.rowid(),
         VirtualTableCursor::External(cursor) => cursor.rowid(),
-        VirtualTableCursor::View(cursor) => cursor.rowid(),
     }
 }
 
@@ -186,7 +160,6 @@ impl VirtualTableCursor {
     match self {
         VirtualTableCursor::Pragma(cursor) => cursor.column(column),
         VirtualTableCursor::External(cursor) => cursor.column(column),
-        VirtualTableCursor::View(cursor) => cursor.column(column),
     }
 }
 
@@ -202,7 +175,6 @@ impl VirtualTableCursor {
             VirtualTableCursor::External(cursor) => {
                 cursor.filter(idx_num, idx_str, arg_count, args)
             }
-            VirtualTableCursor::View(cursor) => cursor.filter(args),
         }
     }
 }
diff --git a/core/vtab_view.rs b/core/vtab_view.rs
deleted file mode 100644
index 4b44f0592..000000000
--- a/core/vtab_view.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-use crate::incremental::view::IncrementalView;
-use crate::{Connection, LimboError, Value, VirtualTable};
-use std::sync::{Arc, Mutex};
-
-/// Create a virtual table wrapper for a view
-pub fn create_view_virtual_table(
-    view_name: &str,
-    view: Arc<Mutex<IncrementalView>>,
-) -> crate::Result<Arc<VirtualTable>> {
-    // Use the VirtualTable::view method we added
-    let view_locked = view.lock().map_err(|_| {
-        LimboError::InternalError("Failed to lock view for virtual table creation".to_string())
-    })?;
-    let columns = view_locked.columns.clone();
-    drop(view_locked); // Release the lock before passing the Arc
-    VirtualTable::view(view_name, columns, view)
-}
-
-/// Virtual table wrapper for incremental views
-#[derive(Clone, Debug)]
-pub struct ViewVirtualTable {
-    pub view: Arc<Mutex<IncrementalView>>,
-}
-
-impl ViewVirtualTable {
-    pub fn best_index(&self) -> Result<turso_ext::IndexInfo, turso_ext::ResultCode> {
-        // Views don't use indexes - return a simple index info
-        Ok(turso_ext::IndexInfo {
-            idx_num: 0,
-            idx_str: None,
-            order_by_consumed: false,
-            estimated_cost: 1000000.0,
-            estimated_rows: 1000,
-            constraint_usages: Vec::new(),
-        })
-    }
-
-    pub fn open(&self, conn: Arc<Connection>) -> crate::Result<ViewVirtualTableCursor> {
-        // Views are now populated during schema parsing (in parse_schema_rows)
-        // so we just get the current data from the view.
-
-        let view = self.view.lock().map_err(|_| {
-            LimboError::InternalError("Failed to lock view for reading".to_string())
-        })?;
-
-        let tx_states = conn.view_transaction_states.borrow();
-        let tx_state = tx_states.get(view.name());
-
-        let data: Vec<(i64, Vec<Value>)> = view.current_data(tx_state);
-        Ok(ViewVirtualTableCursor {
-            data,
-            current_pos: 0,
-        })
-    }
-}
-
-/// Cursor for iterating over view data
-pub struct ViewVirtualTableCursor {
-    data: Vec<(i64, Vec<Value>)>,
-    current_pos: usize,
-}
-
-impl ViewVirtualTableCursor {
-    pub fn next(&mut self) -> crate::Result<bool> {
-        if self.current_pos < self.data.len() {
-            self.current_pos += 1;
-            Ok(self.current_pos < self.data.len())
-        } else {
-            Ok(false)
-        }
-    }
-
-    pub fn rowid(&self) -> i64 {
-        if self.current_pos < self.data.len() {
-            self.data[self.current_pos].0
-        } else {
-            -1
-        }
-    }
-
-    pub fn column(&self, column: usize) -> crate::Result<Value> {
-        if self.current_pos >= self.data.len() {
-            return Ok(Value::Null);
-        }
-
-        let (_row_key, values) = &self.data[self.current_pos];
-
-        // Return the value at the requested column index
-        if let Some(value) = values.get(column) {
-            Ok(value.clone())
-        } else {
-            Ok(Value::Null)
-        }
-    }
-
-    pub fn filter(&mut self, _args: Vec<Value>) -> crate::Result<bool> {
-        // Reset to beginning for new filter
-        self.current_pos = 0;
-        Ok(!self.data.is_empty())
-    }
-}

From 832e0dee81d6cd1a9a67e1784c4cc2dcb1acaf0e Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Fri, 5 Sep 2025 15:40:45 +0300
Subject: [PATCH 2/4] core/incremental: Fix typos in cursor.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 core/incremental/cursor.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/incremental/cursor.rs b/core/incremental/cursor.rs
index ddcd2a3e1..1e0e91af8 100644
--- a/core/incremental/cursor.rs
+++ b/core/incremental/cursor.rs
@@ -198,13 +198,13 @@ impl MaterializedViewCursor {
         let min_seen = btree_entries
             .changes
             .first()
-            .expect("canot be empty, we just tested for it")
+            .expect("cannot be empty, we just tested for it")
             .0
             .rowid;
         let max_seen = btree_entries
             .changes
             .last()
-            .expect("canot be empty, we just tested for it")
+            .expect("cannot be empty, we just tested for it")
             .0
             .rowid;
 

From 5dcffadad6af83337ef1c33b9a147501788c9559 Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Fri, 5 Sep 2025 16:03:25 +0300
Subject: [PATCH 3/4] core/vdbe: Remove empty loop

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 core/vdbe/execute.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 36d0f88f4..1b3c63fff 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -5487,7 +5487,6 @@ pub fn op_insert(
 
                     (key, new_values)
                 };
-                for v in dependent_views.iter() {}
 
                 if let Some((key, values)) = state.op_insert_state.old_record.take() {
                     for view_name in dependent_views.iter() {

From b2664e12c29b1eb77e87c08829b33806d97e8b2d Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Fri, 5 Sep 2025 16:12:12 +0300
Subject: [PATCH 4/4] cargo fmt

---
 core/vdbe/execute.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 1b3c63fff..4b0c40b58 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -5487,7 +5487,6 @@ pub fn op_insert(
 
                     (key, new_values)
                 };
-
                 if let Some((key, values)) = state.op_insert_state.old_record.take() {
                     for view_name in dependent_views.iter() {
                         let tx_state = program