diff --git a/core/incremental/compiler.rs b/core/incremental/compiler.rs index 9cd2e3702..6b9996a85 100644 --- a/core/incremental/compiler.rs +++ b/core/incremental/compiler.rs @@ -5,16 +5,221 @@
 //!
 //! Based on the DBSP paper: "DBSP: Automatic Incremental View Maintenance for Rich Query Languages"
 
+use crate::incremental::dbsp::Delta;
 use crate::incremental::expr_compiler::CompiledExpression;
+use crate::incremental::hashable_row::HashableRow;
 use crate::incremental::operator::{
-    Delta, FilterOperator, FilterPredicate, IncrementalOperator, ProjectOperator,
+    EvalState, FilterOperator, FilterPredicate, IncrementalOperator, InputOperator, ProjectOperator,
 };
+use crate::storage::btree::{BTreeCursor, BTreeKey};
 // Note: logical module must be made pub(crate) in translate/mod.rs
-use crate::translate::logical::{BinaryOperator, LogicalExpr, LogicalPlan, SchemaRef};
-use crate::types::Value;
-use crate::{LimboError, Result};
+use crate::translate::logical::{
+    BinaryOperator, LogicalExpr, LogicalPlan, LogicalSchema, SchemaRef,
+};
+use crate::types::{IOResult, SeekKey, SeekOp, SeekResult, Value};
+use crate::Pager;
+use crate::{return_and_restore_if_io, return_if_io, LimboError, Result};
 use std::collections::HashMap;
 use std::fmt::{self, Display, Formatter};
+use std::rc::Rc;
+use std::sync::Arc;
+
+// The state table is always a key-value store with 3 columns: key, state, and weight.
+const OPERATOR_COLUMNS: usize = 3;
+
+/// State machine for writing a row to the materialized view
+#[derive(Debug)]
+pub enum WriteViewRow {
+    /// Initial empty state
+    Empty,
+
+    /// Reading existing record to get current weight
+    GetRecord,
+
+    /// Deleting the row (when final weight <= 0)
+    Delete,
+
+    /// Inserting/updating the row with new weight
+    Insert {
+        /// The final weight to write
+        final_weight: isize,
+    },
+
+    /// Completed processing this row
+    Done,
+}
+
+impl WriteViewRow {
+    fn new() -> Self {
+        Self::Empty
+    }
+    fn write_row(
+        &mut self,
+        cursor: &mut BTreeCursor,
+        row: HashableRow,
+        weight: isize,
+    ) -> Result<IOResult<()>> {
+        loop {
+            match self {
+                WriteViewRow::Empty => {
+                    let key = SeekKey::TableRowId(row.rowid);
+                    let res = return_if_io!(cursor.seek(key, SeekOp::GE { eq_only: true }));
+                    match res {
+                        SeekResult::Found => *self = WriteViewRow::GetRecord,
+                        _ => {
+                            *self = WriteViewRow::Insert {
+                                final_weight: weight,
+                            }
+                        }
+                    }
+                }
+                WriteViewRow::GetRecord => {
+                    let existing_record = return_if_io!(cursor.record());
+                    let r = existing_record.ok_or_else(|| {
+                        crate::LimboError::InternalError(format!(
+                            "Found rowid {} in storage but could not read record",
+                            row.rowid
+                        ))
+                    })?;
+                    let values = r.get_values();
+
+                    // last value should contain the weight
+                    let existing_weight = match values.last() {
+                        Some(ref_val) => match ref_val.to_owned() {
+                            Value::Integer(w) => w as isize,
+                            _ => {
+                                return Err(crate::LimboError::InternalError(format!(
+                                    "Invalid weight value in storage for rowid {}",
+                                    row.rowid
+                                )))
+                            }
+                        },
+                        None => {
+                            return Err(crate::LimboError::InternalError(format!(
+                                "No weight value found in storage for rowid {}",
+                                row.rowid
+                            )))
+                        }
+                    };
+                    let final_weight = existing_weight + weight;
+                    if final_weight <= 0 {
+                        *self = WriteViewRow::Delete
+                    } else {
+                        *self = WriteViewRow::Insert { final_weight }
+                    }
+                }
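+                // A non-positive consolidated weight means the row has been fully
+                // retracted in DBSP terms, so it is removed from the view entirely.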
+                WriteViewRow::Delete => {
+                    // Delete the row. Important: when delete returns I/O, the btree operation
+                    // has already completed in memory, so mark as Done to avoid retry
+                    *self = WriteViewRow::Done;
+                    return_if_io!(cursor.delete());
+                }
+                WriteViewRow::Insert { final_weight } => {
+                    let key = SeekKey::TableRowId(row.rowid);
+                    return_if_io!(cursor.seek(key, SeekOp::GE { eq_only: true }));
+
+                    // Create the record values: row values + weight
+                    let mut values = row.values.clone();
+                    values.push(Value::Integer(*final_weight as i64));
+
+                    // Create an ImmutableRecord from the values
+                    let immutable_record =
+                        crate::types::ImmutableRecord::from_values(&values, values.len());
+                    let btree_key = BTreeKey::new_table_rowid(row.rowid, Some(&immutable_record));
+                    // Insert the row. Important: when insert returns I/O, the btree operation
+                    // has already completed in memory, so mark as Done to avoid retry
+                    *self = WriteViewRow::Done;
+                    return_if_io!(cursor.insert(&btree_key));
+                }
+                WriteViewRow::Done => {
+                    break;
+                }
+            }
+        }
+        Ok(IOResult::Done(()))
+    }
+}
+
+/// State machine for commit operations
+pub enum CommitState {
+    /// Initial state - ready to start commit
+    Init,
+
+    /// Running circuit with commit_operators flag set to true
+    CommitOperators {
+        /// Execute state for running the circuit
+        execute_state: Box<ExecuteState>,
+        /// Persistent cursor for operator state btree (internal_state_root)
+        state_cursor: Box<BTreeCursor>,
+    },
+
+    /// Updating the materialized view with the delta
+    UpdateView {
+        /// Delta to write to the view
+        delta: Delta,
+        /// Current index in delta.changes being processed
+        current_index: usize,
+        /// State for writing individual rows
+        write_row_state: WriteViewRow,
+        /// Cursor for view data btree - created fresh for each row
+        view_cursor: Box<BTreeCursor>,
+    },
+}
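+
+// Debug is implemented by hand because the cursor-holding states cannot simply
+// derive it; the cursors are elided and only their presence is reported.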
+impl std::fmt::Debug for CommitState {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Init => write!(f, "Init"),
+            Self::CommitOperators { execute_state, .. } => f
+                .debug_struct("CommitOperators")
+                .field("execute_state", execute_state)
+                .field("has_state_cursor", &true)
+                .finish(),
+            Self::UpdateView {
+                delta,
+                current_index,
+                write_row_state,
+                ..
+            } => f
+                .debug_struct("UpdateView")
+                .field("delta", delta)
+                .field("current_index", current_index)
+                .field("write_row_state", write_row_state)
+                .field("has_view_cursor", &true)
+                .finish(),
+        }
+    }
+}
+
+/// State machine for circuit execution across I/O operations
+/// Similar to EvalState but for tracking execution state through the circuit
+#[derive(Debug)]
+pub enum ExecuteState {
+    /// Empty state so we can allocate the space without executing
+    Uninitialized,
+
+    /// Initial state - starting circuit execution
+    Init {
+        /// Input deltas to process
+        input_data: DeltaSet,
+    },
+
+    /// Processing multiple inputs (for recursive node processing)
+    ProcessingInputs {
+        /// Collection of (node_id, state) pairs to process
+        input_states: Vec<(usize, ExecuteState)>,
+        /// Current index being processed
+        current_index: usize,
+        /// Collected deltas from processed inputs
+        input_deltas: Vec<Delta>,
+    },
+
+    /// Processing a specific node in the circuit
+    ProcessingNode {
+        /// Node's evaluation state (includes the delta in its Init state)
+        eval_state: Box<EvalState>,
+    },
+}
 
 /// A set of deltas for multiple tables/operators
 /// This provides a cleaner API for passing deltas through circuit execution
@@ -39,6 +244,11 @@ impl DeltaSet {
         }
     }
 
+    /// Create a DeltaSet from a HashMap
+    pub fn from_map(deltas: HashMap<String, Delta>) -> Self {
+        Self { deltas }
+    }
+
     /// Add a delta for a table
     pub fn insert(&mut self, table_name: String, delta: Delta) {
         self.deltas.insert(table_name, delta);
@@ -96,8 +306,8 @@ pub struct DbspNode {
     pub operator: DbspOperator,
     /// Input nodes (edges in the DAG)
     pub inputs: Vec<usize>,
-    /// The actual executable operator (if applicable)
-    pub executable: Option<Box<dyn IncrementalOperator>>,
+    /// The actual executable operator
+    pub executable: Box<dyn IncrementalOperator>,
 }
 
 impl std::fmt::Debug for DbspNode {
@@ -106,11 +316,51 @@
             .field("id", &self.id)
             .field("operator", &self.operator)
             .field("inputs", &self.inputs)
-            .field("has_executable", &self.executable.is_some())
+            .field("has_executable", &true)
             .finish()
     }
 }
 
+impl DbspNode {
+    fn process_node(
+        &mut self,
+        pager: Rc<Pager>,
+        eval_state: &mut EvalState,
+        root_page: usize,
+        commit_operators: bool,
+        state_cursor: Option<&mut Box<BTreeCursor>>,
+    ) -> Result<IOResult<Delta>> {
+        // Process delta using the executable operator
+        let op = &mut self.executable;
+
+        // Use provided cursor or create a local one
+        let mut local_cursor;
+        let cursor = if let Some(cursor) = state_cursor {
+            cursor.as_mut()
+        } else {
+            // Create a local cursor if none was provided
+            local_cursor = BTreeCursor::new_table(None, pager.clone(), root_page, OPERATOR_COLUMNS);
+            &mut local_cursor
+        };
+
+        let state = if commit_operators {
+            // Clone the delta from eval_state - don't extract it
+            // in case we need to re-execute due to I/O
+            let delta = match eval_state {
+                EvalState::Init { delta } => delta.clone(),
+                _ => panic!("commit can only be called when eval_state is in Init state"),
+            };
+            let result = return_if_io!(op.commit(delta, cursor));
+            // After successful commit, move state to Done
+            *eval_state = EvalState::Done;
+            result
+        } else {
+            return_if_io!(op.eval(eval_state, cursor))
+        };
+        Ok(IOResult::Done(state))
+    }
+}
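+
+// A node is processed in one of two modes: eval() computes the output delta without
+// persisting anything, while commit() also writes the operator's internal state
+// through the provided btree cursor.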
 /// Represents a complete DBSP circuit (DAG of operators)
 #[derive(Debug)]
 pub struct DbspCircuit {
@@ -120,24 +370,48 @@ pub struct DbspCircuit {
     next_id: usize,
     /// Root node ID (the final output)
     pub(super) root: Option<usize>,
+    /// Output schema of the circuit (schema of the root node)
+    pub(super) output_schema: SchemaRef,
+
+    /// State machine for commit operation
+    commit_state: CommitState,
+
+    /// Root page for the main materialized view data
+    pub(super) main_data_root: usize,
+    /// Root page for internal DBSP state
+    pub(super) internal_state_root: usize,
 }
 
 impl DbspCircuit {
-    /// Create a new empty circuit
-    pub fn new() -> Self {
+    /// Create a new empty circuit with initial empty schema
+    /// The actual output schema will be set when the root node is established
+    pub fn new(main_data_root: usize, internal_state_root: usize) -> Self {
+        // Start with an empty schema - will be updated when root is set
+        let empty_schema = Arc::new(LogicalSchema::new(vec![]));
         Self {
             nodes: HashMap::new(),
             next_id: 0,
             root: None,
+            output_schema: empty_schema,
+            commit_state: CommitState::Init,
+            main_data_root,
+            internal_state_root,
         }
     }
 
+    /// Set the root node and update the output schema
+    fn set_root(&mut self, root_id: usize, schema: SchemaRef) {
+        self.root = Some(root_id);
+        self.output_schema = schema;
+    }
+
     /// Add a node to the circuit
     fn add_node(
         &mut self,
         operator: DbspOperator,
         inputs: Vec<usize>,
-        executable: Option<Box<dyn IncrementalOperator>>,
+        executable: Box<dyn IncrementalOperator>,
     ) -> usize {
         let id = self.next_id;
         self.next_id += 1;
@@ -153,11 +427,21 @@ impl DbspCircuit {
         id
     }
 
-    /// Initialize the circuit with base data. Should be called once before processing deltas.
-    /// If the database is restarting with materialized views, this can be skipped.
-    pub fn initialize(&mut self, input_data: HashMap<String, Delta>) -> Result<Delta> {
+    pub fn run_circuit(
+        &mut self,
+        pager: Rc<Pager>,
+        execute_state: &mut ExecuteState,
+        commit_operators: bool,
+        state_cursor: &mut Box<BTreeCursor>,
+    ) -> Result<IOResult<Delta>> {
         if let Some(root_id) = self.root {
-            self.initialize_node(root_id, &input_data)
+            self.execute_node(
+                root_id,
+                pager,
+                execute_state,
+                commit_operators,
+                Some(state_cursor),
+            )
         } else {
             Err(LimboError::ParseError(
                 "Circuit has no root node".to_string(),
@@ -165,80 +449,19 @@ impl DbspCircuit {
         }
     }
 
-    /// Initialize a specific node and its dependencies
-    fn initialize_node(
-        &mut self,
-        node_id: usize,
-        input_data: &HashMap<String, Delta>,
-    ) -> Result<Delta> {
-        // Clone to avoid borrow checker issues
-        let inputs = self
-            .nodes
-            .get(&node_id)
-            .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?
-            .inputs
-            .clone();
-
-        // Initialize inputs first
-        let mut input_deltas = Vec::new();
-        for input_id in inputs {
-            let delta = self.initialize_node(input_id, input_data)?;
-            input_deltas.push(delta);
-        }
-
-        // Get mutable reference to node
-        let node = self
-            .nodes
-            .get_mut(&node_id)
-            .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?;
-
-        // Initialize based on operator type
-        let result = match &node.operator {
-            DbspOperator::Input { name, .. } => {
-                // Get data from input map
-                input_data.get(name).cloned().unwrap_or_else(Delta::new)
-            }
-            DbspOperator::Filter { .. }
-            | DbspOperator::Projection { .. }
-            | DbspOperator::Aggregate { .. } => {
-                // Initialize the executable operator
-                if let Some(ref mut op) = node.executable {
-                    if !input_deltas.is_empty() {
-                        let input_delta = input_deltas[0].clone();
-                        op.initialize(input_delta);
-                        op.get_current_state()
-                    } else {
-                        Delta::new()
-                    }
-                } else {
-                    // If no executable, pass through the input
-                    if !input_deltas.is_empty() {
-                        input_deltas[0].clone()
-                    } else {
-                        Delta::new()
-                    }
-                }
-            }
-        };
-
-        Ok(result)
-    }
-
     /// Execute the circuit with incremental input data (deltas).
-    /// Call initialize() first for initial data, then use execute() for updates.
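+    /// This pass is read-only: operator state and the materialized view are only
+    /// persisted when commit() is called.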
     ///
     /// # Arguments
-    /// * `input_data` - The committed deltas to process
-    /// * `uncommitted_data` - Uncommitted transaction deltas that should be visible
-    ///                        during this execution but not stored in operators.
-    ///                        Use DeltaSet::empty() for no uncommitted changes.
+    /// * `pager` - Pager for btree access
+    /// * `execute_state` - State machine containing input deltas and tracking execution progress
     pub fn execute(
-        &self,
-        input_data: HashMap<String, Delta>,
-        uncommitted_data: DeltaSet,
-    ) -> Result<Delta> {
+        &mut self,
+        pager: Rc<Pager>,
+        execute_state: &mut ExecuteState,
+    ) -> Result<IOResult<Delta>> {
         if let Some(root_id) = self.root {
-            self.execute_node(root_id, &input_data, &uncommitted_data)
+            self.execute_node(root_id, pager, execute_state, false, None)
         } else {
             Err(LimboError::ParseError(
                 "Circuit has no root node".to_string(),
@@ -246,146 +469,243 @@ impl DbspCircuit {
         }
     }
 
-    /// Commit deltas to the circuit, updating internal operator state.
+    /// Commit deltas to the circuit, updating internal operator state and persisting to btree.
     /// This should be called after execute() when you want to make changes permanent.
     ///
     /// # Arguments
     /// * `input_data` - The deltas to commit (same as what was passed to execute)
-    pub fn commit(&mut self, input_data: HashMap<String, Delta>) -> Result<()> {
-        if let Some(root_id) = self.root {
-            self.commit_node(root_id, &input_data)?;
-        }
-        Ok(())
-    }
-
-    /// Commit a specific node in the circuit
-    fn commit_node(
+    /// * `pager` - Pager for creating cursors to the btrees
+    pub fn commit(
         &mut self,
-        node_id: usize,
-        input_data: &HashMap<String, Delta>,
-    ) -> Result<Delta> {
-        // Clone to avoid borrow checker issues
-        let inputs = self
-            .nodes
-            .get(&node_id)
-            .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?
-            .inputs
-            .clone();
-
-        // Process inputs first
-        let mut input_deltas = Vec::new();
-        for input_id in inputs {
-            let delta = self.commit_node(input_id, input_data)?;
-            input_deltas.push(delta);
+        input_data: HashMap<String, Delta>,
+        pager: Rc<Pager>,
+    ) -> Result<IOResult<Delta>> {
+        // No root means nothing to commit
+        if self.root.is_none() {
+            return Ok(IOResult::Done(Delta::new()));
         }
 
-        // Get mutable reference to node
-        let node = self
-            .nodes
-            .get_mut(&node_id)
-            .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?;
+        // Get btree root pages
+        let main_data_root = self.main_data_root;
 
-        // Commit based on operator type
-        let result = match &node.operator {
-            DbspOperator::Input { name, .. } => {
-                // For input nodes, just return the committed delta
-                input_data.get(name).cloned().unwrap_or_else(Delta::new)
-            }
-            DbspOperator::Filter { .. }
-            | DbspOperator::Projection { .. }
-            | DbspOperator::Aggregate { .. } => {
-                // Commit the delta to the executable operator
-                if let Some(ref mut op) = node.executable {
-                    if !input_deltas.is_empty() {
-                        let input_delta = input_deltas[0].clone();
-                        // Commit updates state and returns the output delta
-                        op.commit(input_delta)
+        // Add 1 for the weight column that we store in the btree
+        let num_columns = self.output_schema.columns.len() + 1;
+
+        // Convert input_data to DeltaSet once, outside the loop
+        let input_delta_set = DeltaSet::from_map(input_data);
+
+        loop {
+            // Take ownership of the state for processing, to avoid borrow checker issues:
+            // we have to call run_circuit, which takes &mut self, so we cannot use
+            // return_if_io here and instead use the variant that restores the state before returning.
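+            // Commit flow: Init -> CommitOperators (run the circuit and persist
+            // operator state) -> UpdateView (apply the delta to the view btree,
+            // one row at a time) -> back to Init once every change is written.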
+            let mut state = std::mem::replace(&mut self.commit_state, CommitState::Init);
+            match &mut state {
+                CommitState::Init => {
+                    // Create state cursor when entering CommitOperators state
+                    let state_cursor = Box::new(BTreeCursor::new_table(
+                        None,
+                        pager.clone(),
+                        self.internal_state_root,
+                        OPERATOR_COLUMNS,
+                    ));
+
+                    self.commit_state = CommitState::CommitOperators {
+                        execute_state: Box::new(ExecuteState::Init {
+                            input_data: input_delta_set.clone(),
+                        }),
+                        state_cursor,
+                    };
+                }
+                CommitState::CommitOperators {
+                    ref mut execute_state,
+                    ref mut state_cursor,
+                } => {
+                    let delta = return_and_restore_if_io!(
+                        &mut self.commit_state,
+                        state,
+                        self.run_circuit(pager.clone(), execute_state, true, state_cursor)
+                    );
+
+                    // Create view cursor when entering UpdateView state
+                    let view_cursor = Box::new(BTreeCursor::new_table(
+                        None,
+                        pager.clone(),
+                        main_data_root,
+                        num_columns,
+                    ));
+
+                    self.commit_state = CommitState::UpdateView {
+                        delta,
+                        current_index: 0,
+                        write_row_state: WriteViewRow::new(),
+                        view_cursor,
+                    };
+                }
+                CommitState::UpdateView {
+                    delta,
+                    current_index,
+                    write_row_state,
+                    view_cursor,
+                } => {
+                    if *current_index >= delta.changes.len() {
+                        self.commit_state = CommitState::Init;
+                        let delta = std::mem::take(delta);
+                        return Ok(IOResult::Done(delta));
                     } else {
-                        Delta::new()
-                    }
-                } else {
-                    // If no executable, pass through the input
-                    if !input_deltas.is_empty() {
-                        input_deltas[0].clone()
-                    } else {
-                        Delta::new()
+                        let (row, weight) = delta.changes[*current_index].clone();
+
+                        // If we're starting a new row (Empty state), we need a fresh cursor
+                        // due to btree cursor state machine limitations
+                        if matches!(write_row_state, WriteViewRow::Empty) {
+                            *view_cursor = Box::new(BTreeCursor::new_table(
+                                None,
+                                pager.clone(),
+                                main_data_root,
+                                num_columns,
+                            ));
+                        }
+
+                        return_and_restore_if_io!(
+                            &mut self.commit_state,
+                            state,
+                            write_row_state.write_row(view_cursor, row, weight)
+                        );
+
+                        // Move to next row
+                        let delta = std::mem::take(delta);
+                        // Take ownership of view_cursor - we'll create a new one for next row if needed
+                        let view_cursor = std::mem::replace(
+                            view_cursor,
+                            Box::new(BTreeCursor::new_table(
+                                None,
+                                pager.clone(),
+                                main_data_root,
+                                num_columns,
+                            )),
+                        );
+
+                        self.commit_state = CommitState::UpdateView {
+                            delta,
+                            current_index: *current_index + 1,
+                            write_row_state: WriteViewRow::new(),
+                            view_cursor,
+                        };
+                    }
                 }
             }
-        };
-        Ok(result)
+        }
     }
 
     /// Execute a specific node in the circuit
     fn execute_node(
-        &self,
+        &mut self,
         node_id: usize,
-        input_data: &HashMap<String, Delta>,
-        uncommitted_data: &DeltaSet,
-    ) -> Result<Delta> {
-        // Clone to avoid borrow checker issues
-        let inputs = self
-            .nodes
-            .get(&node_id)
-            .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?
-            .inputs
-            .clone();
+        pager: Rc<Pager>,
+        execute_state: &mut ExecuteState,
+        commit_operators: bool,
+        state_cursor: Option<&mut Box<BTreeCursor>>,
+    ) -> Result<IOResult<Delta>> {
+        loop {
+            match execute_state {
+                ExecuteState::Uninitialized => {
+                    panic!("Trying to execute an uninitialized ExecuteState state machine");
+                }
+                ExecuteState::Init { input_data } => {
+                    let node = self
+                        .nodes
+                        .get(&node_id)
+                        .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?;
 
-        // Process inputs first
-        let mut input_deltas = Vec::new();
-        for input_id in inputs {
-            let delta = self.execute_node(input_id, input_data, uncommitted_data)?;
-            input_deltas.push(delta);
+                    // Check if this is an Input node
+                    match &node.operator {
+                        DbspOperator::Input { name, ..
} => { + // Input nodes get their delta directly from input_data + let delta = input_data.get(name); + *execute_state = ExecuteState::ProcessingNode { + eval_state: Box::new(EvalState::Init { delta }), + }; + } + _ => { + // Non-input nodes need to process their inputs + let input_data = std::mem::take(input_data); + let input_node_ids = node.inputs.clone(); + + let input_states: Vec<(usize, ExecuteState)> = input_node_ids + .iter() + .map(|&input_id| { + ( + input_id, + ExecuteState::Init { + input_data: input_data.clone(), + }, + ) + }) + .collect(); + + *execute_state = ExecuteState::ProcessingInputs { + input_states, + current_index: 0, + input_deltas: Vec::new(), + }; + } + } + } + ExecuteState::ProcessingInputs { + input_states, + current_index, + input_deltas, + } => { + if *current_index >= input_states.len() { + // All inputs processed, check we have exactly one delta + // (Input nodes never reach here since they go straight to ProcessingNode) + let delta = if input_deltas.is_empty() { + return Err(LimboError::InternalError( + "execute() cannot be called without a Delta".to_string(), + )); + } else if input_deltas.len() > 1 { + return Err(LimboError::InternalError( + format!("Until joins are supported, only one delta is expected. Got {} deltas", input_deltas.len()), + )); + } else { + input_deltas[0].clone() + }; + + *execute_state = ExecuteState::ProcessingNode { + eval_state: Box::new(EvalState::Init { delta }), + }; + } else { + // Get the (node_id, state) pair for the current index + let (input_node_id, input_state) = &mut input_states[*current_index]; + + let delta = return_if_io!(self.execute_node( + *input_node_id, + pager.clone(), + input_state, + commit_operators, + None // Input nodes don't need state cursor + )); + input_deltas.push(delta); + *current_index += 1; + } + } + ExecuteState::ProcessingNode { eval_state } => { + // Get mutable reference to node for eval + let node = self + .nodes + .get_mut(&node_id) + .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?; + + let output_delta = return_if_io!(node.process_node( + pager.clone(), + eval_state, + self.internal_state_root, + commit_operators, + state_cursor, + )); + return Ok(IOResult::Done(output_delta)); + } + } } - - // Get reference to node (read-only since we're using eval, not commit) - let node = self - .nodes - .get(&node_id) - .ok_or_else(|| LimboError::ParseError("Node not found".to_string()))?; - - // Execute based on operator type - let result = match &node.operator { - DbspOperator::Input { name, .. } => { - // Get committed data from input map and merge with uncommitted if present - let committed = input_data.get(name).cloned().unwrap_or_else(Delta::new); - let uncommitted = uncommitted_data.get(name); - - // If there's uncommitted data for this table, merge it with committed - if !uncommitted.is_empty() { - let mut combined = committed; - combined.merge(&uncommitted); - combined - } else { - committed - } - } - DbspOperator::Filter { .. } - | DbspOperator::Projection { .. } - | DbspOperator::Aggregate { .. 
} => {
-                // Process delta using the executable operator
-                if let Some(ref op) = node.executable {
-                    if !input_deltas.is_empty() {
-                        // Process the delta through the operator
-                        let input_delta = input_deltas[0].clone();
-
-                        // Use eval to compute result without modifying state
-                        // The uncommitted data has already been merged into input_delta if needed
-                        op.eval(input_delta, None)
-                    } else {
-                        Delta::new()
-                    }
-                } else {
-                    // If no executable, pass through the input
-                    if !input_deltas.is_empty() {
-                        input_deltas[0].clone()
-                    } else {
-                        Delta::new()
-                    }
-                }
-            }
-        };
-        Ok(result)
     }
 }
 
@@ -440,16 +760,17 @@ pub struct DbspCompiler {
 
 impl DbspCompiler {
     /// Create a new DBSP compiler
-    pub fn new() -> Self {
+    pub fn new(main_data_root: usize, internal_state_root: usize) -> Self {
         Self {
-            circuit: DbspCircuit::new(),
+            circuit: DbspCircuit::new(main_data_root, internal_state_root),
         }
     }
 
     /// Compile a logical plan to a DBSP circuit
     pub fn compile(mut self, plan: &LogicalPlan) -> Result<DbspCircuit> {
         let root_id = self.compile_plan(plan)?;
-        self.circuit.root = Some(root_id);
+        let output_schema = plan.schema().clone();
+        self.circuit.set_root(root_id, output_schema);
         Ok(self.circuit)
     }
 
@@ -486,10 +807,8 @@
             .collect();
 
         // Create the ProjectOperator
-        let executable: Option<Box<dyn IncrementalOperator>> =
-            ProjectOperator::from_compiled(compiled_exprs, aliases, input_column_names, output_column_names)
-                .ok()
-                .map(|op| Box::new(op) as Box<dyn IncrementalOperator>);
+        let executable: Box<dyn IncrementalOperator> =
+            Box::new(ProjectOperator::from_compiled(compiled_exprs, aliases, input_column_names, output_column_names)?);
 
         // Create projection node
         let node_id = self.circuit.add_node(
@@ -526,7 +845,7 @@
         let node_id = self.circuit.add_node(
             DbspOperator::Filter { predicate: dbsp_predicate },
             vec![input_id],
-            Some(executable),
+            executable,
         );
         Ok(node_id)
     }
@@ -621,15 +940,16 @@
             }
         }
 
-        // Create the AggregateOperator
+        // Create the AggregateOperator with a unique operator_id
+        // Use the next_node_id as the operator_id to ensure uniqueness
+        let operator_id = self.circuit.next_id;
         use crate::incremental::operator::AggregateOperator;
-        let executable: Option<Box<dyn IncrementalOperator>> = Some(
-            Box::new(AggregateOperator::new(
-                group_by_columns,
-                aggregate_functions.clone(),
-                input_column_names,
-            ))
-        );
+        let executable: Box<dyn IncrementalOperator> = Box::new(AggregateOperator::new(
+            operator_id, // Use next_node_id as operator_id
+            group_by_columns,
+            aggregate_functions.clone(),
+            input_column_names,
+        ));
 
         // Create aggregate node
         let node_id = self.circuit.add_node(
@@ -644,14 +964,17 @@
                 Ok(node_id)
             }
             LogicalPlan::TableScan(scan) => {
-                // Create input node (no executable needed for input)
+                // Create input node with InputOperator for uniform handling
+                let executable: Box<dyn IncrementalOperator> =
+                    Box::new(InputOperator::new(scan.table_name.clone()));
+
                 let node_id = self.circuit.add_node(
                     DbspOperator::Input {
                        name: scan.table_name.clone(),
                        schema: scan.schema.clone(),
                    },
                    vec![],
-                    None,
+                    executable,
                );
                Ok(node_id)
            }
@@ -925,10 +1248,15 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::incremental::operator::{Delta, FilterOperator, FilterPredicate};
+    use crate::incremental::dbsp::Delta;
+    use crate::incremental::operator::{FilterOperator, FilterPredicate};
     use crate::schema::{BTreeTable, Column as SchemaColumn, Schema, Type};
+    use crate::storage::pager::CreateBTreeFlags;
     use crate::translate::logical::LogicalPlanBuilder;
     use crate::translate::logical::LogicalSchema;
+    use crate::util::IOExt;
+    use crate::{Database, MemoryIO, Pager, IO};
+
     use std::rc::Rc;
     use std::sync::Arc;
     use turso_parser::ast;
     use turso_parser::parser::Parser;
@@ -984,13 +1312,71 @@
             unique_sets: None,
         };
         schema.add_btree_table(Arc::new(users_table));
+        let sales_table = BTreeTable {
+            name: "sales".to_string(),
+            root_page: 2,
+            primary_key_columns: vec![],
+            columns: vec![
+                SchemaColumn {
+                    name: Some("product_id".to_string()),
+                    ty: Type::Integer,
+                    ty_str: "INTEGER".to_string(),
+                    primary_key: false,
+                    is_rowid_alias: false,
+                    notnull: false,
+                    default: None,
+                    unique: false,
+                    collation: None,
+                    hidden: false,
+                },
+                SchemaColumn {
+                    name: Some("amount".to_string()),
+                    ty: Type::Integer,
+                    ty_str: "INTEGER".to_string(),
+                    primary_key: false,
+                    is_rowid_alias: false,
+                    notnull: false,
+                    default: None,
+                    unique: false,
+                    collation: None,
+                    hidden: false,
+                },
+            ],
+            has_rowid: true,
+            is_strict: false,
+            unique_sets: None,
+        };
+        schema.add_btree_table(Arc::new(sales_table));
+
        schema
    }};
}
 
+    fn setup_btree_for_circuit() -> (Rc<Pager>, usize, usize) {
+        let io: Arc<dyn IO> = Arc::new(MemoryIO::new());
+        let db = Database::open_file(io.clone(), ":memory:", false, false).unwrap();
+        let conn = db.connect().unwrap();
+        let pager = conn.pager.borrow().clone();
+
+        let _ = pager.io.block(|| pager.allocate_page1()).unwrap();
+
+        let main_root_page = pager
+            .io
+            .block(|| pager.btree_create(&CreateBTreeFlags::new_table()))
+            .unwrap() as usize;
+
+        let dbsp_state_page = pager
+            .io
+            .block(|| pager.btree_create(&CreateBTreeFlags::new_table()))
+            .unwrap() as usize;
+
+        (pager, main_root_page, dbsp_state_page)
+    }
+
     // Macro to compile SQL to DBSP circuit
     macro_rules! compile_sql {
         ($sql:expr) => {{
+            let (pager, main_root_page, dbsp_state_page) = setup_btree_for_circuit();
             let schema = test_schema!();
             let mut parser = Parser::new($sql.as_bytes());
             let cmd = parser
@@ -1002,7 +1388,12 @@
                 ast::Cmd::Stmt(stmt) => {
                     let mut builder = LogicalPlanBuilder::new(&schema);
                     let logical_plan = builder.build_statement(&stmt).unwrap();
-                    DbspCompiler::new().compile(&logical_plan).unwrap()
+                    (
+                        DbspCompiler::new(main_root_page, dbsp_state_page)
+                            .compile(&logical_plan)
+                            .unwrap(),
+                        pager,
+                    )
                 }
                 _ => panic!("Only SQL statements are supported"),
             }
        }};
    }
@@ -1108,40 +1499,72 @@
-    // Helper to get the current accumulated state of the circuit (from the root operator)
-    // This returns the internal state including bookkeeping entries
-    fn get_current_state(circuit: &DbspCircuit) -> Result<Delta> {
-        if let Some(root_id) = circuit.root {
-            let node = circuit
-                .nodes
-                .get(&root_id)
-                .ok_or_else(|| LimboError::ParseError("Root node not found".to_string()))?;
-
-            if let Some(ref executable) = node.executable {
-                Ok(executable.get_current_state())
-            } else {
-                // Input nodes don't have executables but also don't have state
-                Ok(Delta::new())
-            }
-        } else {
-            Err(LimboError::ParseError(
-                "Circuit has no root node".to_string(),
-            ))
-        }
-    }
+    // Helper function for tests to execute circuit and extract the Delta result
+    #[cfg(test)]
+    fn test_execute(
+        circuit: &mut DbspCircuit,
+        inputs: HashMap<String, Delta>,
+        pager: Rc<Pager>,
+    ) -> Result<Delta> {
+        let mut execute_state = ExecuteState::Init {
+            input_data: DeltaSet::from_map(inputs),
+        };
+        match circuit.execute(pager, &mut execute_state)? {
+            IOResult::Done(delta) => Ok(delta),
+            IOResult::IO(_) => panic!("Unexpected I/O in test"),
+        }
+    }
 
-    // Helper to create a DeltaSet from a HashMap (for tests)
-    fn delta_set_from_map(map: HashMap<String, Delta>) -> DeltaSet {
-        let mut delta_set = DeltaSet::new();
-        for (key, value) in map {
-            delta_set.insert(key, value);
+    // Helper to get the committed BTree state from main_data_root
+    // This reads the actual persisted data from the BTree
+    #[cfg(test)]
+    fn get_current_state(pager: Rc<Pager>, circuit: &DbspCircuit) -> Result<Delta> {
+        let mut delta = Delta::new();
+
+        let main_data_root = circuit.main_data_root;
+        let num_columns = circuit.output_schema.columns.len() + 1;
+
+        // Create a cursor to read the btree
+        let mut btree_cursor =
+            BTreeCursor::new_table(None, pager.clone(), main_data_root, num_columns);
+
+        // Rewind to the beginning
+        pager.io.block(|| btree_cursor.rewind())?;
+
+        // Read all rows from the BTree
+        loop {
+            // Check if cursor is empty (no more rows)
+            if btree_cursor.is_empty() {
+                break;
+            }
+
+            // Get the rowid
+            let rowid = pager.io.block(|| btree_cursor.rowid()).unwrap().unwrap();
+
+            // Get the record at this position
+            let record = pager
+                .io
+                .block(|| btree_cursor.record())
+                .unwrap()
+                .unwrap()
+                .to_owned();
+
+            let values_ref = record.get_values();
+            let num_data_columns = values_ref.len() - 1; // Get length before consuming
+            let values: Vec<Value> = values_ref
+                .into_iter()
+                .take(num_data_columns) // Skip the weight column
+                .map(|x| x.to_owned())
+                .collect();
+            delta.insert(rowid, values);
+            pager.io.block(|| btree_cursor.next()).unwrap();
         }
-        delta_set
+        Ok(delta)
     }
 
     #[test]
     fn test_simple_projection() {
-        let circuit = compile_sql!("SELECT name FROM users");
+        let (circuit, _) = compile_sql!("SELECT name FROM users");
 
         // Circuit has 2 nodes with Projection at root
         assert_circuit!(circuit, depth: 2, root: Projection);
@@ -1153,7 +1576,7 @@
 
     #[test]
     fn test_filter_with_projection() {
-        let circuit = compile_sql!("SELECT name FROM users WHERE age > 18");
+        let (circuit, _) = compile_sql!("SELECT name FROM users WHERE age > 18");
 
         // Circuit has 3 nodes with Projection at root
         assert_circuit!(circuit, depth: 3, root: Projection);
@@ -1167,7 +1590,7 @@
 
     #[test]
     fn test_select_star() {
-        let mut circuit = compile_sql!("SELECT * FROM users");
+        let (mut circuit, pager) = compile_sql!("SELECT * FROM users");
 
         // Create test data
         let mut input_delta = Delta::new();
@@ -1192,8 +1615,11 @@
         let mut inputs = HashMap::new();
         inputs.insert("users".to_string(), input_delta);
 
-        // Initialize circuit with initial data
-        let result = circuit.initialize(inputs).unwrap();
+        let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap();
+        pager
+            .io
+            .block(|| circuit.commit(inputs.clone(), pager.clone()))
+            .unwrap();
 
         // Should have all rows with all columns
         assert_eq!(result.changes.len(), 2);
@@ -1207,7 +1633,7 @@
 
     #[test]
     fn test_execute_filter() {
-        let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18");
+        let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18");
 
         // Create test data
         let mut input_delta = Delta::new();
@@ -1240,8 +1666,11 @@
         let mut inputs = HashMap::new();
         inputs.insert("users".to_string(), input_delta);
 
-        // Initialize circuit with initial data
-        let result = circuit.initialize(inputs).unwrap();
+        let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap();
+        pager
+            .io
+            .block(|| circuit.commit(inputs.clone(), pager.clone()))
+            .unwrap();
// Should only have Alice and Charlie (age > 18) assert_eq!( @@ -1284,7 +1713,7 @@ mod tests { #[test] fn test_simple_column_projection() { - let mut circuit = compile_sql!("SELECT name, age FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT name, age FROM users"); // Create test data let mut input_delta = Delta::new(); @@ -1309,8 +1738,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have all rows but only 2 columns (name, age) assert_eq!(result.changes.len(), 2); @@ -1327,7 +1759,7 @@ mod tests { #[test] fn test_simple_aggregation() { // Test COUNT(*) with GROUP BY - let mut circuit = compile_sql!("SELECT age, COUNT(*) FROM users GROUP BY age"); + let (mut circuit, pager) = compile_sql!("SELECT age, COUNT(*) FROM users GROUP BY age"); // Create test data let mut input_delta = Delta::new(); @@ -1360,8 +1792,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have 2 groups: age 25 with count 2, age 30 with count 1 assert_eq!(result.changes.len(), 2); @@ -1392,7 +1827,7 @@ mod tests { #[test] fn test_sum_aggregation() { // Test SUM with GROUP BY - let mut circuit = compile_sql!("SELECT name, SUM(age) FROM users GROUP BY name"); + let (mut circuit, pager) = compile_sql!("SELECT name, SUM(age) FROM users GROUP BY name"); // Create test data - some names appear multiple times let mut input_delta = Delta::new(); @@ -1425,8 +1860,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have 2 groups: Alice with sum 55, Bob with sum 20 assert_eq!(result.changes.len(), 2); @@ -1448,7 +1886,7 @@ mod tests { #[test] fn test_aggregation_without_group_by() { // Test aggregation without GROUP BY - should produce a single row - let mut circuit = compile_sql!("SELECT COUNT(*), SUM(age), AVG(age) FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT COUNT(*), SUM(age), AVG(age) FROM users"); // Create test data let mut input_delta = Delta::new(); @@ -1481,8 +1919,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); // Should have exactly 1 row with all aggregates assert_eq!( @@ -1521,7 +1962,7 @@ mod tests { #[test] fn test_expression_projection_execution() { // Test that complex expressions work through VDBE compilation - let mut circuit = compile_sql!("SELECT hex(id) FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT 
hex(id) FROM users"); // Create test data let mut input_delta = Delta::new(); @@ -1546,8 +1987,11 @@ mod tests { let mut inputs = HashMap::new(); inputs.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(inputs).unwrap(); + let result = test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); assert_eq!(result.changes.len(), 2); @@ -1586,7 +2030,7 @@ mod tests { fn test_projection_aggregation_projection_pattern() { // Test pattern: projection -> aggregation -> projection // Query: SELECT HEX(SUM(age + 2)) FROM users - let mut circuit = compile_sql!("SELECT HEX(SUM(age + 2)) FROM users"); + let (mut circuit, pager) = compile_sql!("SELECT HEX(SUM(age + 2)) FROM users"); // Initial input data let mut input_delta = Delta::new(); @@ -1618,8 +2062,11 @@ mod tests { let mut input_data = HashMap::new(); input_data.insert("users".to_string(), input_delta); - // Initialize the circuit with the initial data - let result = circuit.initialize(input_data).unwrap(); + let result = test_execute(&mut circuit, input_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(input_data.clone(), pager.clone())) + .unwrap(); // Expected: SUM(age + 2) = (25+2) + (30+2) + (35+2) = 27 + 32 + 37 = 96 // HEX(96) should be the hex representation of the string "96" = "3936" @@ -1649,7 +2096,7 @@ mod tests { let mut input_data = HashMap::new(); input_data.insert("users".to_string(), input_delta); - let result = circuit.execute(input_data, DeltaSet::empty()).unwrap(); + let result = test_execute(&mut circuit, input_data, pager.clone()).unwrap(); // Expected: new SUM(age + 2) = 96 + (40+2) = 138 // HEX(138) = hex of "138" = "313338" @@ -1674,7 +2121,8 @@ mod tests { fn test_nested_projection_with_groupby() { // Test pattern: projection -> aggregation with GROUP BY -> projection // Query: SELECT name, HEX(SUM(age * 2)) FROM users GROUP BY name - let mut circuit = compile_sql!("SELECT name, HEX(SUM(age * 2)) FROM users GROUP BY name"); + let (mut circuit, pager) = + compile_sql!("SELECT name, HEX(SUM(age * 2)) FROM users GROUP BY name"); // Initial input data let mut input_delta = Delta::new(); @@ -1706,8 +2154,11 @@ mod tests { let mut input_data = HashMap::new(); input_data.insert("users".to_string(), input_delta); - // Initialize circuit with initial data - let result = circuit.initialize(input_data).unwrap(); + let result = test_execute(&mut circuit, input_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(input_data.clone(), pager.clone())) + .unwrap(); // Expected results: // Alice: SUM(25*2 + 35*2) = 50 + 70 = 120, HEX("120") = "313230" @@ -1746,7 +2197,7 @@ mod tests { fn test_transaction_context() { // Test that uncommitted changes are visible within a transaction // but don't affect the operator's internal state - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with some data let mut init_data = HashMap::new(); @@ -1769,10 +2220,13 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + let state = pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); - // Verify initial state: only Alice (age > 18) - let state = 
get_current_state(&circuit).unwrap(); + // Verify initial delta : only Alice (age > 18) assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.values[1], Value::Text("Alice".into())); @@ -1801,9 +2255,7 @@ mod tests { // Execute with uncommitted data - this simulates processing the uncommitted changes // through the circuit to see what would be visible - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // The result should show Charlie being added (passes filter, age > 18) // David is filtered out (age 15 < 18) @@ -1826,9 +2278,7 @@ mod tests { ); commit_data.insert("users".to_string(), commit_delta); - let commit_result = circuit - .execute(commit_data.clone(), DeltaSet::empty()) - .unwrap(); + let commit_result = test_execute(&mut circuit, commit_data.clone(), pager.clone()).unwrap(); // The commit result should show Charlie being added assert_eq!(commit_result.changes.len(), 1, "Should see Charlie added"); @@ -1838,17 +2288,20 @@ mod tests { ); // Commit the change to make it permanent - circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // Now if we execute again with no changes, we should see no delta - let empty_result = circuit.execute(HashMap::new(), DeltaSet::empty()).unwrap(); + let empty_result = test_execute(&mut circuit, HashMap::new(), pager.clone()).unwrap(); assert_eq!(empty_result.changes.len(), 0, "No changes when no new data"); } #[test] fn test_uncommitted_delete() { // Test that uncommitted deletes are handled correctly without affecting operator state - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with some data let mut init_data = HashMap::new(); @@ -1879,10 +2332,13 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + let state = pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); - // Verify initial state: Alice, Bob, Charlie (all age > 18) - let state = get_current_state(&circuit).unwrap(); + // Verify initial delta: Alice, Bob, Charlie (all age > 18) assert_eq!(state.changes.len(), 3); // Create uncommitted delete for Bob @@ -1899,9 +2355,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted delete - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Result should show the deleted row that passed the filter assert_eq!( @@ -1911,7 +2365,7 @@ mod tests { ); // Verify operator's internal state is unchanged (still has all 3 users) - let state_after = get_current_state(&circuit).unwrap(); + let state_after = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state_after.changes.len(), 3, @@ -1931,12 +2385,13 @@ mod tests { ); commit_data.insert("users".to_string(), commit_delta); - let commit_result = circuit - .execute(commit_data.clone(), DeltaSet::empty()) - .unwrap(); + let commit_result = test_execute(&mut circuit, commit_data.clone(), pager.clone()).unwrap(); // Actually commit the delete to update operator state - 
circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // The commit result should show Bob being deleted assert_eq!(commit_result.changes.len(), 1, "Should see Bob deleted"); @@ -1950,7 +2405,7 @@ mod tests { ); // After commit, internal state should have only Alice and Charlie - let final_state = get_current_state(&circuit).unwrap(); + let final_state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( final_state.changes.len(), 2, @@ -1976,7 +2431,7 @@ mod tests { #[test] fn test_uncommitted_update() { // Test that uncommitted updates (delete + insert) are handled correctly - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with some data let mut init_data = HashMap::new(); @@ -1999,7 +2454,11 @@ mod tests { ); // Bob is 17, filtered out init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Create uncommitted update: Bob turns 19 (update from 17 to 19) // This is modeled as delete + insert @@ -2024,9 +2483,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted update - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Bob should now appear in the result (age 19 > 18) // Consolidate to see the final state @@ -2062,10 +2519,13 @@ mod tests { commit_data.insert("users".to_string(), commit_delta); // Commit the update - circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // After committing, Bob should be in the view's state - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); let mut consolidated_state = state; consolidated_state.consolidate(); @@ -2094,7 +2554,7 @@ mod tests { #[test] fn test_uncommitted_filtered_delete() { // Test deleting a row that doesn't pass the filter - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with mixed data let mut init_data = HashMap::new(); @@ -2117,7 +2577,11 @@ mod tests { ); // Bob doesn't pass filter init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Create uncommitted delete for Bob (who isn't in the view because age=15) let mut uncommitted = HashMap::new(); @@ -2133,9 +2597,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted delete - should produce no output changes - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted)) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted, pager.clone()).unwrap(); // Bob wasn't in the view, so deleting him produces no output assert_eq!( @@ -2145,7 +2607,7 @@ mod tests { ); // The view state should still only have Alice - let state = 
get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 1, "View still has only Alice"); assert_eq!(state.changes[0].0.values[1], Value::Text("Alice".into())); } @@ -2153,7 +2615,7 @@ mod tests { #[test] fn test_uncommitted_mixed_operations() { // Test multiple uncommitted operations together - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age > 18"); // Initialize with some data let mut init_data = HashMap::new(); @@ -2176,10 +2638,14 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 2); // Create uncommitted changes: @@ -2236,9 +2702,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted changes - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Result should show all changes: delete Alice, update Bob, insert Charlie and David assert_eq!( @@ -2248,7 +2712,7 @@ mod tests { ); // Verify operator's internal state is unchanged - let state_after = get_current_state(&circuit).unwrap(); + let state_after = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state_after.changes.len(), 2, "Still has Alice and Bob"); // Commit all changes @@ -2296,19 +2760,20 @@ mod tests { ); commit_data.insert("users".to_string(), commit_delta); - let commit_result = circuit - .execute(commit_data.clone(), DeltaSet::empty()) - .unwrap(); + let commit_result = test_execute(&mut circuit, commit_data.clone(), pager.clone()).unwrap(); // Should see: Alice deleted, Bob deleted, Bob inserted, Charlie inserted // (David filtered out) assert_eq!(commit_result.changes.len(), 4, "Should see 4 changes"); // Actually commit the changes to update operator state - circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // After all commits, execute with no changes should return empty delta - let empty_result = circuit.execute(HashMap::new(), DeltaSet::empty()).unwrap(); + let empty_result = test_execute(&mut circuit, HashMap::new(), pager.clone()).unwrap(); assert_eq!(empty_result.changes.len(), 0, "No changes when no new data"); } @@ -2319,56 +2784,9 @@ mod tests { // and we need to see correct aggregation results within the transaction // Create a sales table schema for testing - let mut schema = Schema::new(false); - let sales_table = BTreeTable { - name: "sales".to_string(), - root_page: 2, - primary_key_columns: vec![], - columns: vec![ - SchemaColumn { - name: Some("product_id".to_string()), - ty: Type::Integer, - ty_str: "INTEGER".to_string(), - primary_key: false, - is_rowid_alias: false, - notnull: false, - default: None, - unique: false, - collation: None, - hidden: false, - }, - SchemaColumn { - name: Some("amount".to_string()), - ty: Type::Integer, - ty_str: "INTEGER".to_string(), - primary_key: false, - is_rowid_alias: false, - notnull: false, - default: None, - unique: 
false, - collation: None, - hidden: false, - }, - ], - has_rowid: true, - is_strict: false, - unique_sets: None, - }; - schema.add_btree_table(Arc::new(sales_table)); + let _ = test_schema!(); - // Parse and compile the aggregation query - let sql = "SELECT product_id, SUM(amount) as total, COUNT(*) as cnt FROM sales GROUP BY product_id"; - let mut parser = Parser::new(sql.as_bytes()); - let cmd = parser.next().unwrap().unwrap(); - - let mut circuit = match cmd { - ast::Cmd::Stmt(stmt) => { - let mut builder = LogicalPlanBuilder::new(&schema); - let logical_plan = builder.build_statement(&stmt).unwrap(); - DbspCompiler::new().compile(&logical_plan).unwrap() - } - _ => panic!("Expected SQL statement"), - }; + let (mut circuit, pager) = compile_sql!("SELECT product_id, SUM(amount) as total, COUNT(*) as cnt FROM sales GROUP BY product_id"); // Initialize with base data: (1, 100), (1, 200), (2, 150), (2, 250) let mut init_data = HashMap::new(); @@ -2379,10 +2797,14 @@ mod tests { delta.insert(4, vec![Value::Integer(2), Value::Integer(250)]); init_data.insert("sales".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state: product 1 total=300, product 2 total=400 - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 2, "Should have 2 product groups"); // Build a map of product_id -> (total, count) @@ -2430,9 +2852,7 @@ mod tests { uncommitted.insert("sales".to_string(), uncommitted_delta); // Execute with uncommitted data - simulating a read within transaction - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Result should show the aggregate changes from uncommitted data // Product 1: retraction of (300, 2) and insertion of (350, 3) @@ -2444,7 +2864,7 @@ mod tests { ); // IMPORTANT: Verify operator's internal state is unchanged - let state_after = get_current_state(&circuit).unwrap(); + let state_after = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state_after.changes.len(), 2, @@ -2498,9 +2918,7 @@ mod tests { commit_delta.insert(6, vec![Value::Integer(3), Value::Integer(300)]); commit_data.insert("sales".to_string(), commit_delta); - let commit_result = circuit - .execute(commit_data.clone(), DeltaSet::empty()) - .unwrap(); + let commit_result = test_execute(&mut circuit, commit_data.clone(), pager.clone()).unwrap(); // Should see changes for product 1 (updated) and product 3 (new) assert_eq!( @@ -2510,10 +2928,13 @@ mod tests { ); // Actually commit the changes to update operator state - circuit.commit(commit_data).unwrap(); + pager + .io + .block(|| circuit.commit(commit_data.clone(), pager.clone())) + .unwrap(); // After commit, verify final state - let final_state = get_current_state(&circuit).unwrap(); + let final_state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( final_state.changes.len(), 3, @@ -2566,7 +2987,7 @@ mod tests { // Test that uncommitted INSERTs are visible within the same transaction // This simulates: BEGIN; INSERT ...; SELECT * FROM view; COMMIT; - let mut circuit = compile_sql!("SELECT * FROM users WHERE age > 18"); + let (mut circuit, pager) = compile_sql!("SELECT * FROM users WHERE age 
> 18"); // Initialize with some data - need to match the schema (id, name, age) let mut init_data = HashMap::new(); @@ -2589,10 +3010,14 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data.clone()).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state.len(), 2, @@ -2622,9 +3047,7 @@ mod tests { // Execute with uncommitted data - this should return the uncommitted changes // that passed through the filter (age > 18) - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // IMPORTANT: tx_result should contain the filtered uncommitted changes! // Both Charlie (35) and David (20) should pass the age > 18 filter @@ -2648,7 +3071,7 @@ mod tests { ); // CRITICAL: Verify the operator state wasn't modified by uncommitted execution - let state_after_uncommitted = get_current_state(&circuit).unwrap(); + let state_after_uncommitted = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state_after_uncommitted.len(), 2, @@ -2680,7 +3103,8 @@ mod tests { // Similar to test_uncommitted_aggregation but explicitly tests rollback semantics // Create a simple aggregation circuit - let mut circuit = compile_sql!("SELECT age, COUNT(*) as cnt FROM users GROUP BY age"); + let (mut circuit, pager) = + compile_sql!("SELECT age, COUNT(*) as cnt FROM users GROUP BY age"); // Initialize with some data let mut init_data = HashMap::new(); @@ -2719,10 +3143,14 @@ mod tests { ); init_data.insert("users".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state: age 25 count=2, age 30 count=2 - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 2); let initial_counts: HashMap = state @@ -2783,9 +3211,7 @@ mod tests { uncommitted.insert("users".to_string(), uncommitted_delta); // Execute with uncommitted changes - let tx_result = circuit - .execute(HashMap::new(), delta_set_from_map(uncommitted.clone())) - .unwrap(); + let tx_result = test_execute(&mut circuit, uncommitted.clone(), pager.clone()).unwrap(); // Should see the aggregate changes from uncommitted data // Age 25: retraction of count 1 and insertion of count 2 @@ -2796,7 +3222,7 @@ mod tests { ); // Verify internal state is unchanged (simulating rollback by not committing) - let state_after_rollback = get_current_state(&circuit).unwrap(); + let state_after_rollback = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( state_after_rollback.changes.len(), 2, @@ -2836,8 +3262,10 @@ mod tests { #[test] fn test_circuit_rowid_update_consolidation() { + let (pager, p1, p2) = setup_btree_for_circuit(); + // Test that circuit properly consolidates state when rowid changes - let mut circuit = DbspCircuit::new(); + let mut circuit = DbspCircuit::new(p1, p2); // Create a simple filter node let schema = Arc::new(LogicalSchema::new(vec![ @@ -2845,14 +3273,14 @@ mod tests { ("value".to_string(), 
Type::Integer), ])); - // First create an input node + // First create an input node with InputOperator let input_id = circuit.add_node( DbspOperator::Input { name: "test".to_string(), schema: schema.clone(), }, vec![], - None, // Input nodes don't have executables + Box::new(InputOperator::new("test".to_string())), ); let filter_op = FilterOperator::new( @@ -2873,10 +3301,10 @@ mod tests { let filter_id = circuit.add_node( DbspOperator::Filter { predicate }, vec![input_id], // Filter takes input from the input node - Some(Box::new(filter_op)), + Box::new(filter_op), ); - circuit.root = Some(filter_id); + circuit.set_root(filter_id, schema.clone()); // Initialize with a row let mut init_data = HashMap::new(); @@ -2884,10 +3312,14 @@ mod tests { delta.insert(5, vec![Value::Integer(5), Value::Integer(20)]); init_data.insert("test".to_string(), delta); - circuit.initialize(init_data).unwrap(); + let _ = test_execute(&mut circuit, init_data.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(init_data.clone(), pager.clone())) + .unwrap(); // Verify initial state - let state = get_current_state(&circuit).unwrap(); + let state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.rowid, 5); @@ -2898,15 +3330,16 @@ mod tests { update_delta.insert(3, vec![Value::Integer(3), Value::Integer(20)]); update_data.insert("test".to_string(), update_delta); - circuit - .execute(update_data.clone(), DeltaSet::empty()) - .unwrap(); + test_execute(&mut circuit, update_data.clone(), pager.clone()).unwrap(); // Commit the changes to update operator state - circuit.commit(update_data).unwrap(); + pager + .io + .block(|| circuit.commit(update_data.clone(), pager.clone())) + .unwrap(); // The circuit should consolidate the state properly - let final_state = get_current_state(&circuit).unwrap(); + let final_state = get_current_state(pager.clone(), &circuit).unwrap(); assert_eq!( final_state.changes.len(), 1, @@ -2919,4 +3352,65 @@ mod tests { ); assert_eq!(final_state.changes[0].1, 1); } + + #[test] + fn test_circuit_respects_multiplicities() { + let (mut circuit, pager) = compile_sql!("SELECT * from users"); + + // Insert same row twice (multiplicity 2) + let mut delta = Delta::new(); + delta.insert( + 1, + vec![ + Value::Integer(1), + Value::Text("Alice".into()), + Value::Integer(25), + ], + ); + delta.insert( + 1, + vec![ + Value::Integer(1), + Value::Text("Alice".into()), + Value::Integer(25), + ], + ); + + let mut inputs = HashMap::new(); + inputs.insert("users".to_string(), delta); + test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); + + // Delete once (should leave multiplicity 1) + let mut delete_one = Delta::new(); + delete_one.delete( + 1, + vec![ + Value::Integer(1), + Value::Text("Alice".into()), + Value::Integer(25), + ], + ); + + let mut inputs = HashMap::new(); + inputs.insert("users".to_string(), delete_one); + test_execute(&mut circuit, inputs.clone(), pager.clone()).unwrap(); + pager + .io + .block(|| circuit.commit(inputs.clone(), pager.clone())) + .unwrap(); + + // With proper DBSP: row still exists (weight 2 - 1 = 1) + let state = get_current_state(pager.clone(), &circuit).unwrap(); + let mut consolidated = state; + consolidated.consolidate(); + assert_eq!( + consolidated.len(), + 1, + "Row should still exist with multiplicity 1" + ); + } } diff --git a/core/incremental/cursor.rs 
b/core/incremental/cursor.rs
new file mode 100644
index 000000000..1e0e91af8
--- /dev/null
+++ b/core/incremental/cursor.rs
@@ -0,0 +1,1618 @@
+use crate::{
+    incremental::{
+        compiler::{DeltaSet, ExecuteState},
+        dbsp::{Delta, RowKeyZSet},
+        hashable_row::HashableRow,
+        view::{IncrementalView, ViewTransactionState},
+    },
+    return_if_io,
+    storage::btree::BTreeCursor,
+    types::{IOResult, SeekKey, SeekOp, SeekResult, Value},
+    LimboError, Pager, Result,
+};
+use std::rc::Rc;
+use std::sync::{Arc, Mutex};
+
+/// State machine for seek operations
+#[derive(Debug)]
+enum SeekState {
+    /// Initial state before seeking
+    Init,
+
+    /// Actively seeking with btree and uncommitted iterators
+    Seek {
+        /// The row we are trying to find
+        target: i64,
+    },
+
+    /// Seek completed successfully
+    Done,
+}
+
+/// Cursor for reading materialized views that combines:
+/// 1. Persistent btree data (committed state)
+/// 2. Transaction-specific DBSP deltas (uncommitted changes)
+///
+/// Works like a regular table cursor - reads from disk on demand
+/// and overlays transaction changes as needed.
+pub struct MaterializedViewCursor {
+    // Core components
+    btree_cursor: Box<BTreeCursor>,
+    view: Arc<Mutex<IncrementalView>>,
+    pager: Rc<Pager>,
+
+    // Current changes that are uncommitted
+    uncommitted: RowKeyZSet,
+
+    // Reference to shared transaction state for this specific view - shared with Connection
+    tx_state: Rc<ViewTransactionState>,
+
+    // The transaction state only ever grows; it never shrinks. That is in the very nature
+    // of DBSP, because deletions are just appends with weight < 0. So we use the length of
+    // the state to check whether we have to recompute the transaction state.
+    last_tx_state_len: usize,
+
+    // Current row cache - only cache the current row we're looking at
+    current_row: Option<(i64, Vec<Value>)>,
+
+    // Execution state for circuit processing
+    execute_state: ExecuteState,
+
+    // State machine for seek operations
+    seek_state: SeekState,
+}
+
+impl MaterializedViewCursor {
+    pub fn new(
+        btree_cursor: Box<BTreeCursor>,
+        view: Arc<Mutex<IncrementalView>>,
+        pager: Rc<Pager>,
+        tx_state: Rc<ViewTransactionState>,
+    ) -> Result<Self> {
+        Ok(Self {
+            btree_cursor,
+            view,
+            pager,
+            uncommitted: RowKeyZSet::new(),
+            tx_state,
+            last_tx_state_len: 0,
+            current_row: None,
+            execute_state: ExecuteState::Uninitialized,
+            seek_state: SeekState::Init,
+        })
+    }
+
+    /// Compute transaction changes lazily on first access
+    fn ensure_tx_changes_computed(&mut self) -> Result<IOResult<()>> {
+        // Check if we've already processed the current state
+        let current_len = self.tx_state.len();
+        if current_len == self.last_tx_state_len {
+            return Ok(IOResult::Done(()));
+        }
+
+        // Get the view and the current transaction state
+        let mut view_guard = self.view.lock().unwrap();
+        let tx_delta = self.tx_state.get_delta();
+
+        // Process the delta through the circuit to get materialized changes
+        let mut uncommitted = DeltaSet::new();
+        uncommitted.insert(view_guard.base_table().name.clone(), tx_delta);
+
+        let processed_delta = return_if_io!(view_guard.execute_with_uncommitted(
+            uncommitted,
+            self.pager.clone(),
+            &mut self.execute_state
+        ));
+
+        self.uncommitted = RowKeyZSet::from_delta(&processed_delta);
+        self.last_tx_state_len = current_len;
+        Ok(IOResult::Done(()))
+    }
+
+    // Read the current btree entry as a vector (empty if no current position)
+    fn read_btree_delta_entry(&mut self) -> Result<IOResult<Vec<(HashableRow, isize)>>> {
+        let btree_rowid = return_if_io!(self.btree_cursor.rowid());
+        let rowid = match btree_rowid {
+            None => return Ok(IOResult::Done(Vec::new())),
+            Some(rowid) => rowid,
+        };
+
+        let btree_record = return_if_io!(self.btree_cursor.record());
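+        // Layout note: a materialized-view row is stored as (user columns..., weight),
+        // so the decoding below pops the trailing Z-set weight column off the record.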
+        let btree_ref_values = btree_record
+            .ok_or_else(|| {
+                crate::LimboError::InternalError(
+                    "Invalid data in materialized view: found a rowid, but not the row!"
+                        .to_string(),
+                )
+            })?
+            .get_values();
+
+        // Convert RefValues to Values (copying for now - can optimize later)
+        let mut btree_values: Vec<Value> =
+            btree_ref_values.iter().map(|rv| rv.to_owned()).collect();
+
+        // The last column should contain the weight
+        let weight_value = btree_values.pop().ok_or_else(|| {
+            crate::LimboError::InternalError(
+                "Invalid data in materialized view: no weight column found".to_string(),
+            )
+        })?;
+
+        // Convert the Value to an isize weight
+        let weight = match weight_value {
+            Value::Integer(w) => w as isize,
+            _ => {
+                return Err(crate::LimboError::InternalError(format!(
+                    "Invalid data in materialized view: expected integer weight, found {weight_value:?}"
+                )))
+            }
+        };
+
+        if !(-1..=1).contains(&weight) {
+            return Err(crate::LimboError::InternalError(format!(
+                "Invalid data in materialized view: expected weight -1, 0, or 1, found {weight}"
+            )));
+        }
+
+        Ok(IOResult::Done(vec![(
+            HashableRow::new(rowid, btree_values),
+            weight,
+        )]))
+    }
+
+    /// Internal seek implementation that doesn't check preconditions
+    fn do_seek(&mut self, target_rowid: i64, op: SeekOp) -> Result<IOResult<SeekResult>> {
+        loop {
+            // Process the state machine - need to handle the mutable borrow carefully
+            match &mut self.seek_state {
+                SeekState::Init => {
+                    self.current_row = None;
+                    self.seek_state = SeekState::Seek {
+                        target: target_rowid,
+                    };
+                }
+                SeekState::Seek { target } => {
+                    let target = *target;
+                    let btree_result =
+                        return_if_io!(self.btree_cursor.seek(SeekKey::TableRowId(target), op));
+
+                    let changes = if btree_result == SeekResult::Found {
+                        return_if_io!(self.read_btree_delta_entry())
+                    } else {
+                        Vec::new()
+                    };
+
+                    let mut btree_entries = Delta { changes };
+                    let changes = self.uncommitted.seek(target, op);
+
+                    let uncommitted_entries = Delta { changes };
+                    btree_entries.merge(&uncommitted_entries);
+
+                    // An empty pre-consolidation delta means nothing was found. An empty
+                    // post-consolidation zset can also mean the weights merely canceled out.
+                    if btree_entries.is_empty() {
+                        self.seek_state = SeekState::Done;
+                        return Ok(IOResult::Done(SeekResult::NotFound));
+                    }
+
+                    let min_seen = btree_entries
+                        .changes
+                        .first()
+                        .expect("cannot be empty, we just tested for it")
+                        .0
+                        .rowid;
+                    let max_seen = btree_entries
+                        .changes
+                        .last()
+                        .expect("cannot be empty, we just tested for it")
+                        .0
+                        .rowid;
+
+                    let zset = RowKeyZSet::from_delta(&btree_entries);
+                    let ret = zset.seek(target_rowid, op);
+
+                    if !ret.is_empty() {
+                        let (row, _) = &ret[0];
+                        self.current_row = Some((row.rowid, row.values.clone()));
+                        self.seek_state = SeekState::Done;
+                        return Ok(IOResult::Done(SeekResult::Found));
+                    }
+
+                    let new_target = match op {
+                        SeekOp::GT => Some(max_seen),
+                        SeekOp::GE { eq_only: false } => Some(max_seen + 1),
+                        SeekOp::LT => Some(min_seen),
+                        SeekOp::LE { eq_only: false } => Some(min_seen - 1),
+                        SeekOp::LE { eq_only: true } | SeekOp::GE { eq_only: true } => None,
+                    };
+
+                    if let Some(target) = new_target {
+                        self.seek_state = SeekState::Seek { target };
+                    } else {
+                        self.seek_state = SeekState::Done;
+                        return Ok(IOResult::Done(SeekResult::NotFound));
+                    }
+                }
+                SeekState::Done => {
+                    // We always return before setting the state to Done, so reaching this
+                    // arm means a new seek is starting.
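+                    // Illustrative (hypothetical) reuse of one cursor across seeks:
+                    //   cursor.seek(SeekKey::TableRowId(3), SeekOp::GT)  // finishes in Done
+                    //   cursor.seek(SeekKey::TableRowId(9), SeekOp::LT)  // re-enters here
+                    // Resetting to Init lets the loop restart the machine for the new target.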
+ self.seek_state = SeekState::Init; + } + } + } + } + + pub fn seek(&mut self, key: SeekKey, op: SeekOp) -> Result> { + // Ensure transaction changes are computed + return_if_io!(self.ensure_tx_changes_computed()); + + let target_rowid = match &key { + SeekKey::TableRowId(rowid) => *rowid, + SeekKey::IndexKey(_) => { + return Err(LimboError::ParseError( + "Cannot search a materialized view with an index key".to_string(), + )); + } + }; + + self.do_seek(target_rowid, op) + } + + pub fn next(&mut self) -> Result> { + // If cursor is not positioned (no current_row), return false + // This matches BTreeCursor behavior when valid_state == Invalid + let Some((current_rowid, _)) = &self.current_row else { + return Ok(IOResult::Done(false)); + }; + + // Use GT to find the next row after current position + let result = return_if_io!(self.do_seek(*current_rowid, SeekOp::GT)); + Ok(IOResult::Done(result == SeekResult::Found)) + } + + pub fn column(&mut self, col: usize) -> Result> { + if let Some((_, ref values)) = self.current_row { + Ok(IOResult::Done( + values.get(col).cloned().unwrap_or(Value::Null), + )) + } else { + Ok(IOResult::Done(Value::Null)) + } + } + + pub fn rowid(&self) -> Result>> { + Ok(IOResult::Done(self.current_row.as_ref().map(|(id, _)| *id))) + } + + pub fn rewind(&mut self) -> Result> { + return_if_io!(self.ensure_tx_changes_computed()); + // Seek GT from i64::MIN to find the first row using internal do_seek + let _result = return_if_io!(self.do_seek(i64::MIN, SeekOp::GT)); + Ok(IOResult::Done(())) + } + + pub fn is_valid(&self) -> Result { + Ok(self.current_row.is_some()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::util::IOExt; + use crate::{Connection, Database, OpenFlags}; + use std::rc::Rc; + use std::sync::Arc; + + /// Helper to create a test connection with a table and materialized view + fn create_test_connection() -> Result> { + // Create an in-memory database with experimental views enabled + let io = Arc::new(crate::io::MemoryIO::new()); + let db = Database::open_file_with_flags( + io, + ":memory:", + OpenFlags::default(), + crate::DatabaseOpts { + enable_mvcc: false, + enable_indexes: false, + enable_views: true, + enable_strict: false, + }, + )?; + let conn = db.connect()?; + + // Create a test table + conn.execute("CREATE TABLE test_table (id INTEGER PRIMARY KEY, value INTEGER)")?; + + // Create materialized view + conn.execute("CREATE MATERIALIZED VIEW test_view AS SELECT id, value FROM test_table")?; + + Ok(conn) + } + + /// Helper to create a test cursor for the materialized view + fn create_test_cursor( + conn: &Arc, + ) -> Result<(MaterializedViewCursor, Rc, Rc)> { + // Get the schema and view + let view_mutex = conn + .schema + .borrow() + .get_materialized_view("test_view") + .ok_or(crate::LimboError::InternalError( + "View not found".to_string(), + ))?; + + // Get the view's root page + let view = view_mutex.lock().unwrap(); + let root_page = view.get_root_page(); + if root_page == 0 { + return Err(crate::LimboError::InternalError( + "View not materialized".to_string(), + )); + } + let num_columns = view.columns.len(); + drop(view); + + // Create a btree cursor + let pager = conn.get_pager(); + let btree_cursor = Box::new(BTreeCursor::new( + None, // No MvCursor + pager.clone(), + root_page, + num_columns, + )); + + // Get or create transaction state for this view + let tx_state = conn.view_transaction_states.get_or_create("test_view"); + + // Create the materialized view cursor + let cursor = MaterializedViewCursor::new( + 
btree_cursor, + view_mutex.clone(), + pager.clone(), + tx_state.clone(), + )?; + + Ok((cursor, tx_state, pager)) + } + + /// Helper to populate test table with data through SQL + fn populate_test_table(conn: &Arc, rows: Vec<(i64, i64)>) -> Result<()> { + for (id, value) in rows { + let sql = format!("INSERT INTO test_table (id, value) VALUES ({id}, {value})"); + conn.execute(&sql)?; + } + Ok(()) + } + + /// Helper to apply changes through ViewTransactionState + fn apply_changes_to_tx_state( + tx_state: &ViewTransactionState, + changes: Vec<(i64, Vec, isize)>, + ) { + for (rowid, values, weight) in changes { + if weight > 0 { + tx_state.insert(rowid, values); + } else if weight < 0 { + tx_state.delete(rowid, values); + } + } + } + + #[test] + fn test_seek_key_exists_in_btree() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with test data: rows 1, 3, 5, 7 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50), (7, 70)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // No uncommitted changes - tx_state is already empty + + // Test 1: Seek exact match (row 3) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + // Test 2: Seek GE (row 4 should find row 5) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(4), SeekOp::GE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + // Test 3: Seek GT (row 3 should find row 5) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + // Test 4: Seek LE (row 4 should find row 3) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(4), SeekOp::LE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + // Test 5: Seek LT (row 5 should find row 3) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::LT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + Ok(()) + } + + #[test] + fn test_seek_key_exists_only_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 5, 7 + populate_test_table(&conn, vec![(1, 10), (5, 50), (7, 70)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted changes: insert rows 3 and 6 + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], 1), // Insert row 3 + (6, vec![Value::Integer(6), Value::Integer(60)], 1), // Insert row 6 + ], + ); + + // Test 1: Seek exact match for uncommitted row 3 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(30)); + + // Test 2: Seek GE for row 2 should find uncommitted row 3 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, 
Some(3)); + + // Test 3: Seek GT for row 5 should find uncommitted row 6 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(6)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(60)); + + // Test 4: Seek LE for row 6 should find uncommitted row 6 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(6), SeekOp::LE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(6)); + + Ok(()) + } + + #[test] + fn test_seek_key_deleted_by_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5, 7 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50), (7, 70)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete row 3 and 5 in uncommitted changes + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], -1), // Delete row 3 + (5, vec![Value::Integer(5), Value::Integer(50)], -1), // Delete row 5 + ], + ); + + // Test 1: Seek exact match for deleted row 3 should not find it + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 2: Seek GE for row 2 should skip deleted row 3 and find row 7 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + // Test 3: Seek GT for row 1 should skip deleted rows and find row 7 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(1), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + // Test 4: Seek LE for row 5 should find row 1 (skipping deleted 3 and 5) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::LE { eq_only: false }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + Ok(()) + } + + #[test] + fn test_seek_with_updates() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Update row 3 (delete old + insert new) + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], -1), // Delete old row 3 + (3, vec![Value::Integer(3), Value::Integer(35)], 1), // Insert new row 3 + ], + ); + + // Test: Seek for updated row 3 should find it + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + // The values should be from the uncommitted set (35 instead of 30) + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(35)); + + Ok(()) + } + + #[test] + fn test_seek_boundary_conditions() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 5, 10 + populate_test_table(&conn, vec![(5, 50), (10, 100)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // No 
uncommitted changes - tx_state is already empty + + // Test 1: Seek LT for minimum value (should find nothing) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(1), SeekOp::LT))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 2: Seek GT for maximum value (should find nothing) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(15), SeekOp::GT))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 3: Seek exact for non-existent key + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(7), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::NotFound); + + Ok(()) + } + + #[test] + fn test_seek_complex_uncommitted_weights() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with row 5 + populate_test_table(&conn, vec![(5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Complex uncommitted changes with multiple operations on same row + apply_changes_to_tx_state( + &tx_state, + vec![ + (5, vec![Value::Integer(5), Value::Integer(50)], -1), // Delete original + (5, vec![Value::Integer(5), Value::Integer(51)], 1), // Insert update 1 + (5, vec![Value::Integer(5), Value::Integer(51)], -1), // Delete update 1 + (5, vec![Value::Integer(5), Value::Integer(52)], 1), // Insert update 2 + // Net effect: row 5 exists with value 52 + ], + ); + + // Seek for row 5 should find it (net weight = 1 from btree + 0 from uncommitted = 1) + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + // The final value should be 52 from the last update + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(52)); + + Ok(()) + } + + #[test] + fn test_seek_affected_by_transaction_state_changes() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1 and 3 + populate_test_table(&conn, vec![(1, 10), (3, 30)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Seek for row 2 - doesn't exist + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::NotFound); + + // Add row 2 to uncommitted + tx_state.insert(2, vec![Value::Integer(2), Value::Integer(20)]); + + // Now seek for row 2 finds it + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(20)); + + Ok(()) + } + + #[test] + fn test_rewind_btree_first_uncommitted_later() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows 8, 10 (all larger than btree rows) + apply_changes_to_tx_state( + &tx_state, + vec![ + (8, vec![Value::Integer(8), Value::Integer(80)], 1), + (10, vec![Value::Integer(10), Value::Integer(100)], 1), + ], + ); + + // Initially cursor is not positioned + assert!(!cursor.is_valid()?); + + // Rewind should position at first btree row (1) since uncommitted are all larger + pager.io.block(|| cursor.rewind())?; + 
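+ // (rewind() internally seeks GT from i64::MIN, so the smallest visible rowid
+ // wins: the btree's row 1 rather than the larger uncommitted rows 8 and 10.)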
assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + Ok(()) + } + + #[test] + fn test_rewind_with_uncommitted_first() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 5, 7 + populate_test_table(&conn, vec![(5, 50), (7, 70)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted row 2 (smaller than any btree row) + apply_changes_to_tx_state( + &tx_state, + vec![(2, vec![Value::Integer(2), Value::Integer(20)], 1)], + ); + + // Rewind should position at row 2 (uncommitted) + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(20)); + + Ok(()) + } + + #[test] + fn test_rewind_skip_deleted_first() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete row 1 in uncommitted + apply_changes_to_tx_state( + &tx_state, + vec![(1, vec![Value::Integer(1), Value::Integer(10)], -1)], + ); + + // Rewind should skip deleted row 1 and position at row 3 + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + Ok(()) + } + + #[test] + fn test_rewind_empty_btree_with_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows (no btree data) + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], 1), + (7, vec![Value::Integer(7), Value::Integer(70)], 1), + ], + ); + + // Rewind should find first uncommitted row + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(30)); + + Ok(()) + } + + #[test] + fn test_rewind_all_deleted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 2, 4 + populate_test_table(&conn, vec![(2, 20), (4, 40)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete all rows in uncommitted + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], -1), + (4, vec![Value::Integer(4), Value::Integer(40)], -1), + ], + ); + + // Rewind should find no valid rows + pager.io.block(|| cursor.rewind())?; + assert!(!cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, None); + + Ok(()) + } + + #[test] + fn test_rewind_with_updates() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3 + populate_test_table(&conn, vec![(1, 10), (3, 30)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Update row 1 (delete + insert with new value) + apply_changes_to_tx_state( + &tx_state, + vec![ + (1, vec![Value::Integer(1), Value::Integer(10)], -1), + (1, vec![Value::Integer(1), Value::Integer(15)], 1), + ], + ); + + // Rewind should position at row 1 with updated value + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| 
cursor.rowid())?, Some(1)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(15)); + + Ok(()) + } + + // ===== NEXT() TEST SUITE ===== + + #[test] + fn test_next_btree_only_sequential() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5, 7 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50), (7, 70)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // Start with rewind to position at first row + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + // Next should move to row 3 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + // Next should move to row 5 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + // Next should move to row 7 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + // Next should reach end + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_uncommitted_only() -> Result<()> { + let conn = create_test_connection()?; + + // Create cursor for testing (no btree data) + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows 2, 4, 6 + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], 1), + (4, vec![Value::Integer(4), Value::Integer(40)], 1), + (6, vec![Value::Integer(6), Value::Integer(60)], 1), + ], + ); + + // Start with rewind to position at first row + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + + // Next should move to row 4 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(4)); + + // Next should move to row 6 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(6)); + + // Next should reach end + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_mixed_btree_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 5, 9 + populate_test_table(&conn, vec![(1, 10), (5, 50), (9, 90)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows 3, 7 + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], 1), + (7, vec![Value::Integer(7), Value::Integer(70)], 1), + ], + ); + + // Should iterate in order: 1, 3, 5, 7, 9 + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(9)); + + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_skip_deleted_rows() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 2, 3, 4, 5 + populate_test_table(&conn, vec![(1, 
10), (2, 20), (3, 30), (4, 40), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete rows 2 and 4 in uncommitted + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], -1), + (4, vec![Value::Integer(4), Value::Integer(40)], -1), + ], + ); + + // Should iterate: 1, 3, 5 (skipping deleted 2 and 4) + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_with_updates() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Update row 3 (delete old + insert new) + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(30)], -1), + (3, vec![Value::Integer(3), Value::Integer(35)], 1), + ], + ); + + // Should iterate all rows with updated values + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(35)); // Updated value + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_next_from_uninitialized() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 2, 4 + populate_test_table(&conn, vec![(2, 20), (4, 40)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // Cursor not positioned initially + assert!(!cursor.is_valid()?); + + // Next on uninitialized cursor should return false (matching BTreeCursor behavior) + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + // Position cursor with rewind first + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + + // Now next should work + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(4)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_next_empty_table() -> Result<()> { + let conn = create_test_connection()?; + + // Create cursor for testing (empty table) + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // Next on empty table should return false + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_all_deleted() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 2, 3 + populate_test_table(&conn, vec![(1, 10), (2, 20), (3, 30)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete all rows + apply_changes_to_tx_state( + &tx_state, + vec![ + (1, vec![Value::Integer(1), Value::Integer(10)], -1), + (2, vec![Value::Integer(2), 
Value::Integer(20)], -1), + (3, vec![Value::Integer(3), Value::Integer(30)], -1), + ], + ); + + // Next should find nothing + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_complex_interleaving() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 2, 4, 6, 8 + populate_test_table(&conn, vec![(2, 20), (4, 40), (6, 60), (8, 80)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Complex changes: + // - Insert row 1 + // - Delete row 2 + // - Insert row 3 + // - Update row 4 + // - Insert row 5 + // - Delete row 6 + // - Insert row 7 + // - Keep row 8 as-is + // - Insert row 9 + apply_changes_to_tx_state( + &tx_state, + vec![ + (1, vec![Value::Integer(1), Value::Integer(10)], 1), // Insert 1 + (2, vec![Value::Integer(2), Value::Integer(20)], -1), // Delete 2 + (3, vec![Value::Integer(3), Value::Integer(30)], 1), // Insert 3 + (4, vec![Value::Integer(4), Value::Integer(40)], -1), // Delete old 4 + (4, vec![Value::Integer(4), Value::Integer(45)], 1), // Insert new 4 + (5, vec![Value::Integer(5), Value::Integer(50)], 1), // Insert 5 + (6, vec![Value::Integer(6), Value::Integer(60)], -1), // Delete 6 + (7, vec![Value::Integer(7), Value::Integer(70)], 1), // Insert 7 + (9, vec![Value::Integer(9), Value::Integer(90)], 1), // Insert 9 + ], + ); + + // Should iterate: 1, 3, 4(updated), 5, 7, 8, 9 + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(4)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(45)); // Updated value + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(8)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(9)); + + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_after_seek() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5, 7, 9 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50), (7, 70), (9, 90)])?; + + // Create cursor for testing + let (mut cursor, _tx_state, pager) = create_test_cursor(&conn)?; + + // Seek to row 5 + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + // Next should move to row 7 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(7)); + + // Next should move to row 9 + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(9)); + + // Next should reach end + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_next_multiple_weights_same_row() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with row 1 + populate_test_table(&conn, vec![(1, 10)])?; + + // Create cursor for testing + let (mut 
cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Multiple operations on same row: + apply_changes_to_tx_state( + &tx_state, + vec![ + (1, vec![Value::Integer(1), Value::Integer(10)], -1), // Delete original + (1, vec![Value::Integer(1), Value::Integer(11)], 1), // Insert v1 + (1, vec![Value::Integer(1), Value::Integer(11)], -1), // Delete v1 + (1, vec![Value::Integer(1), Value::Integer(12)], 1), // Insert v2 + (1, vec![Value::Integer(1), Value::Integer(12)], -1), // Delete v2 + // Net weight: 1 (btree) - 1 + 1 - 1 + 1 - 1 = 0 (row deleted) + ], + ); + + // Row should be deleted + assert!(!pager.io.block(|| cursor.next())?); + assert!(!cursor.is_valid()?); + + Ok(()) + } + + #[test] + fn test_next_only_uncommitted_large_gaps() -> Result<()> { + let conn = create_test_connection()?; + + // Create cursor for testing (no btree data) + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows with large gaps + apply_changes_to_tx_state( + &tx_state, + vec![ + (100, vec![Value::Integer(100), Value::Integer(1000)], 1), + (500, vec![Value::Integer(500), Value::Integer(5000)], 1), + (999, vec![Value::Integer(999), Value::Integer(9990)], 1), + ], + ); + + // Should iterate through all with large gaps + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(100)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(500)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(999)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_multiple_updates_same_row_single_transaction() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 2, 3 + populate_test_table(&conn, vec![(1, 10), (2, 20), (3, 30)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Multiple successive updates to row 2 in the same transaction + // 20 -> 25 -> 28 -> 32 (final value should be 32) + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], -1), // Delete original + (2, vec![Value::Integer(2), Value::Integer(25)], 1), // First update + (2, vec![Value::Integer(2), Value::Integer(25)], -1), // Delete first update + (2, vec![Value::Integer(2), Value::Integer(28)], 1), // Second update + (2, vec![Value::Integer(2), Value::Integer(28)], -1), // Delete second update + (2, vec![Value::Integer(2), Value::Integer(32)], 1), // Final update + ], + ); + + // Seek to row 2 should find the final value + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(2), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(32)); + + // Next through all rows to verify only final values are seen + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(10)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(32)); // Final value + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(30)); + + 
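+ // (With rows 1..3 exhausted, the iterator reports end-of-data: the canceled
+ // intermediate versions of row 2 never reappear as extra rows.)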
assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_empty_materialized_view_with_uncommitted() -> Result<()> { + let conn = create_test_connection()?; + + // Don't populate any data - view is created but empty + // This tests a materialized view that was never populated + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows to empty materialized view + apply_changes_to_tx_state( + &tx_state, + vec![ + (5, vec![Value::Integer(5), Value::Integer(50)], 1), + (10, vec![Value::Integer(10), Value::Integer(100)], 1), + (15, vec![Value::Integer(15), Value::Integer(150)], 1), + ], + ); + + // Test seek on empty materialized view with uncommitted data + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(10), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(10)); + + // Test GT seek + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(7), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(10)); + + // Test rewind and next + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(10)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(15)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_exact_match_btree_uncommitted_same_rowid_different_values() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted row 3 with different value (not a delete+insert, just insert) + // This simulates a case where uncommitted has a new version of row 3 + apply_changes_to_tx_state( + &tx_state, + vec![ + (3, vec![Value::Integer(3), Value::Integer(35)], 1), // New version with positive weight + ], + ); + + // Exact match seek for row 3 should find the uncommitted version (35) + // because when both exist with positive weight, uncommitted takes precedence + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(3), SeekOp::GE { eq_only: true }))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + // This test verifies which value we get when both btree and uncommitted + // have the same rowid with positive weights + // The expected behavior needs to be defined - typically uncommitted wins + // or they get merged based on the DBSP semantics + + Ok(()) + } + + #[test] + fn test_boundary_value_seeks() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with some normal values + populate_test_table(&conn, vec![(100, 1000), (200, 2000)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted rows at extreme positions + apply_changes_to_tx_state( + &tx_state, + vec![ + ( + i64::MIN + 1, + vec![Value::Integer(i64::MIN + 1), Value::Integer(-999)], + 1, + ), + ( + i64::MAX - 1, + vec![Value::Integer(i64::MAX - 1), Value::Integer(999)], + 1, + ), + ], + ); + + // Test 1: Seek GT with i64::MAX should find nothing + let result = pager + .io + .block(|| 
cursor.seek(SeekKey::TableRowId(i64::MAX), SeekOp::GT))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 2: Seek LT with i64::MIN should find nothing + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(i64::MIN), SeekOp::LT))?; + assert_eq!(result, SeekResult::NotFound); + + // Test 3: Seek GE with i64::MAX - 1 should find our extreme row + let result = pager.io.block(|| { + cursor.seek( + SeekKey::TableRowId(i64::MAX - 1), + SeekOp::GE { eq_only: false }, + ) + })?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(i64::MAX - 1)); + + // Test 4: Seek LE with i64::MIN + 1 should find our extreme low row + let result = pager.io.block(|| { + cursor.seek( + SeekKey::TableRowId(i64::MIN + 1), + SeekOp::LE { eq_only: false }, + ) + })?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(i64::MIN + 1)); + + // Test 5: Seek GT from i64::MIN should find the smallest row + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(i64::MIN), SeekOp::GT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(i64::MIN + 1)); + + // Test 6: Seek LT from i64::MAX should find the largest row + let result = pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(i64::MAX), SeekOp::LT))?; + assert_eq!(result, SeekResult::Found); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(i64::MAX - 1)); + + Ok(()) + } + + #[test] + fn test_next_concurrent_btree_uncommitted_advance() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 2, 3, 4, 5 + populate_test_table(&conn, vec![(1, 10), (2, 20), (3, 30), (4, 40), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Delete some btree rows and add replacements in uncommitted + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], -1), // Delete btree row 2 + (2, vec![Value::Integer(2), Value::Integer(25)], 1), // Replace with new value + (4, vec![Value::Integer(4), Value::Integer(40)], -1), // Delete btree row 4 + ], + ); + + // Should iterate: 1, 2(new), 3, 5 + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + assert_eq!(pager.io.block(|| cursor.column(1))?, Value::Integer(25)); // New value + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_transaction_state_changes_mid_iteration() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Start iteration + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + // Move to next row + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + // Now add new uncommitted changes mid-iteration + apply_changes_to_tx_state( + &tx_state, + vec![ + (2, vec![Value::Integer(2), Value::Integer(20)], 1), // Insert before 
current + (4, vec![Value::Integer(4), Value::Integer(40)], 1), // Insert after current + (6, vec![Value::Integer(6), Value::Integer(60)], 1), // Insert at end + ], + ); + + // Continue iteration - cursor continues from where it was, sees row 5 next + // (new changes are only visible after rewind/seek) + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + // No more rows in original iteration + assert!(!pager.io.block(|| cursor.next())?); + + // Rewind and verify we see all rows including the newly added ones + pager.io.block(|| cursor.rewind())?; + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(4)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(6)); + + assert!(!pager.io.block(|| cursor.next())?); + + Ok(()) + } + + #[test] + fn test_rewind_after_failed_seek() -> Result<()> { + let conn = create_test_connection()?; + + // Populate table with rows 1, 3, 5 + populate_test_table(&conn, vec![(1, 10), (3, 30), (5, 50)])?; + + // Create cursor for testing + let (mut cursor, tx_state, pager) = create_test_cursor(&conn)?; + + // Add uncommitted row 2 + apply_changes_to_tx_state( + &tx_state, + vec![(2, vec![Value::Integer(2), Value::Integer(20)], 1)], + ); + + // Seek to non-existent row 4 with exact match + assert_eq!( + pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(4), SeekOp::GE { eq_only: true }))?, + SeekResult::NotFound + ); + assert!(!cursor.is_valid()?); + + // Rewind should work correctly after failed seek + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + // Verify we can iterate through all rows + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(2)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(3)); + + assert!(pager.io.block(|| cursor.next())?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(5)); + + assert!(!pager.io.block(|| cursor.next())?); + + // Try another failed seek (GT on maximum value) + assert_eq!( + pager + .io + .block(|| cursor.seek(SeekKey::TableRowId(5), SeekOp::GT))?, + SeekResult::NotFound + ); + assert!(!cursor.is_valid()?); + + // Rewind again + pager.io.block(|| cursor.rewind())?; + assert!(cursor.is_valid()?); + assert_eq!(pager.io.block(|| cursor.rowid())?, Some(1)); + + Ok(()) + } +} diff --git a/core/incremental/dbsp.rs b/core/incremental/dbsp.rs index 962ceb9c8..607fd562b 100644 --- a/core/incremental/dbsp.rs +++ b/core/incremental/dbsp.rs @@ -1,19 +1,86 @@ // Simplified DBSP integration for incremental view maintenance // For now, we'll use a basic approach and can expand to full DBSP later -use std::collections::HashMap; +use super::hashable_row::HashableRow; +use crate::Value; +use std::collections::{BTreeMap, HashMap}; + +type DeltaEntry = (HashableRow, isize); +/// A delta represents ordered changes to data +#[derive(Debug, Clone, Default)] +pub struct Delta { + /// Ordered list of changes: (row, weight) where weight is 
+1 for insert, -1 for delete + /// It is crucial that this is ordered. Imagine the case of an update, which becomes a delete + + /// insert. If this is not ordered, it would be applied in arbitrary order and break the view. + pub changes: Vec, +} + +impl Delta { + pub fn new() -> Self { + Self { + changes: Vec::new(), + } + } + + pub fn insert(&mut self, row_key: i64, values: Vec) { + let row = HashableRow::new(row_key, values); + self.changes.push((row, 1)); + } + + pub fn delete(&mut self, row_key: i64, values: Vec) { + let row = HashableRow::new(row_key, values); + self.changes.push((row, -1)); + } + + pub fn is_empty(&self) -> bool { + self.changes.is_empty() + } + + pub fn len(&self) -> usize { + self.changes.len() + } + + /// Merge another delta into this one + /// This preserves the order of operations - no consolidation is done + /// to maintain the full history of changes + pub fn merge(&mut self, other: &Delta) { + // Simply append all changes from other, preserving order + self.changes.extend(other.changes.iter().cloned()); + } + + /// Consolidate changes by combining entries with the same HashableRow + pub fn consolidate(&mut self) { + if self.changes.is_empty() { + return; + } + + // Use a HashMap to accumulate weights + let mut consolidated: HashMap = HashMap::new(); + + for (row, weight) in self.changes.drain(..) { + *consolidated.entry(row).or_insert(0) += weight; + } + + // Convert back to vec, filtering out zero weights + self.changes = consolidated + .into_iter() + .filter(|(_, weight)| *weight != 0) + .collect(); + } +} /// A simplified ZSet for incremental computation /// Each element has a weight: positive for additions, negative for deletions #[derive(Clone, Debug, Default)] pub struct SimpleZSet { - data: HashMap, + data: BTreeMap, } -impl SimpleZSet { +#[allow(dead_code)] +impl SimpleZSet { pub fn new() -> Self { Self { - data: HashMap::new(), + data: BTreeMap::new(), } } @@ -45,36 +112,121 @@ impl SimpleZSet { self.insert(item.clone(), weight); } } -} -/// A simplified stream for incremental computation -#[derive(Clone, Debug)] -pub struct SimpleStream { - current: SimpleZSet, -} - -impl SimpleStream { - pub fn from_zset(zset: SimpleZSet) -> Self { - Self { current: zset } + /// Get the weight for a specific item (0 if not present) + pub fn get(&self, item: &T) -> isize { + self.data.get(item).copied().unwrap_or(0) } - /// Apply a delta (change) to the stream - pub fn apply_delta(&mut self, delta: &SimpleZSet) { - self.current.merge(delta); + /// Get the first element (smallest key) in the Z-set + pub fn first(&self) -> Option<(&T, isize)> { + self.data.iter().next().map(|(k, &v)| (k, v)) } - /// Get the current state as a vector of items (only positive weights) - pub fn to_vec(&self) -> Vec { - self.current.to_vec() + /// Get the last element (largest key) in the Z-set + pub fn last(&self) -> Option<(&T, isize)> { + self.data.iter().next_back().map(|(k, &v)| (k, v)) + } + + /// Get a range of elements + pub fn range(&self, range: R) -> impl Iterator + '_ + where + R: std::ops::RangeBounds, + { + self.data.range(range).map(|(k, &v)| (k, v)) + } + + /// Check if empty + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Get the number of elements + pub fn len(&self) -> usize { + self.data.len() } } // Type aliases for convenience -use super::hashable_row::HashableRow; - pub type RowKey = HashableRow; pub type RowKeyZSet = SimpleZSet; -pub type RowKeyStream = SimpleStream; + +impl RowKeyZSet { + /// Create a Z-set from a Delta by consolidating all 
changes + pub fn from_delta(delta: &Delta) -> Self { + let mut zset = Self::new(); + + // Add all changes from the delta, consolidating as we go + for (row, weight) in &delta.changes { + zset.insert(row.clone(), *weight); + } + + zset + } + + /// Seek to find ALL entries for the best matching rowid + /// For GT/GE: returns all entries for the smallest rowid that satisfies the condition + /// For LT/LE: returns all entries for the largest rowid that satisfies the condition + /// Returns empty vec if no match found + pub fn seek(&self, target: i64, op: crate::types::SeekOp) -> Vec<(HashableRow, isize)> { + use crate::types::SeekOp; + + // First find the best matching rowid + let best_rowid = match op { + SeekOp::GT => { + // Find smallest rowid > target + self.data + .iter() + .filter(|(row, _)| row.rowid > target) + .map(|(row, _)| row.rowid) + .min() + } + SeekOp::GE { eq_only: false } => { + // Find smallest rowid >= target + self.data + .iter() + .filter(|(row, _)| row.rowid >= target) + .map(|(row, _)| row.rowid) + .min() + } + SeekOp::GE { eq_only: true } | SeekOp::LE { eq_only: true } => { + // Need exact match + if self.data.iter().any(|(row, _)| row.rowid == target) { + Some(target) + } else { + None + } + } + SeekOp::LT => { + // Find largest rowid < target + self.data + .iter() + .filter(|(row, _)| row.rowid < target) + .map(|(row, _)| row.rowid) + .max() + } + SeekOp::LE { eq_only: false } => { + // Find largest rowid <= target + self.data + .iter() + .filter(|(row, _)| row.rowid <= target) + .map(|(row, _)| row.rowid) + .max() + } + }; + + // Now get ALL entries with that rowid + match best_rowid { + Some(rowid) => self + .data + .iter() + .filter(|(row, _)| row.rowid == rowid) + .map(|(k, &v)| (k.clone(), v)) + .collect(), + None => Vec::new(), + } + } +} #[cfg(test)] mod tests { diff --git a/core/incremental/hashable_row.rs b/core/incremental/hashable_row.rs index 46be59bde..799f88e87 100644 --- a/core/incremental/hashable_row.rs +++ b/core/incremental/hashable_row.rs @@ -78,3 +78,23 @@ impl Hash for HashableRow { self.cached_hash.hash(state); } } + +impl PartialOrd for HashableRow { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for HashableRow { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // First compare by rowid, then by values if rowids are equal + // This ensures Ord is consistent with Eq (which compares all fields) + match self.rowid.cmp(&other.rowid) { + std::cmp::Ordering::Equal => { + // If rowids are equal, compare values to maintain consistency with Eq + self.values.cmp(&other.values) + } + other => other, + } + } +} diff --git a/core/incremental/mod.rs b/core/incremental/mod.rs index 4c26b91ba..755a27351 100644 --- a/core/incremental/mod.rs +++ b/core/incremental/mod.rs @@ -1,4 +1,5 @@ pub mod compiler; +pub mod cursor; pub mod dbsp; pub mod expr_compiler; pub mod hashable_row; diff --git a/core/incremental/operator.rs b/core/incremental/operator.rs index 4f148e943..46a933b87 100644 --- a/core/incremental/operator.rs +++ b/core/incremental/operator.rs @@ -2,15 +2,321 @@ // Operator DAG for DBSP-style incremental computation // Based on Feldera DBSP design but adapted for Turso's architecture +use crate::function::{AggFunc, Func}; +use crate::incremental::dbsp::Delta; use crate::incremental::expr_compiler::CompiledExpression; use crate::incremental::hashable_row::HashableRow; -use crate::types::Text; -use crate::{Connection, Database, SymbolTable, Value}; -use std::collections::{HashMap, HashSet}; +use 
crate::storage::btree::{BTreeCursor, BTreeKey}; +use crate::types::{IOResult, SeekKey, SeekOp, SeekResult, Text}; +use crate::{ + return_and_restore_if_io, return_if_io, Connection, Database, Result, SymbolTable, Value, +}; +use std::collections::{BTreeMap, HashMap}; use std::fmt::{self, Debug, Display}; -use std::sync::Arc; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; use turso_macros::match_ignore_ascii_case; +use turso_parser::ast::{As, Expr, Literal, Name, OneSelect, Operator, ResultColumn}; + +#[derive(Debug)] +pub enum ReadRecord { + GetRecord, + Done { state: Option }, +} + +impl ReadRecord { + fn new() -> Self { + ReadRecord::GetRecord + } + + fn read_record( + &mut self, + key: SeekKey, + aggregates: &[AggregateFunction], + cursor: &mut BTreeCursor, + ) -> Result>> { + loop { + match self { + ReadRecord::GetRecord => { + let res = return_if_io!(cursor.seek(key.clone(), SeekOp::GE { eq_only: true })); + if !matches!(res, SeekResult::Found) { + *self = ReadRecord::Done { state: None }; + } else { + let record = return_if_io!(cursor.record()); + let r = record.ok_or_else(|| { + crate::LimboError::InternalError(format!( + "Found key {key:?} in aggregate storage but could not read record" + )) + })?; + let values = r.get_values(); + let blob = values[1].to_owned(); + + let (state, _group_key) = match blob { + Value::Blob(blob) => AggregateState::from_blob(&blob, aggregates) + .ok_or_else(|| { + crate::LimboError::InternalError(format!( + "Cannot deserialize aggregate state {blob:?}", + )) + }), + _ => Err(crate::LimboError::ParseError( + "Value in aggregator not blob".to_string(), + )), + }?; + *self = ReadRecord::Done { state: Some(state) } + } + } + ReadRecord::Done { state } => return Ok(IOResult::Done(state.clone())), + } + } + } +} + +#[derive(Debug)] +pub(crate) enum WriteRecord { + GetRecord, + Delete { final_weight: isize }, + Insert { final_weight: isize }, + Done, +} +impl WriteRecord { + fn new() -> Self { + WriteRecord::GetRecord + } + + fn write_record( + &mut self, + key: SeekKey, + record: HashableRow, + weight: isize, + cursor: &mut BTreeCursor, + ) -> Result> { + loop { + match self { + WriteRecord::GetRecord => { + let res = return_if_io!(cursor.seek(key.clone(), SeekOp::GE { eq_only: true })); + if !matches!(res, SeekResult::Found) { + *self = WriteRecord::Insert { + final_weight: weight, + }; + } else { + let existing_record = return_if_io!(cursor.record()); + let r = existing_record.ok_or_else(|| { + crate::LimboError::InternalError(format!( + "Found key {key:?} in aggregate storage but could not read record" + )) + })?; + let values = r.get_values(); + // values[2] should contain the weight + let existing_weight = match values[2].to_owned() { + Value::Integer(w) => w as isize, + _ => { + return Err(crate::LimboError::InternalError(format!( + "Invalid weight value in aggregate storage for key {key:?}" + ))) + } + }; + let final_weight = existing_weight + weight; + if final_weight <= 0 { + *self = WriteRecord::Delete { final_weight } + } else { + *self = WriteRecord::Insert { final_weight } + } + } + } + WriteRecord::Delete { final_weight: _ } => { + let res = return_if_io!(cursor.seek(key.clone(), SeekOp::GE { eq_only: true })); + if !matches!(res, SeekResult::Found) { + return Err(crate::LimboError::InternalError(format!( + "record not found for {key:?}, but we had just GetRecord! Should not be possible" + ))); + } + // Done - row was deleted and weights cancel out. 
+                    // If we initiated the delete, we will complete it, so Done has to be set
+                    // beforehand so we don't come back here.
+                    *self = WriteRecord::Done;
+                    return_if_io!(cursor.delete());
+                }
+                WriteRecord::Insert { final_weight } => {
+                    return_if_io!(cursor.seek(key.clone(), SeekOp::GE { eq_only: true }));
+                    // Build the key and insert the record
+                    let key_i64 = match key {
+                        SeekKey::TableRowId(id) => id,
+                        _ => {
+                            return Err(crate::LimboError::InternalError(
+                                "Expected TableRowId for aggregate storage".to_string(),
+                            ))
+                        }
+                    };
+                    // Create the record values: key, blob, weight
+                    let record_values = vec![
+                        Value::Integer(key_i64),
+                        record.values[0].clone(), // The blob with serialized state
+                        Value::Integer(*final_weight as i64),
+                    ];
+
+                    // Create an ImmutableRecord from the values
+                    let immutable_record = crate::types::ImmutableRecord::from_values(
+                        &record_values,
+                        record_values.len(),
+                    );
+                    let btree_key = BTreeKey::new_table_rowid(key_i64, Some(&immutable_record));
+
+                    *self = WriteRecord::Done;
+                    return_if_io!(cursor.insert(&btree_key));
+                }
+                WriteRecord::Done => {
+                    return Ok(IOResult::Done(()));
+                }
+            }
+        }
+    }
+}
+
+type ComputedStates = HashMap<String, (Vec<Value>, AggregateState)>; // group_key_str -> (group_key, state)
+#[derive(Debug)]
+enum AggregateCommitState {
+    Idle,
+    Eval {
+        eval_state: EvalState,
+    },
+    PersistDelta {
+        delta: Delta,
+        computed_states: ComputedStates,
+        current_idx: usize,
+        write_record: WriteRecord,
+    },
+    Done {
+        delta: Delta,
+    },
+    Invalid,
+}
+
+// eval() has uncommitted data, so it can't be a member attribute of the Operator.
+// The state has to be kept by the caller
+#[derive(Debug)]
+pub enum EvalState {
+    Uninitialized,
+    Init {
+        delta: Delta,
+    },
+    FetchData {
+        delta: Delta, // Keep original delta for merge operation
+        current_idx: usize,
+        groups_to_read: Vec<(String, Vec<Value>)>, // Changed to Vec for index-based access
+        existing_groups: HashMap<String, AggregateState>,
+        old_values: HashMap<String, Vec<Value>>,
+        read_record_state: Box<ReadRecord>,
+    },
+    Done,
+}
+
+impl From<Delta> for EvalState {
+    fn from(delta: Delta) -> Self {
+        EvalState::Init { delta }
+    }
+}
+
+impl EvalState {
+    fn from_delta(delta: Delta) -> Self {
+        Self::Init { delta }
+    }
+
+    fn delta_ref(&self) -> &Delta {
+        match self {
+            EvalState::Init { delta } => delta,
+            _ => panic!("delta_ref() can only be called when in Init state"),
+        }
+    }
+    pub fn extract_delta(&mut self) -> Delta {
+        match self {
+            EvalState::Init { delta } => {
+                let extracted = std::mem::take(delta);
+                *self = EvalState::Uninitialized;
+                extracted
+            }
+            _ => panic!("extract_delta() can only be called when in Init state"),
+        }
+    }
+
+    fn advance(&mut self, groups_to_read: BTreeMap<String, Vec<Value>>) {
+        let delta = match self {
+            EvalState::Init { delta } => std::mem::take(delta),
+            _ => panic!("advance() can only be called when in Init state, current state: {self:?}"),
+        };
+
+        let _ = std::mem::replace(
+            self,
+            EvalState::FetchData {
+                delta,
+                current_idx: 0,
+                groups_to_read: groups_to_read.into_iter().collect(), // Convert BTreeMap to Vec
+                existing_groups: HashMap::new(),
+                old_values: HashMap::new(),
+                read_record_state: Box::new(ReadRecord::new()),
+            },
+        );
+    }
+    fn process_delta(
+        &mut self,
+        operator: &mut AggregateOperator,
+        cursor: &mut BTreeCursor,
+    ) -> Result<IOResult<(Delta, ComputedStates)>> {
+        loop {
+            match self {
+                EvalState::Uninitialized => {
+                    panic!("Cannot process_delta with Uninitialized state");
+                }
+                EvalState::Init { .. } => {
+                    panic!("State machine not supposed to reach the init state! 
advance() should have been called"); + } + EvalState::FetchData { + delta, + current_idx, + groups_to_read, + existing_groups, + old_values, + read_record_state, + } => { + if *current_idx >= groups_to_read.len() { + // All groups processed, compute final output + let result = + operator.merge_delta_with_existing(delta, existing_groups, old_values); + *self = EvalState::Done; + return Ok(IOResult::Done(result)); + } else { + // Get the current group to read + let (group_key_str, group_key) = &groups_to_read[*current_idx]; + + let seek_key = operator.generate_storage_key(group_key_str); + let key = SeekKey::TableRowId(seek_key); + + let state = return_if_io!(read_record_state.read_record( + key, + &operator.aggregates, + cursor + )); + + // Anything that mutates state has to happen after return_if_io! + // Unfortunately there's no good way to enforce that without turning + // this into a hot mess of mem::takes. + if let Some(state) = state { + let mut old_row = group_key.clone(); + old_row.extend(state.to_values(&operator.aggregates)); + old_values.insert(group_key_str.clone(), old_row); + existing_groups.insert(group_key_str.clone(), state.clone()); + } + + // All attributes mutated in place. + *current_idx += 1; + *read_record_state = Box::new(ReadRecord::new()); + } + } + EvalState::Done => { + return Ok(IOResult::Done((Delta::new(), HashMap::new()))); + } + } + } + } +} /// Tracks computation counts to verify incremental behavior (for tests now), and in the future /// should be used to provide statistics. @@ -56,69 +362,6 @@ impl ComputationTracker { } } -/// A delta represents ordered changes to data -#[derive(Debug, Clone, Default)] -pub struct Delta { - /// Ordered list of changes: (row, weight) where weight is +1 for insert, -1 for delete - /// It is crucial that this is ordered. Imagine the case of an update, which becomes a delete + - /// insert. If this is not ordered, it would be applied in arbitrary order and break the view. - pub changes: Vec<(HashableRow, isize)>, -} - -impl Delta { - pub fn new() -> Self { - Self { - changes: Vec::new(), - } - } - - pub fn insert(&mut self, row_key: i64, values: Vec) { - let row = HashableRow::new(row_key, values); - self.changes.push((row, 1)); - } - - pub fn delete(&mut self, row_key: i64, values: Vec) { - let row = HashableRow::new(row_key, values); - self.changes.push((row, -1)); - } - - pub fn is_empty(&self) -> bool { - self.changes.is_empty() - } - - pub fn len(&self) -> usize { - self.changes.len() - } - - /// Merge another delta into this one - /// This preserves the order of operations - no consolidation is done - /// to maintain the full history of changes - pub fn merge(&mut self, other: &Delta) { - // Simply append all changes from other, preserving order - self.changes.extend(other.changes.iter().cloned()); - } - - /// Consolidate changes by combining entries with the same HashableRow - pub fn consolidate(&mut self) { - if self.changes.is_empty() { - return; - } - - // Use a HashMap to accumulate weights - let mut consolidated: HashMap = HashMap::new(); - - for (row, weight) in self.changes.drain(..) 
{ - *consolidated.entry(row).or_insert(0) += weight; - } - - // Convert back to vec, filtering out zero weights - self.changes = consolidated - .into_iter() - .filter(|(_, weight)| *weight != 0) - .collect(); - } -} - #[cfg(test)] mod hashable_row_tests { use super::*; @@ -240,8 +483,6 @@ impl FilterPredicate { /// Parse a SQL AST expression into a FilterPredicate /// This centralizes all SQL-to-predicate parsing logic pub fn from_sql_expr(expr: &turso_parser::ast::Expr) -> crate::Result { - use turso_parser::ast::*; - let Expr::Binary(lhs, op, rhs) = expr else { return Err(crate::LimboError::ParseError( "Unsupported WHERE clause for incremental views: not a binary expression" @@ -323,8 +564,6 @@ impl FilterPredicate { /// Parse a WHERE clause from a SELECT statement pub fn from_select(select: &turso_parser::ast::Select) -> crate::Result { - use turso_parser::ast::*; - if let OneSelect::Select { ref where_clause, .. } = select.body.select @@ -391,8 +630,6 @@ impl AggregateFunction { func: &crate::function::Func, input_column: Option, ) -> Option { - use crate::function::{AggFunc, Func}; - match func { Func::Agg(agg_func) => { match agg_func { @@ -412,34 +649,77 @@ impl AggregateFunction { /// Operator DAG (Directed Acyclic Graph) /// Base trait for incremental operators pub trait IncrementalOperator: Debug { - /// Initialize with base data - fn initialize(&mut self, data: Delta); - - /// Evaluate the operator with a delta, without modifying internal state - /// This is used during query execution to compute results including uncommitted changes + /// Evaluate the operator with a state, without modifying internal state + /// This is used during query execution to compute results + /// May need to read from storage to get current state (e.g., for aggregates) /// /// # Arguments - /// * `delta` - The committed delta to process - /// * `uncommitted` - Optional uncommitted changes from the current transaction - fn eval(&self, delta: Delta, uncommitted: Option) -> Delta; + /// * `state` - The evaluation state (may be in progress from a previous I/O operation) + /// * `cursor` - Cursor for reading operator state from storage + /// + /// # Returns + /// The output delta from the evaluation + fn eval(&mut self, state: &mut EvalState, cursor: &mut BTreeCursor) -> Result>; /// Commit a delta to the operator's internal state and return the output /// This is called when a transaction commits, making changes permanent /// Returns the output delta (what downstream operators should see) - fn commit(&mut self, delta: Delta) -> Delta; - - /// Get current accumulated state - fn get_current_state(&self) -> Delta; + /// The cursor parameter is for operators that need to persist state + fn commit(&mut self, delta: Delta, cursor: &mut BTreeCursor) -> Result>; /// Set computation tracker fn set_tracker(&mut self, tracker: Arc>); } +/// Input operator - passes through input data unchanged +/// This operator is used for input nodes in the circuit to provide a uniform interface +#[derive(Debug)] +pub struct InputOperator { + name: String, +} + +impl InputOperator { + pub fn new(name: String) -> Self { + Self { name } + } + + pub fn name(&self) -> &str { + &self.name + } +} + +impl IncrementalOperator for InputOperator { + fn eval( + &mut self, + state: &mut EvalState, + _cursor: &mut BTreeCursor, + ) -> Result> { + match state { + EvalState::Init { delta } => { + let output = std::mem::take(delta); + *state = EvalState::Done; + Ok(IOResult::Done(output)) + } + _ => unreachable!( + "InputOperator doesn't execute the 
state machine. Should be in Init state" + ), + } + } + + fn commit(&mut self, delta: Delta, _cursor: &mut BTreeCursor) -> Result> { + // Input operator passes through the delta unchanged during commit + Ok(IOResult::Done(delta)) + } + + fn set_tracker(&mut self, _tracker: Arc>) { + // Input operator doesn't need tracking + } +} + /// Filter operator - filters rows based on predicate #[derive(Debug)] pub struct FilterOperator { predicate: FilterPredicate, - current_state: Delta, column_names: Vec, tracker: Option>>, } @@ -448,7 +728,6 @@ impl FilterOperator { pub fn new(predicate: FilterPredicate, column_names: Vec) -> Self { Self { predicate, - current_state: Delta::new(), column_names, tracker: None, } @@ -549,33 +828,22 @@ impl FilterOperator { } impl IncrementalOperator for FilterOperator { - fn initialize(&mut self, data: Delta) { - // Process initial data through filter - for (row, weight) in data.changes { - if let Some(tracker) = &self.tracker { - tracker.lock().unwrap().record_filter(); - } - - if self.evaluate_predicate(&row.values) { - self.current_state.changes.push((row, weight)); - } - } - } - - fn eval(&self, delta: Delta, uncommitted: Option) -> Delta { - let mut output_delta = Delta::new(); - - // Merge delta with uncommitted if present - let combined_delta = if let Some(uncommitted) = uncommitted { - let mut combined = delta; - combined.merge(&uncommitted); - combined - } else { - delta + fn eval( + &mut self, + state: &mut EvalState, + _cursor: &mut BTreeCursor, + ) -> Result> { + let delta = match state { + EvalState::Init { delta } => std::mem::take(delta), + _ => unreachable!( + "FilterOperator doesn't execute the state machine. Should be in Init state" + ), }; - // Process the combined delta through the filter - for (row, weight) in combined_delta.changes { + let mut output_delta = Delta::new(); + + // Process the delta through the filter + for (row, weight) in delta.changes { if let Some(tracker) = &self.tracker { tracker.lock().unwrap().record_filter(); } @@ -588,10 +856,11 @@ impl IncrementalOperator for FilterOperator { } } - output_delta + *state = EvalState::Done; + Ok(IOResult::Done(output_delta)) } - fn commit(&mut self, delta: Delta) -> Delta { + fn commit(&mut self, delta: Delta, _cursor: &mut BTreeCursor) -> Result> { let mut output_delta = Delta::new(); // Commit the delta to our internal state @@ -605,19 +874,11 @@ impl IncrementalOperator for FilterOperator { // For deletes, this means the row was in the view (its values pass the filter) // For inserts, this means the row should be in the view if self.evaluate_predicate(&row.values) { - self.current_state.changes.push((row.clone(), weight)); output_delta.changes.push((row, weight)); } } - output_delta - } - - fn get_current_state(&self) -> Delta { - // Return a consolidated view of the current state - let mut consolidated = self.current_state.clone(); - consolidated.consolidate(); - consolidated + Ok(IOResult::Done(output_delta)) } fn set_tracker(&mut self, tracker: Arc>) { @@ -631,7 +892,6 @@ pub struct ProjectOperator { columns: Vec, input_column_names: Vec, output_column_names: Vec, - current_state: Delta, tracker: Option>>, // Internal in-memory connection for expression evaluation // Programs are very dependent on having a connection, so give it one. 
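+    // A rough sketch of how this connection is wired up in `new()` below
+    // (illustrative only; the exact open flags are elided here):
+    //
+    //     let io = Arc::new(crate::MemoryIO::new());
+    //     let db = Database::open_file(io.clone(), ":memory:", ...)?;
+    //     let internal_conn = db.connect()?;
+    //
+    // Compiled projection expressions are then evaluated against this
+    // private in-memory connection.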
@@ -652,7 +912,6 @@ impl std::fmt::Debug for ProjectOperator { .field("columns", &self.columns) .field("input_column_names", &self.input_column_names) .field("output_column_names", &self.output_column_names) - .field("current_state", &self.current_state) .field("tracker", &self.tracker) .finish_non_exhaustive() } @@ -665,8 +924,6 @@ impl ProjectOperator { input_column_names: Vec, schema: &crate::schema::Schema, ) -> crate::Result { - use turso_parser::ast::*; - // Set up internal connection for expression evaluation let io = Arc::new(crate::MemoryIO::new()); let db = Database::open_file( @@ -769,7 +1026,6 @@ impl ProjectOperator { columns, input_column_names, output_column_names, - current_state: Delta::new(), tracker: None, internal_conn, }) @@ -809,7 +1065,6 @@ impl ProjectOperator { columns, input_column_names, output_column_names, - current_state: Delta::new(), tracker: None, internal_conn, }) @@ -839,7 +1094,6 @@ impl ProjectOperator { } fn evaluate_expression(&self, expr: &turso_parser::ast::Expr, values: &[Value]) -> Value { - use turso_parser::ast::*; match expr { Expr::Id(name) => { if let Some(idx) = self @@ -970,44 +1224,35 @@ impl ProjectOperator { } impl IncrementalOperator for ProjectOperator { - fn initialize(&mut self, data: Delta) { - for (row, weight) in &data.changes { - if let Some(tracker) = &self.tracker { - tracker.lock().unwrap().record_project(); - } - - let projected = self.project_values(&row.values); - let projected_row = HashableRow::new(row.rowid, projected); - self.current_state.changes.push((projected_row, *weight)); - } - } - - fn eval(&self, delta: Delta, uncommitted: Option) -> Delta { - let mut output_delta = Delta::new(); - - // Merge delta with uncommitted if present - let combined_delta = if let Some(uncommitted) = uncommitted { - let mut combined = delta; - combined.merge(&uncommitted); - combined - } else { - delta + fn eval( + &mut self, + state: &mut EvalState, + _cursor: &mut BTreeCursor, + ) -> Result> { + let delta = match state { + EvalState::Init { delta } => std::mem::take(delta), + _ => unreachable!( + "ProjectOperator doesn't execute the state machine. 
Should be in Init state" + ), }; - for (row, weight) in &combined_delta.changes { + let mut output_delta = Delta::new(); + + for (row, weight) in delta.changes { if let Some(tracker) = &self.tracker { tracker.lock().unwrap().record_project(); } let projected = self.project_values(&row.values); let projected_row = HashableRow::new(row.rowid, projected); - output_delta.changes.push((projected_row, *weight)); + output_delta.changes.push((projected_row, weight)); } - output_delta + *state = EvalState::Done; + Ok(IOResult::Done(output_delta)) } - fn commit(&mut self, delta: Delta) -> Delta { + fn commit(&mut self, delta: Delta, _cursor: &mut BTreeCursor) -> Result> { let mut output_delta = Delta::new(); // Commit the delta to our internal state and build output @@ -1017,20 +1262,10 @@ impl IncrementalOperator for ProjectOperator { } let projected = self.project_values(&row.values); let projected_row = HashableRow::new(row.rowid, projected); - self.current_state - .changes - .push((projected_row.clone(), *weight)); output_delta.changes.push((projected_row, *weight)); } - output_delta - } - - fn get_current_state(&self) -> Delta { - // Return a consolidated view of the current state - let mut consolidated = self.current_state.clone(); - consolidated.consolidate(); - consolidated + Ok(crate::types::IOResult::Done(output_delta)) } fn set_tracker(&mut self, tracker: Arc>) { @@ -1040,28 +1275,29 @@ impl IncrementalOperator for ProjectOperator { /// Aggregate operator - performs incremental aggregation with GROUP BY /// Maintains running totals/counts that are updated incrementally -#[derive(Debug, Clone)] +/// +/// Note that the AggregateOperator essentially implements a ZSet, even +/// though the ZSet structure is never used explicitly. The on-disk btree +/// plays the role of the set! +#[derive(Debug)] pub struct AggregateOperator { + // Unique operator ID for indexing in persistent storage + operator_id: usize, // GROUP BY columns group_by: Vec, // Aggregate functions to compute aggregates: Vec, // Column names from input pub input_column_names: Vec, - // Aggregation state: group_key_str -> aggregate values - // For each group, we store the aggregate results - // We use String representation of group keys since Value doesn't implement Hash - group_states: HashMap, - // Map to keep track of actual group key values for output - group_key_values: HashMap>, - // Current output state as a Delta - current_state: Delta, tracker: Option>>, + + // State machine for commit operation + commit_state: AggregateCommitState, } /// State for a single group's aggregates #[derive(Debug, Clone)] -struct AggregateState { +pub struct AggregateState { // For COUNT: just the count count: i64, // For SUM: column_name -> sum value @@ -1081,6 +1317,158 @@ impl AggregateState { } } + // Serialize the aggregate state to a binary blob including group key values + // The reason we serialize it like this, instead of just writing the actual values, is that + // The same table may have different aggregators in the circuit. They will all have different + // columns. 
+ fn to_blob(&self, aggregates: &[AggregateFunction], group_key: &[Value]) -> Vec { + let mut blob = Vec::new(); + + // Write version byte for future compatibility + blob.push(1u8); + + // Write number of group key values + blob.extend_from_slice(&(group_key.len() as u32).to_le_bytes()); + + // Write each group key value + for value in group_key { + // Write value type tag + match value { + Value::Null => blob.push(0u8), + Value::Integer(i) => { + blob.push(1u8); + blob.extend_from_slice(&i.to_le_bytes()); + } + Value::Float(f) => { + blob.push(2u8); + blob.extend_from_slice(&f.to_le_bytes()); + } + Value::Text(s) => { + blob.push(3u8); + let text_str = s.as_str(); + let bytes = text_str.as_bytes(); + blob.extend_from_slice(&(bytes.len() as u32).to_le_bytes()); + blob.extend_from_slice(bytes); + } + Value::Blob(b) => { + blob.push(4u8); + blob.extend_from_slice(&(b.len() as u32).to_le_bytes()); + blob.extend_from_slice(b); + } + } + } + + // Write count as 8 bytes (little-endian) + blob.extend_from_slice(&self.count.to_le_bytes()); + + // Write each aggregate's state + for agg in aggregates { + match agg { + AggregateFunction::Sum(col_name) => { + let sum = self.sums.get(col_name).copied().unwrap_or(0.0); + blob.extend_from_slice(&sum.to_le_bytes()); + } + AggregateFunction::Avg(col_name) => { + let (sum, count) = self.avgs.get(col_name).copied().unwrap_or((0.0, 0)); + blob.extend_from_slice(&sum.to_le_bytes()); + blob.extend_from_slice(&count.to_le_bytes()); + } + AggregateFunction::Count => { + // Count is already written above + } + } + } + + blob + } + + /// Deserialize aggregate state from a binary blob + /// Returns the aggregate state and the group key values + fn from_blob(blob: &[u8], aggregates: &[AggregateFunction]) -> Option<(Self, Vec)> { + let mut cursor = 0; + + // Check version byte + if blob.get(cursor) != Some(&1u8) { + return None; + } + cursor += 1; + + // Read number of group key values + let num_group_keys = + u32::from_le_bytes(blob.get(cursor..cursor + 4)?.try_into().ok()?) as usize; + cursor += 4; + + // Read group key values + let mut group_key = Vec::new(); + for _ in 0..num_group_keys { + let value_type = *blob.get(cursor)?; + cursor += 1; + + let value = match value_type { + 0 => Value::Null, + 1 => { + let i = i64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + Value::Integer(i) + } + 2 => { + let f = f64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + Value::Float(f) + } + 3 => { + let len = + u32::from_le_bytes(blob.get(cursor..cursor + 4)?.try_into().ok()?) as usize; + cursor += 4; + let bytes = blob.get(cursor..cursor + len)?; + cursor += len; + let text_str = std::str::from_utf8(bytes).ok()?; + Value::Text(text_str.to_string().into()) + } + 4 => { + let len = + u32::from_le_bytes(blob.get(cursor..cursor + 4)?.try_into().ok()?) 
as usize; + cursor += 4; + let bytes = blob.get(cursor..cursor + len)?; + cursor += len; + Value::Blob(bytes.to_vec()) + } + _ => return None, + }; + group_key.push(value); + } + + // Read count + let count = i64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + + let mut state = Self::new(); + state.count = count; + + // Read each aggregate's state + for agg in aggregates { + match agg { + AggregateFunction::Sum(col_name) => { + let sum = f64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + state.sums.insert(col_name.clone(), sum); + } + AggregateFunction::Avg(col_name) => { + let sum = f64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + let count = i64::from_le_bytes(blob.get(cursor..cursor + 8)?.try_into().ok()?); + cursor += 8; + state.avgs.insert(col_name.clone(), (sum, count)); + } + AggregateFunction::Count => { + // Count was already read above + } + } + } + + Some((state, group_key)) + } + /// Apply a delta to this aggregate state fn apply_delta( &mut self, @@ -1168,25 +1556,146 @@ impl AggregateState { impl AggregateOperator { pub fn new( + operator_id: usize, group_by: Vec, aggregates: Vec, input_column_names: Vec, ) -> Self { Self { + operator_id, group_by, aggregates, input_column_names, - group_states: HashMap::new(), - group_key_values: HashMap::new(), - current_state: Delta::new(), tracker: None, + commit_state: AggregateCommitState::Idle, } } + fn eval_internal( + &mut self, + state: &mut EvalState, + cursor: &mut BTreeCursor, + ) -> Result> { + match state { + EvalState::Uninitialized => { + panic!("Cannot eval AggregateOperator with Uninitialized state"); + } + EvalState::Init { delta } => { + if delta.changes.is_empty() { + *state = EvalState::Done; + return Ok(IOResult::Done((Delta::new(), HashMap::new()))); + } + + let mut groups_to_read = BTreeMap::new(); + for (row, _weight) in &delta.changes { + // Extract group key using cloned fields + let group_key = self.extract_group_key(&row.values); + let group_key_str = Self::group_key_to_string(&group_key); + groups_to_read.insert(group_key_str, group_key); + } + state.advance(groups_to_read); + } + EvalState::FetchData { .. } => { + // Already in progress, continue processing on process_delta below. + } + EvalState::Done => { + panic!("unreachable state! 
should have returned"); + } + } + + // Process the delta through the state machine + let result = return_if_io!(state.process_delta(self, cursor)); + Ok(IOResult::Done(result)) + } + + fn merge_delta_with_existing( + &mut self, + delta: &Delta, + existing_groups: &mut HashMap, + old_values: &mut HashMap>, + ) -> (Delta, HashMap, AggregateState)>) { + let mut output_delta = Delta::new(); + let mut temp_keys: HashMap> = HashMap::new(); + + // Process each change in the delta + for (row, weight) in &delta.changes { + if let Some(tracker) = &self.tracker { + tracker.lock().unwrap().record_aggregation(); + } + + // Extract group key + let group_key = self.extract_group_key(&row.values); + let group_key_str = Self::group_key_to_string(&group_key); + + let state = existing_groups + .entry(group_key_str.clone()) + .or_insert_with(AggregateState::new); + + temp_keys.insert(group_key_str.clone(), group_key.clone()); + + // Apply the delta to the temporary state + state.apply_delta( + &row.values, + *weight, + &self.aggregates, + &self.input_column_names, + ); + } + + // Generate output delta from temporary states and collect final states + let mut final_states = HashMap::new(); + + for (group_key_str, state) in existing_groups { + let group_key = temp_keys.get(group_key_str).cloned().unwrap_or_default(); + + // Generate a unique rowid for this group + let result_key = self.generate_group_rowid(group_key_str); + + if let Some(old_row_values) = old_values.get(group_key_str) { + let old_row = HashableRow::new(result_key, old_row_values.clone()); + output_delta.changes.push((old_row, -1)); + } + + // Always store the state for persistence (even if count=0, we need to delete it) + final_states.insert(group_key_str.clone(), (group_key.clone(), state.clone())); + + // Only include groups with count > 0 in the output delta + if state.count > 0 { + // Build output row: group_by columns + aggregate values + let mut output_values = group_key.clone(); + output_values.extend(state.to_values(&self.aggregates)); + + let output_row = HashableRow::new(result_key, output_values); + output_delta.changes.push((output_row, 1)); + } + } + (output_delta, final_states) + } + pub fn set_tracker(&mut self, tracker: Arc>) { self.tracker = Some(tracker); } + /// Generate a rowid for a group + /// For no GROUP BY: always returns 0 + /// For GROUP BY: returns a hash of the group key string + fn generate_group_rowid(&self, group_key_str: &str) -> i64 { + if self.group_by.is_empty() { + 0 + } else { + group_key_str + .bytes() + .fold(0i64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as i64)) + } + } + + /// Generate the composite key for BTree storage + /// Combines operator_id and group hash + fn generate_storage_key(&self, group_key_str: &str) -> i64 { + let group_hash = self.generate_group_rowid(group_key_str); + (self.operator_id as i64) << 32 | (group_hash & 0xFFFFFFFF) + } + /// Extract group key values from a row fn extract_group_key(&self, values: &[Value]) -> Vec { let mut key = Vec::new(); @@ -1214,215 +1723,102 @@ impl AggregateOperator { .join(",") } - /// Process a delta and update aggregate state incrementally - pub fn process_delta(&mut self, delta: Delta) -> Delta { - let mut output_delta = Delta::new(); - - // Track which groups were modified and their old values - let mut modified_groups = HashSet::new(); - let mut old_values: HashMap> = HashMap::new(); - - // Process each change in the delta - for (row, weight) in &delta.changes { - if let Some(tracker) = &self.tracker { - 
tracker.lock().unwrap().record_aggregation(); - } - - // Extract group key - let group_key = self.extract_group_key(&row.values); - let group_key_str = Self::group_key_to_string(&group_key); - - // Store old aggregate values BEFORE applying the delta - // (only for the first time we see this group in this batch) - if !modified_groups.contains(&group_key_str) { - if let Some(state) = self.group_states.get(&group_key_str) { - let mut old_row = group_key.clone(); - old_row.extend(state.to_values(&self.aggregates)); - old_values.insert(group_key_str.clone(), old_row); - } - } - - modified_groups.insert(group_key_str.clone()); - - // Store the actual group key values - self.group_key_values - .insert(group_key_str.clone(), group_key.clone()); - - // Get or create aggregate state for this group - let state = self - .group_states - .entry(group_key_str.clone()) - .or_insert_with(AggregateState::new); - - // Apply the delta to the aggregate state - state.apply_delta( - &row.values, - *weight, - &self.aggregates, - &self.input_column_names, - ); - } - - // Generate output delta for modified groups - for group_key_str in modified_groups { - // Get the actual group key values - let group_key = self - .group_key_values - .get(&group_key_str) - .cloned() - .unwrap_or_default(); - - // Generate a unique key for this group - // We use a hash of the group key to ensure consistency - let result_key = group_key_str - .bytes() - .fold(0i64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as i64)); - - // Emit retraction for old value if it existed - if let Some(old_row_values) = old_values.get(&group_key_str) { - let old_row = HashableRow::new(result_key, old_row_values.clone()); - output_delta.changes.push((old_row.clone(), -1)); - // Also remove from current state - self.current_state.changes.push((old_row, -1)); - } - - if let Some(state) = self.group_states.get(&group_key_str) { - // Build output row: group_by columns + aggregate values - let mut output_values = group_key.clone(); - output_values.extend(state.to_values(&self.aggregates)); - - // Check if group should be removed (count is 0) - if state.count > 0 { - // Add to output delta with positive weight - let output_row = HashableRow::new(result_key, output_values.clone()); - output_delta.changes.push((output_row.clone(), 1)); - - // Update current state - self.current_state.changes.push((output_row, 1)); - } else { - // Group has count=0, remove from state - // (we already emitted the retraction above if needed) - self.group_states.remove(&group_key_str); - self.group_key_values.remove(&group_key_str); - } - } - } - - // Consolidate current state to handle removals - self.current_state.consolidate(); - - output_delta + fn seek_key_from_str(&self, group_key_str: &str) -> SeekKey { + // Calculate the composite key for seeking + let key_i64 = self.generate_storage_key(group_key_str); + SeekKey::TableRowId(key_i64) } - pub fn get_current_state(&self) -> &Delta { - &self.current_state + fn seek_key(&self, row: HashableRow) -> SeekKey { + // Extract group key for first row + let group_key = self.extract_group_key(&row.values); + let group_key_str = Self::group_key_to_string(&group_key); + self.seek_key_from_str(&group_key_str) } } impl IncrementalOperator for AggregateOperator { - fn initialize(&mut self, data: Delta) { - // Process all initial data - this modifies state during initialization - let _ = self.process_delta(data); + fn eval(&mut self, state: &mut EvalState, cursor: &mut BTreeCursor) -> Result> { + let (delta, _) = 
return_if_io!(self.eval_internal(state, cursor));
+        Ok(IOResult::Done(delta))
     }
 
-    fn eval(&self, delta: Delta, uncommitted: Option<Delta>) -> Delta {
-        // Clone the current state to work with temporarily
-        let mut temp_group_states = self.group_states.clone();
-        let mut temp_group_key_values = self.group_key_values.clone();
-
-        // Merge delta with uncommitted if present
-        let combined_delta = if let Some(uncommitted) = uncommitted {
-            let mut combined = delta;
-            combined.merge(&uncommitted);
-            combined
-        } else {
-            delta
-        };
-
-        let mut output_delta = Delta::new();
-        let mut modified_groups = HashSet::new();
-        let mut old_values: HashMap<String, Vec<Value>> = HashMap::new();
-
-        // Process each change in the combined delta using temporary state
-        for (row, weight) in &combined_delta.changes {
-            if let Some(tracker) = &self.tracker {
-                tracker.lock().unwrap().record_aggregation();
-            }
-
-            // Extract group key
-            let group_key = self.extract_group_key(&row.values);
-            let group_key_str = Self::group_key_to_string(&group_key);
-
-            // Store old aggregate values BEFORE applying the delta
-            if !modified_groups.contains(&group_key_str) {
-                if let Some(state) = temp_group_states.get(&group_key_str) {
-                    let mut old_row = group_key.clone();
-                    old_row.extend(state.to_values(&self.aggregates));
-                    old_values.insert(group_key_str.clone(), old_row);
+    fn commit(&mut self, delta: Delta, cursor: &mut BTreeCursor) -> Result<IOResult<Delta>> {
+        loop {
+            // Note: because we std::mem::replace here (without it, the borrow checker goes nuts,
+            // because we call self.eval_internal, which requires a mutable borrow), we have to
+            // restore the state if we return I/O. So we can't use return_if_io!
+            let mut state =
+                std::mem::replace(&mut self.commit_state, AggregateCommitState::Invalid);
+            match &mut state {
+                AggregateCommitState::Invalid => {
+                    panic!("Reached invalid state! 
State was replaced, and not replaced back"); } - } + AggregateCommitState::Idle => { + let eval_state = EvalState::from_delta(delta.clone()); + self.commit_state = AggregateCommitState::Eval { eval_state }; + } + AggregateCommitState::Eval { ref mut eval_state } => { + let (output_delta, computed_states) = return_and_restore_if_io!( + &mut self.commit_state, + state, + self.eval_internal(eval_state, cursor) + ); + self.commit_state = AggregateCommitState::PersistDelta { + delta: output_delta, + computed_states, + current_idx: 0, + write_record: WriteRecord::new(), + }; + } + AggregateCommitState::PersistDelta { + delta, + computed_states, + current_idx, + write_record, + } => { + let states_vec: Vec<_> = computed_states.iter().collect(); - modified_groups.insert(group_key_str.clone()); - temp_group_key_values.insert(group_key_str.clone(), group_key.clone()); + if *current_idx >= states_vec.len() { + self.commit_state = AggregateCommitState::Done { + delta: delta.clone(), + }; + } else { + let (group_key_str, (group_key, agg_state)) = states_vec[*current_idx]; - // Get or create aggregate state for this group in temporary state - let state = temp_group_states - .entry(group_key_str.clone()) - .or_insert_with(AggregateState::new); + let seek_key = self.seek_key_from_str(group_key_str); - // Apply the delta to the temporary aggregate state - state.apply_delta( - &row.values, - *weight, - &self.aggregates, - &self.input_column_names, - ); - } + // Determine weight: -1 to delete (cancels existing weight=1), 1 to insert/update + let weight = if agg_state.count == 0 { -1 } else { 1 }; - // Generate output delta for modified groups using temporary state - for group_key_str in modified_groups { - let group_key = temp_group_key_values - .get(&group_key_str) - .cloned() - .unwrap_or_default(); + // Serialize the aggregate state with group key (even for deletion, we need a row) + let state_blob = agg_state.to_blob(&self.aggregates, group_key); + let blob_row = HashableRow::new(0, vec![Value::Blob(state_blob)]); - // Generate a unique key for this group - let result_key = group_key_str - .bytes() - .fold(0i64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as i64)); + return_and_restore_if_io!( + &mut self.commit_state, + state, + write_record.write_record(seek_key, blob_row, weight, cursor) + ); - // Emit retraction for old value if it existed - if let Some(old_row_values) = old_values.get(&group_key_str) { - let old_row = HashableRow::new(result_key, old_row_values.clone()); - output_delta.changes.push((old_row, -1)); - } + let delta = std::mem::take(delta); + let computed_states = std::mem::take(computed_states); - if let Some(state) = temp_group_states.get(&group_key_str) { - // Build output row: group_by columns + aggregate values - let mut output_values = group_key.clone(); - output_values.extend(state.to_values(&self.aggregates)); - - // Check if group should be included (count > 0) - if state.count > 0 { - let output_row = HashableRow::new(result_key, output_values); - output_delta.changes.push((output_row, 1)); + self.commit_state = AggregateCommitState::PersistDelta { + delta, + computed_states, + current_idx: *current_idx + 1, + write_record: WriteRecord::new(), // Reset for next write + }; + } + } + AggregateCommitState::Done { delta } => { + self.commit_state = AggregateCommitState::Idle; + let delta = std::mem::take(delta); + return Ok(IOResult::Done(delta)); } } } - - output_delta - } - - fn commit(&mut self, delta: Delta) -> Delta { - // Actually update the internal state when committing 
and return the output - self.process_delta(delta) - } - - fn get_current_state(&self) -> Delta { - // Return a consolidated view of the current state - let mut consolidated = self.current_state.clone(); - consolidated.consolidate(); - consolidated } fn set_tracker(&mut self, tracker: Arc>) { @@ -1433,10 +1829,101 @@ impl IncrementalOperator for AggregateOperator { #[cfg(test)] mod tests { use super::*; + use crate::storage::pager::CreateBTreeFlags; use crate::types::Text; + use crate::util::IOExt; use crate::Value; + use crate::{Database, MemoryIO, IO}; use std::sync::{Arc, Mutex}; + /// Create a test pager for operator tests + fn create_test_pager() -> (std::rc::Rc, usize) { + let io: Arc = Arc::new(MemoryIO::new()); + let db = Database::open_file(io.clone(), ":memory:", false, false).unwrap(); + let conn = db.connect().unwrap(); + + let pager = conn.pager.borrow().clone(); + + // Allocate page 1 first (database header) + let _ = pager.io.block(|| pager.allocate_page1()); + + // Properly create a BTree for aggregate state using the pager API + let root_page_id = pager + .io + .block(|| pager.btree_create(&CreateBTreeFlags::new_table())) + .expect("Failed to create BTree for aggregate state") + as usize; + + (pager, root_page_id) + } + + /// Read the current state from the BTree (for testing) + /// Returns a Delta with all the current aggregate values + fn get_current_state_from_btree( + agg: &AggregateOperator, + pager: &std::rc::Rc, + cursor: &mut BTreeCursor, + ) -> Delta { + let mut result = Delta::new(); + + // Rewind to start of table + pager.io.block(|| cursor.rewind()).unwrap(); + + loop { + // Check if cursor is empty (no more rows) + if cursor.is_empty() { + break; + } + + // Get the record at this position + let record = pager + .io + .block(|| cursor.record()) + .unwrap() + .unwrap() + .to_owned(); + + let values_ref = record.get_values(); + let values: Vec = values_ref.into_iter().map(|x| x.to_owned()).collect(); + + // Check if this record belongs to our operator + if let Some(Value::Integer(key)) = values.first() { + let operator_part = (key >> 32) as usize; + + // Skip if not our operator + if operator_part != agg.operator_id { + pager.io.block(|| cursor.next()).unwrap(); + continue; + } + + // Get the blob data + if let Some(Value::Blob(blob)) = values.get(1) { + // Deserialize the state + if let Some((state, group_key)) = + AggregateState::from_blob(blob, &agg.aggregates) + { + // Should not have made it this far. + assert!(state.count != 0); + // Build output row: group_by columns + aggregate values + let mut output_values = group_key.clone(); + output_values.extend(state.to_values(&agg.aggregates)); + + let group_key_str = AggregateOperator::group_key_to_string(&group_key); + let rowid = agg.generate_group_rowid(&group_key_str); + + let output_row = HashableRow::new(rowid, output_values); + result.changes.push((output_row, 1)); + } + } + } + + pager.io.block(|| cursor.next()).unwrap(); + } + + result.consolidate(); + result + } + /// Assert that we're doing incremental work, not full recomputation fn assert_incremental(tracker: &ComputationTracker, expected_ops: usize, data_size: usize) { assert!( @@ -1464,8 +1951,13 @@ mod tests { // the operator emits both a retraction (-1) of the old value // and an insertion (+1) of the new value. 
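+        // Sketch of the Z-set algebra being tested (update values are
+        // hypothetical): with SUM(age) = 90 committed, updating one row from
+        // age 25 to age 26 arrives as the delta {(old_row, -1), (new_row, +1)},
+        // and the operator emits {(SUM=90, -1), (SUM=91, +1)}; a consumer that
+        // applies the weighted rows in order is left with the single row SUM=91.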
+ // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + // Create an aggregate operator for SUM(age) with no GROUP BY let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec![], // No GROUP BY vec![AggregateFunction::Sum("age".to_string())], vec!["id".to_string(), "name".to_string(), "age".to_string()], @@ -1499,10 +1991,13 @@ mod tests { ); // Initialize with initial data - agg.initialize(initial_delta); + pager + .io + .block(|| agg.commit(initial_delta.clone(), &mut cursor)) + .unwrap(); // Verify initial state: SUM(age) = 25 + 30 + 35 = 90 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 1, "Should have one aggregate row"); let (row, weight) = &state.changes[0]; assert_eq!(*weight, 1, "Aggregate row should have weight 1"); @@ -1520,8 +2015,10 @@ mod tests { ); // Process the incremental update - let output_delta = agg.eval(update_delta.clone(), None); - agg.commit(update_delta); + let output_delta = pager + .io + .block(|| agg.commit(update_delta.clone(), &mut cursor)) + .unwrap(); // CRITICAL: The output delta should contain TWO changes: // 1. Retraction of old aggregate value (90) with weight -1 @@ -1568,7 +2065,12 @@ mod tests { // the operator emits both retractions and insertions correctly for each group. // Create an aggregate operator for SUM(score) GROUP BY team + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["team".to_string()], // GROUP BY team vec![AggregateFunction::Sum("score".to_string())], vec![ @@ -1610,10 +2112,13 @@ mod tests { ); // Initialize with initial data - agg.initialize(initial_delta); + pager + .io + .block(|| agg.commit(initial_delta.clone(), &mut cursor)) + .unwrap(); // Verify initial state: red team = 30, blue team = 15 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 2, "Should have two groups"); // Find the red and blue team aggregates @@ -1653,8 +2158,10 @@ mod tests { ); // Process the incremental update - let output_delta = agg.eval(update_delta.clone(), None); - agg.commit(update_delta); + let output_delta = pager + .io + .block(|| agg.commit(update_delta.clone(), &mut cursor)) + .unwrap(); // Should have 2 changes: retraction of old red team sum, insertion of new red team sum // Blue team should NOT be affected @@ -1703,8 +2210,13 @@ mod tests { fn test_count_increments_not_recounts() { let tracker = Arc::new(Mutex::new(ComputationTracker::new())); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + // Create COUNT(*) GROUP BY category let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["category".to_string()], vec![AggregateFunction::Count], vec![ @@ -1728,7 +2240,10 @@ mod tests { ], ); } - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Reset tracker for delta processing tracker.lock().unwrap().aggregation_updates = 0; @@ -1744,15 +2259,15 @@ mod tests { ], ); - let _output = agg.eval(delta.clone(), None); - 
agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); - // Should update one group (cat_0) twice - once in eval, once in commit - // This is still incremental - we're not recounting all groups - assert_eq!(tracker.lock().unwrap().aggregation_updates, 2); + assert_eq!(tracker.lock().unwrap().aggregation_updates, 1); // Check the final state - cat_0 should now have count 11 - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let cat_0 = final_state .changes .iter() @@ -1770,7 +2285,12 @@ mod tests { let tracker = Arc::new(Mutex::new(ComputationTracker::new())); // Create SUM(amount) GROUP BY product + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["product".to_string()], vec![AggregateFunction::Sum("amount".to_string())], vec![ @@ -1807,10 +2327,13 @@ mod tests { Value::Integer(150), ], ); - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Check initial state: Widget=250, Gadget=200 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let widget_sum = state .changes .iter() @@ -1833,14 +2356,15 @@ mod tests { ], ); - let _output = agg.eval(delta.clone(), None); - agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); - // Should update Widget group twice (once in eval, once in commit) - assert_eq!(tracker.lock().unwrap().aggregation_updates, 2); + assert_eq!(tracker.lock().unwrap().aggregation_updates, 1); // Check final state - Widget should now be 300 (250 + 50) - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let widget = final_state .changes .iter() @@ -1852,7 +2376,12 @@ mod tests { #[test] fn test_count_and_sum_together() { // Test the example from DBSP_ROADMAP: COUNT(*) and SUM(amount) GROUP BY user_id + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["user_id".to_string()], vec![ AggregateFunction::Count, @@ -1879,12 +2408,15 @@ mod tests { 3, vec![Value::Integer(3), Value::Integer(2), Value::Integer(150)], ); - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Check initial state // User 1: count=2, sum=300 // User 2: count=1, sum=150 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 2); let user1 = state @@ -1911,11 +2443,13 @@ mod tests { 4, vec![Value::Integer(4), Value::Integer(1), Value::Integer(50)], ); - let _output = agg.eval(delta.clone(), None); - agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); // Check final state - user 1 should have updated count and sum - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let user1 = final_state .changes .iter() @@ -1928,7 +2462,12 @@ mod tests { #[test] fn test_avg_maintains_sum_and_count() { // Test AVG aggregation + // Create 
a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["category".to_string()], vec![AggregateFunction::Avg("value".to_string())], vec![ @@ -1964,12 +2503,15 @@ mod tests { Value::Integer(30), ], ); - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Check initial averages // Category A: avg = (10 + 20) / 2 = 15 // Category B: avg = 30 / 1 = 30 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let cat_a = state .changes .iter() @@ -1996,11 +2538,13 @@ mod tests { Value::Integer(30), ], ); - let _output = agg.eval(delta.clone(), None); - agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); // Check final state - Category A avg should now be (10 + 20 + 30) / 3 = 20 - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let cat_a = final_state .changes .iter() @@ -2012,7 +2556,12 @@ mod tests { #[test] fn test_delete_updates_aggregates() { // Test that deletes (negative weights) properly update aggregates + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["category".to_string()], vec![ AggregateFunction::Count, @@ -2043,10 +2592,13 @@ mod tests { Value::Integer(200), ], ); - agg.initialize(initial); + pager + .io + .block(|| agg.commit(initial.clone(), &mut cursor)) + .unwrap(); // Check initial state: count=2, sum=300 - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert!(!state.changes.is_empty()); let (row, _weight) = &state.changes[0]; assert_eq!(row.values[1], Value::Integer(2)); // count @@ -2063,11 +2615,13 @@ mod tests { ], ); - let _output = agg.eval(delta.clone(), None); - agg.commit(delta); + pager + .io + .block(|| agg.commit(delta.clone(), &mut cursor)) + .unwrap(); // Check final state - should update to count=1, sum=200 - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let cat_a = final_state .changes .iter() @@ -2083,17 +2637,29 @@ mod tests { let group_by = vec!["category".to_string()]; let input_columns = vec!["category".to_string(), "value".to_string()]; - let mut agg = AggregateOperator::new(group_by, aggregates.clone(), input_columns); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + + let mut agg = AggregateOperator::new( + 1, // operator_id for testing + group_by, + aggregates.clone(), + input_columns, + ); // Initialize with data let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Text("A".into()), Value::Integer(10)]); init_data.insert(2, vec![Value::Text("A".into()), Value::Integer(20)]); init_data.insert(3, vec![Value::Text("B".into()), Value::Integer(30)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial counts - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, 
&pager, &mut cursor); assert_eq!(state.changes.len(), 2); // Find group A and B @@ -2115,14 +2681,16 @@ mod tests { let mut delete_delta = Delta::new(); delete_delta.delete(1, vec![Value::Text("A".into()), Value::Integer(10)]); - let output = agg.eval(delete_delta.clone(), None); - agg.commit(delete_delta); + let output = pager + .io + .block(|| agg.commit(delete_delta.clone(), &mut cursor)) + .unwrap(); // Should emit retraction for old count and insertion for new count assert_eq!(output.changes.len(), 2); // Check final state - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a_final = final_state .changes .iter() @@ -2134,13 +2702,15 @@ mod tests { let mut delete_all_b = Delta::new(); delete_all_b.delete(3, vec![Value::Text("B".into()), Value::Integer(30)]); - let output_b = agg.eval(delete_all_b.clone(), None); - agg.commit(delete_all_b); + let output_b = pager + .io + .block(|| agg.commit(delete_all_b.clone(), &mut cursor)) + .unwrap(); assert_eq!(output_b.changes.len(), 1); // Only retraction, no new row assert_eq!(output_b.changes[0].1, -1); // Retraction // Final state should not have group B - let final_state2 = agg.get_current_state(); + let final_state2 = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(final_state2.changes.len(), 1); // Only group A remains assert_eq!(final_state2.changes[0].0.values[0], Value::Text("A".into())); } @@ -2151,7 +2721,16 @@ mod tests { let group_by = vec!["category".to_string()]; let input_columns = vec!["category".to_string(), "value".to_string()]; - let mut agg = AggregateOperator::new(group_by, aggregates.clone(), input_columns); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + + let mut agg = AggregateOperator::new( + 1, // operator_id for testing + group_by, + aggregates.clone(), + input_columns, + ); // Initialize with data let mut init_data = Delta::new(); @@ -2159,10 +2738,13 @@ mod tests { init_data.insert(2, vec![Value::Text("A".into()), Value::Integer(20)]); init_data.insert(3, vec![Value::Text("B".into()), Value::Integer(30)]); init_data.insert(4, vec![Value::Text("B".into()), Value::Integer(15)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial sums - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2181,11 +2763,13 @@ mod tests { let mut delete_delta = Delta::new(); delete_delta.delete(2, vec![Value::Text("A".into()), Value::Integer(20)]); - let _ = agg.eval(delete_delta.clone(), None); - agg.commit(delete_delta); + pager + .io + .block(|| agg.commit(delete_delta.clone(), &mut cursor)) + .unwrap(); // Check updated sum - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2198,11 +2782,13 @@ mod tests { delete_all_b.delete(3, vec![Value::Text("B".into()), Value::Integer(30)]); delete_all_b.delete(4, vec![Value::Text("B".into()), Value::Integer(15)]); - let _ = agg.eval(delete_all_b.clone(), None); - agg.commit(delete_all_b); + pager + .io + .block(|| agg.commit(delete_all_b.clone(), &mut cursor)) + .unwrap(); // Group B should be gone - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, 
&pager, &mut cursor); assert_eq!(final_state.changes.len(), 1); // Only group A remains assert_eq!(final_state.changes[0].0.values[0], Value::Text("A".into())); } @@ -2213,17 +2799,29 @@ mod tests { let group_by = vec!["category".to_string()]; let input_columns = vec!["category".to_string(), "value".to_string()]; - let mut agg = AggregateOperator::new(group_by, aggregates.clone(), input_columns); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + + let mut agg = AggregateOperator::new( + 1, // operator_id for testing + group_by, + aggregates.clone(), + input_columns, + ); // Initialize with data let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Text("A".into()), Value::Integer(10)]); init_data.insert(2, vec![Value::Text("A".into()), Value::Integer(20)]); init_data.insert(3, vec![Value::Text("A".into()), Value::Integer(30)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial average - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.values[1], Value::Float(20.0)); // AVG = (10+20+30)/3 = 20 @@ -2231,21 +2829,25 @@ mod tests { let mut delete_delta = Delta::new(); delete_delta.delete(2, vec![Value::Text("A".into()), Value::Integer(20)]); - let _ = agg.eval(delete_delta.clone(), None); - agg.commit(delete_delta); + pager + .io + .block(|| agg.commit(delete_delta.clone(), &mut cursor)) + .unwrap(); // Check updated average - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes[0].0.values[1], Value::Float(20.0)); // AVG = (10+30)/2 = 20 (same!) 
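(Aside: the unchanged average asserted above is expected, not a bug — deleting a value equal to the current mean leaves the mean intact. A minimal sketch of the arithmetic, assuming AVG is maintained incrementally as a (sum, count) pair, as the COUNT/SUM aggregates in these tests suggest; `avg_after_retraction` is a hypothetical helper for illustration, not the operator's actual code:)

```rust
// Retracting a row with weight -1 subtracts its value from the running sum
// and 1 from the running count; AVG is then recomputed from the pair.
fn avg_after_retraction(sum: f64, count: i64, retracted: f64) -> Option<f64> {
    let (sum, count) = (sum - retracted, count - 1);
    (count > 0).then(|| sum / count as f64)
}

fn main() {
    // Deleting the 20 from {10, 20, 30}: (60 - 20) / (3 - 1) = 20, unchanged.
    assert_eq!(avg_after_retraction(60.0, 3, 20.0), Some(20.0));
    // Then deleting the 30: (40 - 30) / (2 - 1) = 10.
    assert_eq!(avg_after_retraction(40.0, 2, 30.0), Some(10.0));
}
```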
// Delete another to change the average let mut delete_another = Delta::new(); delete_another.delete(3, vec![Value::Text("A".into()), Value::Integer(30)]); - let _ = agg.eval(delete_another.clone(), None); - agg.commit(delete_another); + pager + .io + .block(|| agg.commit(delete_another.clone(), &mut cursor)) + .unwrap(); - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes[0].0.values[1], Value::Float(10.0)); // AVG = 10/1 = 10 } @@ -2260,17 +2862,29 @@ mod tests { let group_by = vec!["category".to_string()]; let input_columns = vec!["category".to_string(), "value".to_string()]; - let mut agg = AggregateOperator::new(group_by, aggregates.clone(), input_columns); + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + + let mut agg = AggregateOperator::new( + 1, // operator_id for testing + group_by, + aggregates.clone(), + input_columns, + ); // Initialize with data let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Text("A".into()), Value::Integer(100)]); init_data.insert(2, vec![Value::Text("A".into()), Value::Integer(200)]); init_data.insert(3, vec![Value::Text("B".into()), Value::Integer(50)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial state - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2285,11 +2899,13 @@ mod tests { let mut delete_delta = Delta::new(); delete_delta.delete(1, vec![Value::Text("A".into()), Value::Integer(100)]); - let _ = agg.eval(delete_delta.clone(), None); - agg.commit(delete_delta); + pager + .io + .block(|| agg.commit(delete_delta.clone(), &mut cursor)) + .unwrap(); // Check all aggregates updated correctly - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2304,10 +2920,12 @@ mod tests { let mut insert_delta = Delta::new(); insert_delta.insert(4, vec![Value::Text("A".into()), Value::Float(50.5)]); - let _ = agg.eval(insert_delta.clone(), None); - agg.commit(insert_delta); + pager + .io + .block(|| agg.commit(insert_delta.clone(), &mut cursor)) + .unwrap(); - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); let group_a = state .changes .iter() @@ -2324,6 +2942,10 @@ mod tests { // When a row's rowid changes (e.g., UPDATE t SET a=1 WHERE a=3 on INTEGER PRIMARY KEY), // the operator should properly consolidate the state + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut filter = FilterOperator::new( FilterPredicate::GreaterThan { column: "b".to_string(), @@ -2335,10 +2957,12 @@ mod tests { // Initialize with a row (rowid=3, values=[3, 3]) let mut init_data = Delta::new(); init_data.insert(3, vec![Value::Integer(3), Value::Integer(3)]); - filter.initialize(init_data); + let state = pager + .io + .block(|| filter.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Check initial state - let state = filter.get_current_state(); assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.rowid, 3); assert_eq!( @@ -2352,29 +2976,15 @@ mod tests { update_delta.delete(3, 
vec![Value::Integer(3), Value::Integer(3)]); update_delta.insert(1, vec![Value::Integer(1), Value::Integer(3)]); - let output = filter.eval(update_delta.clone(), None); - filter.commit(update_delta); + let output = pager + .io + .block(|| filter.commit(update_delta.clone(), &mut cursor)) + .unwrap(); // The output delta should have both changes (both pass the filter b > 2) assert_eq!(output.changes.len(), 2); assert_eq!(output.changes[0].1, -1); // delete weight assert_eq!(output.changes[1].1, 1); // insert weight - - // The current state should be consolidated to only have rows with positive weight - let final_state = filter.get_current_state(); - - // After consolidation, we should have only one row with rowid=1 - assert_eq!( - final_state.changes.len(), - 1, - "State should be consolidated to have only one row" - ); - assert_eq!(final_state.changes[0].0.rowid, 1); - assert_eq!( - final_state.changes[0].0.values, - vec![Value::Integer(1), Value::Integer(3)] - ); - assert_eq!(final_state.changes[0].1, 1); // positive weight } // ============================================================================ @@ -2388,6 +2998,10 @@ mod tests { #[test] fn test_filter_eval_with_uncommitted() { + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut filter = FilterOperator::new( FilterPredicate::GreaterThan { column: "age".to_string(), @@ -2414,10 +3028,12 @@ mod tests { Value::Integer(20), ], ); - filter.initialize(init_data); + let state = pager + .io + .block(|| filter.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Verify initial state (only Alice passes filter) - let state = filter.get_current_state(); assert_eq!(state.changes.len(), 1); assert_eq!(state.changes[0].0.rowid, 1); @@ -2441,7 +3057,11 @@ mod tests { ); // Eval with uncommitted - should return filtered uncommitted rows - let result = filter.eval(Delta::new(), Some(uncommitted.clone())); + let mut eval_state = uncommitted.clone().into(); + let result = pager + .io + .block(|| filter.eval(&mut eval_state, &mut cursor)) + .unwrap(); assert_eq!( result.changes.len(), 1, @@ -2449,23 +3069,16 @@ mod tests { ); assert_eq!(result.changes[0].0.rowid, 3); - // Verify state hasn't changed - let state_after_eval = filter.get_current_state(); - assert_eq!( - state_after_eval.changes.len(), - 1, - "State should still only have Alice" - ); - assert_eq!(state_after_eval.changes[0].0.rowid, 1); - // Now commit the changes - filter.commit(uncommitted); + let state = pager + .io + .block(|| filter.commit(uncommitted.clone(), &mut cursor)) + .unwrap(); // Commit returns the delta of changes that passed the filter: only Charlie is new - let final_state = filter.get_current_state(); assert_eq!( - final_state.changes.len(), - 2, + state.changes.len(), + 1, "Commit delta should contain only Charlie" ); } @@ -2473,7 +3086,12 @@ mod tests { #[test] fn test_aggregate_eval_with_uncommitted_preserves_state() { // This is the critical test - aggregations must not modify internal state during eval + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["category".to_string()], vec![ AggregateFunction::Count, @@ -2512,10 +3130,13 @@ mod tests { Value::Integer(150), ], ); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), 
&mut cursor)) + .unwrap(); // Check initial state: A -> (count=2, sum=300), B -> (count=1, sum=150) - let initial_state = agg.get_current_state(); + let initial_state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(initial_state.changes.len(), 2); // Store initial state for comparison @@ -2547,7 +3168,11 @@ mod tests { ); // Eval with uncommitted should return the delta (changes to aggregates) - let result = agg.eval(Delta::new(), Some(uncommitted.clone())); + let mut eval_state = uncommitted.clone().into(); + let result = pager + .io + .block(|| agg.eval(&mut eval_state, &mut cursor)) + .unwrap(); // Result should contain updates for A and new group C // For A: retraction of old (2, 300) and insertion of new (3, 350) @@ -2555,7 +3180,7 @@ mod tests { assert!(!result.changes.is_empty(), "Should have aggregate changes"); // CRITICAL: Verify internal state hasn't changed - let state_after_eval = agg.get_current_state(); + let state_after_eval = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!( state_after_eval.changes.len(), 2, @@ -2579,10 +3204,13 @@ mod tests { ); // Now commit the changes - agg.commit(uncommitted); + pager + .io + .block(|| agg.commit(uncommitted.clone(), &mut cursor)) + .unwrap(); // State should now be updated - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(final_state.changes.len(), 3, "Should now have A, B, and C"); let a_final = final_state @@ -2622,7 +3250,12 @@ mod tests { fn test_aggregate_eval_multiple_times_without_commit() { // Test that calling eval multiple times with different uncommitted data // doesn't pollute the internal state + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec![], // No GROUP BY vec![ AggregateFunction::Count, @@ -2635,10 +3268,13 @@ mod tests { let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Integer(1), Value::Integer(100)]); init_data.insert(2, vec![Value::Integer(2), Value::Integer(200)]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Initial state: count=2, sum=300 - let initial_state = agg.get_current_state(); + let initial_state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(initial_state.changes.len(), 1); assert_eq!(initial_state.changes[0].0.values[0], Value::Integer(2)); assert_eq!(initial_state.changes[0].0.values[1], Value::Float(300.0)); @@ -2646,10 +3282,14 @@ mod tests { // First eval with uncommitted let mut uncommitted1 = Delta::new(); uncommitted1.insert(3, vec![Value::Integer(3), Value::Integer(50)]); - let _ = agg.eval(Delta::new(), Some(uncommitted1)); + let mut eval_state1 = uncommitted1.clone().into(); + let _ = pager + .io + .block(|| agg.eval(&mut eval_state1, &mut cursor)) + .unwrap(); // State should be unchanged - let state1 = agg.get_current_state(); + let state1 = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state1.changes[0].0.values[0], Value::Integer(2)); assert_eq!(state1.changes[0].0.values[1], Value::Float(300.0)); @@ -2657,20 +3297,28 @@ mod tests { let mut uncommitted2 = Delta::new(); uncommitted2.insert(4, vec![Value::Integer(4), Value::Integer(75)]); uncommitted2.insert(5, vec![Value::Integer(5), Value::Integer(25)]); - let _ = 
agg.eval(Delta::new(), Some(uncommitted2)); + let mut eval_state2 = uncommitted2.clone().into(); + let _ = pager + .io + .block(|| agg.eval(&mut eval_state2, &mut cursor)) + .unwrap(); // State should STILL be unchanged - let state2 = agg.get_current_state(); + let state2 = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state2.changes[0].0.values[0], Value::Integer(2)); assert_eq!(state2.changes[0].0.values[1], Value::Float(300.0)); // Third eval with deletion as uncommitted let mut uncommitted3 = Delta::new(); uncommitted3.delete(1, vec![Value::Integer(1), Value::Integer(100)]); - let _ = agg.eval(Delta::new(), Some(uncommitted3)); + let mut eval_state3 = uncommitted3.clone().into(); + let _ = pager + .io + .block(|| agg.eval(&mut eval_state3, &mut cursor)) + .unwrap(); // State should STILL be unchanged - let state3 = agg.get_current_state(); + let state3 = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state3.changes[0].0.values[0], Value::Integer(2)); assert_eq!(state3.changes[0].0.values[1], Value::Float(300.0)); } @@ -2678,7 +3326,12 @@ mod tests { #[test] fn test_aggregate_eval_with_mixed_committed_and_uncommitted() { // Test eval with both committed delta and uncommitted changes + // Create a persistent pager for the test + let (pager, root_page_id) = create_test_pager(); + let mut cursor = BTreeCursor::new_table(None, pager.clone(), root_page_id, 10); + let mut agg = AggregateOperator::new( + 1, // operator_id for testing vec!["type".to_string()], vec![AggregateFunction::Count], vec!["id".to_string(), "type".to_string()], @@ -2688,7 +3341,10 @@ mod tests { let mut init_data = Delta::new(); init_data.insert(1, vec![Value::Integer(1), Value::Text("X".into())]); init_data.insert(2, vec![Value::Integer(2), Value::Text("Y".into())]); - agg.initialize(init_data); + pager + .io + .block(|| agg.commit(init_data.clone(), &mut cursor)) + .unwrap(); // Create a committed delta (to be processed) let mut committed_delta = Delta::new(); @@ -2700,20 +3356,76 @@ mod tests { uncommitted.insert(5, vec![Value::Integer(5), Value::Text("Z".into())]); // Eval with both - should process both but not commit - let result = agg.eval(committed_delta.clone(), Some(uncommitted)); + let mut combined = committed_delta.clone(); + combined.merge(&uncommitted); + let mut eval_state = combined.clone().into(); + let result = pager + .io + .block(|| agg.eval(&mut eval_state, &mut cursor)) + .unwrap(); // Result should reflect changes from both - assert!(!result.changes.is_empty()); + assert!(!result.changes.is_empty(), "Result should not be empty"); + + // Verify the DBSP pattern: retraction (-1) followed by insertion (1) for updates, + // and just insertion (1) for new groups + + // We expect exactly 5 changes: + // - X: retraction + insertion (was 1, now 2) + // - Y: retraction + insertion (was 1, now 2) + // - Z: insertion only (new group with count 1) + assert_eq!( + result.changes.len(), + 5, + "Should have 5 changes (2 retractions + 3 insertions)" + ); + + // Sort by group name then by weight to get predictable order + let mut sorted_changes: Vec<_> = result.changes.iter().collect(); + sorted_changes.sort_by(|a, b| { + let a_group = &a.0.values[0]; + let b_group = &b.0.values[0]; + match a_group.partial_cmp(b_group).unwrap() { + std::cmp::Ordering::Equal => a.1.cmp(&b.1), // Sort by weight if same group + other => other, + } + }); + + // Check X group: should have retraction (-1) for count=1, then insertion (1) for count=2 + assert_eq!(sorted_changes[0].0.values[0], 
Value::Text("X".into())); + assert_eq!(sorted_changes[0].0.values[1], Value::Integer(1)); // old count + assert_eq!(sorted_changes[0].1, -1); // retraction + + assert_eq!(sorted_changes[1].0.values[0], Value::Text("X".into())); + assert_eq!(sorted_changes[1].0.values[1], Value::Integer(2)); // new count + assert_eq!(sorted_changes[1].1, 1); // insertion + + // Check Y group: should have retraction (-1) for count=1, then insertion (1) for count=2 + assert_eq!(sorted_changes[2].0.values[0], Value::Text("Y".into())); + assert_eq!(sorted_changes[2].0.values[1], Value::Integer(1)); // old count + assert_eq!(sorted_changes[2].1, -1); // retraction + + assert_eq!(sorted_changes[3].0.values[0], Value::Text("Y".into())); + assert_eq!(sorted_changes[3].0.values[1], Value::Integer(2)); // new count + assert_eq!(sorted_changes[3].1, 1); // insertion + + // Check Z group: should only have insertion (1) for count=1 (new group) + assert_eq!(sorted_changes[4].0.values[0], Value::Text("Z".into())); + assert_eq!(sorted_changes[4].0.values[1], Value::Integer(1)); // new count + assert_eq!(sorted_changes[4].1, 1); // insertion only (no retraction as it's new) // But internal state should be unchanged - let state = agg.get_current_state(); + let state = get_current_state_from_btree(&agg, &pager, &mut cursor); assert_eq!(state.changes.len(), 2, "Should still have only X and Y"); // Now commit only the committed_delta - agg.commit(committed_delta); + pager + .io + .block(|| agg.commit(committed_delta.clone(), &mut cursor)) + .unwrap(); // State should now have X count=2, Y count=1 - let final_state = agg.get_current_state(); + let final_state = get_current_state_from_btree(&agg, &pager, &mut cursor); let x = final_state .changes .iter() diff --git a/core/incremental/view.rs b/core/incremental/view.rs index e7ba76980..b15faf847 100644 --- a/core/incremental/view.rs +++ b/core/incremental/view.rs @@ -1,13 +1,16 @@ use super::compiler::{DbspCircuit, DbspCompiler, DeltaSet}; -use super::dbsp::{RowKeyStream, RowKeyZSet}; -use super::operator::{ComputationTracker, Delta, FilterPredicate}; +use super::dbsp::Delta; +use super::operator::{ComputationTracker, FilterPredicate}; use crate::schema::{BTreeTable, Column, Schema}; +use crate::storage::btree::BTreeCursor; use crate::translate::logical::LogicalPlanBuilder; -use crate::types::{IOCompletions, IOResult, Value}; +use crate::types::{IOResult, Value}; use crate::util::extract_view_columns; -use crate::{io_yield_one, Completion, LimboError, Result, Statement}; -use std::collections::{BTreeMap, HashMap}; +use crate::{return_if_io, LimboError, Pager, Result, Statement}; +use std::cell::RefCell; +use std::collections::HashMap; use std::fmt; +use std::rc::Rc; use std::sync::{Arc, Mutex}; use turso_parser::ast; use turso_parser::{ @@ -23,18 +26,26 @@ pub enum PopulateState { Processing { stmt: Box<Statement>, rows_processed: usize, + /// If we're in the middle of processing a row (merge_delta returned I/O) + pending_row: Option<(i64, Vec<Value>)>, // (rowid, values) }, /// Population complete Done, } impl fmt::Debug for PopulateState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { PopulateState::Start => write!(f, "Start"), - PopulateState::Processing { rows_processed, .. } => f + PopulateState::Processing { + rows_processed, + pending_row, + ..
+ } => f .debug_struct("Processing") .field("rows_processed", rows_processed) + .field("has_pending", &pending_row.is_some()) .finish(), PopulateState::Done => write!(f, "Done"), } } } @@ -45,11 +56,95 @@ impl fmt::Debug for PopulateState { #[derive(Debug, Clone, Default)] pub struct ViewTransactionState { // Per-connection delta for uncommitted changes (contains both weights and values) - pub delta: Delta, + // Using RefCell for interior mutability + delta: RefCell<Delta>, } -/// Incremental view that maintains a stream of row keys using DBSP-style computation -/// The actual row data is stored as transformed Values +impl ViewTransactionState { + /// Create a new transaction state + pub fn new() -> Self { + Self { + delta: RefCell::new(Delta::new()), + } + } + + /// Insert a row into the delta + pub fn insert(&self, key: i64, values: Vec<Value>) { + self.delta.borrow_mut().insert(key, values); + } + + /// Delete a row from the delta + pub fn delete(&self, key: i64, values: Vec<Value>) { + self.delta.borrow_mut().delete(key, values); + } + + /// Clear all changes in the delta + pub fn clear(&self) { + self.delta.borrow_mut().changes.clear(); + } + + /// Get a clone of the current delta + pub fn get_delta(&self) -> Delta { + self.delta.borrow().clone() + } + + /// Check if the delta is empty + pub fn is_empty(&self) -> bool { + self.delta.borrow().is_empty() + } + + /// Returns how many elements exist in the delta. + pub fn len(&self) -> usize { + self.delta.borrow().len() + } +} + +/// Container for all view transaction states within a connection +/// Provides interior mutability for the map of view states +#[derive(Debug, Clone, Default)] +pub struct AllViewsTxState { + states: Rc<RefCell<HashMap<String, Rc<ViewTransactionState>>>>, +} + +impl AllViewsTxState { + /// Create a new container for view transaction states + pub fn new() -> Self { + Self { + states: Rc::new(RefCell::new(HashMap::new())), + } + } + + /// Get or create a transaction state for a view + pub fn get_or_create(&self, view_name: &str) -> Rc<ViewTransactionState> { + let mut states = self.states.borrow_mut(); + states + .entry(view_name.to_string()) + .or_insert_with(|| Rc::new(ViewTransactionState::new())) + .clone() + } + + /// Get a transaction state for a view if it exists + pub fn get(&self, view_name: &str) -> Option<Rc<ViewTransactionState>> { + self.states.borrow().get(view_name).cloned() + } + + /// Clear all transaction states + pub fn clear(&self) { + self.states.borrow_mut().clear(); + } + + /// Check if there are no transaction states + pub fn is_empty(&self) -> bool { + self.states.borrow().is_empty() + } + + /// Get all view names that have transaction states + pub fn get_view_names(&self) -> Vec<String> { + self.states.borrow().keys().cloned().collect() + } +} + +/// Incremental view that maintains its state through a DBSP circuit /// /// This version keeps everything in-memory. This is acceptable for small views, since DBSP /// doesn't have to track the history of changes. Still for very large views (think of the result @@ -62,12 +157,7 @@ pub struct ViewTransactionState { /// Uses DBSP circuits for incremental computation.
#[derive(Debug)] pub struct IncrementalView { - // Stream of row keys for this view - stream: RowKeyStream, name: String, - // Store the actual row data as Values, keyed by row_key - // Using BTreeMap for ordered iteration - pub records: BTreeMap<i64, Vec<Value>>, // WHERE clause predicate for filtering (kept for compatibility) pub where_predicate: FilterPredicate, // The SELECT statement that defines how to transform input data @@ -75,8 +165,6 @@ pub struct ViewTransactionState { // DBSP circuit that encapsulates the computation circuit: DbspCircuit, - // Track whether circuit has been initialized with data - circuit_initialized: bool, // Tables referenced by this view (extracted from FROM clause and JOINs) base_table: Arc<BTreeTable>, @@ -88,6 +176,8 @@ pub struct ViewTransactionState { // We will use this one day to export rows_read, but for now, will just test that we're doing the expected amount of compute #[cfg_attr(not(test), allow(dead_code))] pub tracker: Arc<Mutex<ComputationTracker>>, + // Root page of the btree storing the materialized state (0 for unmaterialized) + root_page: usize, } impl IncrementalView { @@ -110,6 +200,8 @@ impl IncrementalView { select: &ast::Select, schema: &Schema, _base_table: &Arc<BTreeTable>, + main_data_root: usize, + internal_state_root: usize, ) -> Result<DbspCircuit> { // Build the logical plan from the SELECT statement let mut builder = LogicalPlanBuilder::new(schema); let stmt = ast::Stmt::Select(select.clone()); let logical_plan = builder.build_statement(&stmt)?; - // Compile the logical plan to a DBSP circuit - let compiler = DbspCompiler::new(); + // Compile the logical plan to a DBSP circuit with the storage roots + let compiler = DbspCompiler::new(main_data_root, internal_state_root); let circuit = compiler.compile(&logical_plan)?; Ok(circuit) @@ -145,7 +237,37 @@ impl IncrementalView { false } - pub fn from_sql(sql: &str, schema: &Schema) -> Result<Self> { + /// Validate a SELECT statement and extract the columns it would produce + /// This is used during CREATE MATERIALIZED VIEW to validate the view before storing it + pub fn validate_and_extract_columns( + select: &ast::Select, + schema: &Schema, + ) -> Result<Vec<Column>> { + // For now, just extract columns from a simple select + // This will need to be expanded to handle joins, aggregates, etc.
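+ // Illustrative note (assumed behavior, not part of the patch): for `CREATE MATERIALIZED VIEW v AS SELECT * FROM t`, this would return the full column list of `t`, which the caller can use to build the view's backing BTreeTable before the DBSP circuit is compiled.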
+ + // Get the base table name + let base_table_name = Self::extract_base_table(select).ok_or_else(|| { + LimboError::ParseError("Cannot extract base table from SELECT".to_string()) + })?; + + // Get the table from schema + let table = schema + .get_table(&base_table_name) + .and_then(|t| t.btree()) + .ok_or_else(|| LimboError::ParseError(format!("Table {base_table_name} not found")))?; + + // For now, return all columns from the base table + // In the future, this should parse the select list and handle projections + Ok(table.columns.clone()) + } + + pub fn from_sql( + sql: &str, + schema: &Schema, + main_data_root: usize, + internal_state_root: usize, + ) -> Result<Self> { let mut parser = Parser::new(sql.as_bytes()); let cmd = parser.next_cmd()?; let cmd = cmd.expect("View is an empty statement"); @@ -155,7 +277,13 @@ impl IncrementalView { view_name, columns: _, select, - }) => IncrementalView::from_stmt(view_name, select, schema), + }) => IncrementalView::from_stmt( + view_name, + select, + schema, + main_data_root, + internal_state_root, + ), _ => Err(LimboError::ParseError(format!( "View is not a CREATE MATERIALIZED VIEW statement: {sql}" ))), @@ -166,6 +294,8 @@ impl IncrementalView { view_name: ast::QualifiedName, select: ast::Select, schema: &Schema, + main_data_root: usize, + internal_state_root: usize, ) -> Result<Self> { let name = view_name.name.as_str().to_string(); @@ -203,9 +333,12 @@ impl IncrementalView { base_table, view_columns, schema, + main_data_root, + internal_state_root, ) } + #[allow(clippy::too_many_arguments)] pub fn new( name: String, where_predicate: FilterPredicate, @@ -213,30 +346,31 @@ impl IncrementalView { base_table: Arc<BTreeTable>, columns: Vec<Column>, schema: &Schema, + main_data_root: usize, + internal_state_root: usize, ) -> Result<Self> { - let records = BTreeMap::new(); - // Create the tracker that will be shared by all operators let tracker = Arc::new(Mutex::new(ComputationTracker::new())); // Compile the SELECT statement into a DBSP circuit - let circuit = Self::try_compile_circuit(&select_stmt, schema, &base_table)?; - - // Circuit will be initialized when we first call merge_delta - let circuit_initialized = false; + let circuit = Self::try_compile_circuit( + &select_stmt, + schema, + &base_table, + main_data_root, + internal_state_root, + )?; Ok(Self { - stream: RowKeyStream::from_zset(RowKeyZSet::new()), name, - records, where_predicate, select_stmt, circuit, - circuit_initialized, base_table, columns, populate_state: PopulateState::Start, tracker, + root_page: main_data_root, }) } @@ -244,6 +378,29 @@ impl IncrementalView { &self.name } + pub fn base_table(&self) -> &Arc<BTreeTable> { + &self.base_table + } + + /// Execute the circuit with uncommitted changes to get processed delta + pub fn execute_with_uncommitted( + &mut self, + uncommitted: DeltaSet, + pager: Rc<Pager>, + execute_state: &mut crate::incremental::compiler::ExecuteState, + ) -> crate::Result<IOResult<Delta>> { + // Initialize execute_state with the input data + *execute_state = crate::incremental::compiler::ExecuteState::Init { + input_data: uncommitted, + }; + self.circuit.execute(pager, execute_state) + } + + /// Get the root page for this materialized view's btree + pub fn get_root_page(&self) -> usize { + self.root_page + } + /// Get all table names referenced by this view pub fn get_referenced_table_names(&self) -> Vec<String> { vec![self.base_table.name.clone()] @@ -348,132 +505,189 @@ impl IncrementalView { /// Populate the view by scanning the source table using a state machine /// This can be called multiple times and will resume from where
it left off + /// This method is only for materialized views and will persist data to the btree pub fn populate_from_table( &mut self, conn: &std::sync::Arc<crate::Connection>, + pager: &std::rc::Rc<Pager>, + _btree_cursor: &mut BTreeCursor, ) -> crate::Result<IOResult<()>> { // If already populated, return immediately if matches!(self.populate_state, PopulateState::Done) { return Ok(IOResult::Done(())); } - const BATCH_SIZE: usize = 100; // Process 100 rows at a time before yielding + // Assert that this is a materialized view with a root page + assert!( + self.root_page != 0, + "populate_from_table should only be called for materialized views with root_page" + ); loop { - match &mut self.populate_state { - PopulateState::Start => { - // Generate the SQL query for populating the view - // It is best to use a standard query than a cursor for two reasons: - // 1) Using a sql query will allow us to be much more efficient in cases where we only want - // some rows, in particular for indexed filters - // 2) There are two types of cursors: index and table. In some situations (like for example - // if the table has an integer primary key), the key will be exclusively in the index - // btree and not in the table btree. Using cursors would force us to be aware of this - // distinction (and others), and ultimately lead to reimplementing the whole query - // machinery (next step is which index is best to use, etc) - let query = self.sql_for_populate()?; + // To avoid borrow checker issues, we need to handle state transitions carefully + let needs_start = matches!(self.populate_state, PopulateState::Start); - // Prepare the statement - let stmt = conn.prepare(&query)?; + if needs_start { + // Generate the SQL query for populating the view + // It is better to use a standard query than a cursor for two reasons: + // 1) Using a sql query will allow us to be much more efficient in cases where we only want + // some rows, in particular for indexed filters + // 2) There are two types of cursors: index and table. In some situations (like for example + // if the table has an integer primary key), the key will be exclusively in the index + // btree and not in the table btree.
Using cursors would force us to be aware of this + // distinction (and others), and ultimately lead to reimplementing the whole query + // machinery (next step is which index is best to use, etc) + let query = self.sql_for_populate()?; - self.populate_state = PopulateState::Processing { - stmt: Box::new(stmt), - rows_processed: 0, - }; - // Continue to next state + // Prepare the statement + let stmt = conn.prepare(&query)?; + + self.populate_state = PopulateState::Processing { + stmt: Box::new(stmt), + rows_processed: 0, + pending_row: None, + }; + // Continue to next state + continue; + } + + // Handle Done state + if matches!(self.populate_state, PopulateState::Done) { + return Ok(IOResult::Done(())); + } + + // Handle Processing state - extract state to avoid borrow issues + let (mut stmt, mut rows_processed, pending_row) = + match std::mem::replace(&mut self.populate_state, PopulateState::Done) { + PopulateState::Processing { + stmt, + rows_processed, + pending_row, + } => (stmt, rows_processed, pending_row), + _ => unreachable!("We already handled Start and Done states"), + }; + + // If we have a pending row from a previous I/O interruption, process it first + if let Some((rowid, values)) = pending_row { + // Create a single-row delta for the pending row + let mut single_row_delta = Delta::new(); + single_row_delta.insert(rowid, values.clone()); + + // Process the pending row with the pager + match self.merge_delta(&single_row_delta, pager.clone())? { + IOResult::Done(_) => { + // Row processed successfully, continue to next row + rows_processed += 1; + // Continue to fetch next row from statement + } + IOResult::IO(io) => { + // Still not done, save state with pending row + self.populate_state = PopulateState::Processing { + stmt, + rows_processed, + pending_row: Some((rowid, values)), // Keep the pending row + }; + return Ok(IOResult::IO(io)); + } } + } - PopulateState::Processing { - stmt, - rows_processed, - } => { - // Collect rows into a delta batch - let mut batch_delta = Delta::new(); - let mut batch_count = 0; + // Process rows one at a time - no batching + loop { + // This step() call resumes from where the statement left off + match stmt.step()? { + crate::vdbe::StepResult::Row => { + // Get the row + let row = stmt.row().unwrap(); - loop { - if batch_count >= BATCH_SIZE { - // Process this batch through the standard pipeline - self.merge_delta(&batch_delta); - // Yield control after processing a batch - // TODO: currently this inner statement is the one that is tracking completions - // so as a stop gap we can just return a dummy completion here - io_yield_one!(Completion::new_dummy()); - } + // Extract values from the row + let all_values: Vec<Value> = + row.get_values().cloned().collect(); - // This step() call resumes from where the statement left off - match stmt.step()?
{ - crate::vdbe::StepResult::Row => { - // Get the row - let row = stmt.row().unwrap(); - - // Extract values from the row - let all_values: Vec = - row.get_values().cloned().collect(); - - // Determine how to extract the rowid - // If there's a rowid alias (INTEGER PRIMARY KEY), the rowid is one of the columns - // Otherwise, it's the last value we explicitly selected - let (rowid, values) = if let Some((idx, _)) = - self.base_table.get_rowid_alias_column() - { - // The rowid is the value at the rowid alias column index - let rowid = match all_values.get(idx) { - Some(crate::types::Value::Integer(id)) => *id, - _ => { - // This shouldn't happen - rowid alias must be an integer - *rows_processed += 1; - batch_count += 1; - continue; - } - }; - // All values are table columns (no separate rowid was selected) - (rowid, all_values) - } else { - // The last value is the explicitly selected rowid - let rowid = match all_values.last() { - Some(crate::types::Value::Integer(id)) => *id, - _ => { - // This shouldn't happen - rowid must be an integer - *rows_processed += 1; - batch_count += 1; - continue; - } - }; - // Get all values except the rowid - let values = all_values[..all_values.len() - 1].to_vec(); - (rowid, values) + // Determine how to extract the rowid + // If there's a rowid alias (INTEGER PRIMARY KEY), the rowid is one of the columns + // Otherwise, it's the last value we explicitly selected + let (rowid, values) = + if let Some((idx, _)) = self.base_table.get_rowid_alias_column() { + // The rowid is the value at the rowid alias column index + let rowid = match all_values.get(idx) { + Some(crate::types::Value::Integer(id)) => *id, + _ => { + // This shouldn't happen - rowid alias must be an integer + rows_processed += 1; + continue; + } }; + // All values are table columns (no separate rowid was selected) + (rowid, all_values) + } else { + // The last value is the explicitly selected rowid + let rowid = match all_values.last() { + Some(crate::types::Value::Integer(id)) => *id, + _ => { + // This shouldn't happen - rowid must be an integer + rows_processed += 1; + continue; + } + }; + // Get all values except the rowid + let values = all_values[..all_values.len() - 1].to_vec(); + (rowid, values) + }; - // Add to batch delta - let merge_delta handle filtering and aggregation - batch_delta.insert(rowid, values); + // Create a single-row delta and process it immediately + let mut single_row_delta = Delta::new(); + single_row_delta.insert(rowid, values.clone()); - *rows_processed += 1; - batch_count += 1; + // Process this single row through merge_delta with the pager + match self.merge_delta(&single_row_delta, pager.clone())? 
{ + IOResult::Done(_) => { + // Row processed successfully, continue to next row + rows_processed += 1; } - crate::vdbe::StepResult::Done => { - // Process any remaining rows in the batch - self.merge_delta(&batch_delta); - // All rows processed, move to Done state - self.populate_state = PopulateState::Done; - return Ok(IOResult::Done(())); - } - crate::vdbe::StepResult::Interrupt | crate::vdbe::StepResult::Busy => { - return Err(LimboError::Busy); - } - crate::vdbe::StepResult::IO => { - // Process current batch before yielding - self.merge_delta(&batch_delta); - // The Statement needs to wait for IO - io_yield_one!(Completion::new_dummy()); + IOResult::IO(io) => { + // Save state and return I/O + // We'll resume at the SAME row when called again (don't increment rows_processed) + // The circuit still has unfinished work for this row + self.populate_state = PopulateState::Processing { + stmt, + rows_processed, // Don't increment - row not done yet! + pending_row: Some((rowid, values)), // Save the row for resumption + }; + return Ok(IOResult::IO(io)); } } } - } - PopulateState::Done => { - // Already populated - return Ok(IOResult::Done(())); + crate::vdbe::StepResult::Done => { + // All rows processed, we're done + self.populate_state = PopulateState::Done; + return Ok(IOResult::Done(())); + } + + crate::vdbe::StepResult::Interrupt | crate::vdbe::StepResult::Busy => { + // Save state before returning error + self.populate_state = PopulateState::Processing { + stmt, + rows_processed, + pending_row: None, // No pending row when interrupted between rows + }; + return Err(LimboError::Busy); + } + + crate::vdbe::StepResult::IO => { + // Statement needs I/O - save state and return + self.populate_state = PopulateState::Processing { + stmt, + rows_processed, + pending_row: None, // No pending row when interrupted between rows + }; + // TODO: Get the actual I/O completion from the statement + let completion = crate::io::Completion::new_dummy(); + return Ok(IOResult::IO(crate::types::IOCompletions::Single( + completion, + ))); + } } } } @@ -555,95 +769,23 @@ impl IncrementalView { None } - /// Get the current records as an iterator - for cursor-based access - pub fn iter(&self) -> impl Iterator)> + '_ { - self.stream.to_vec().into_iter().filter_map(move |row| { - self.records - .get(&row.rowid) - .map(|values| (row.rowid, values.clone())) - }) - } - - /// Get current data merged with transaction state - pub fn current_data(&self, tx_state: Option<&ViewTransactionState>) -> Vec<(i64, Vec)> { - if let Some(tx_state) = tx_state { - // Use circuit to process uncommitted changes - let mut uncommitted = DeltaSet::new(); - uncommitted.insert(self.base_table.name.clone(), tx_state.delta.clone()); - - // Execute with uncommitted changes (won't affect circuit state) - match self.circuit.execute(HashMap::new(), uncommitted) { - Ok(processed_delta) => { - // Merge processed delta with committed records - let mut result_map: BTreeMap> = self.records.clone(); - for (row, weight) in &processed_delta.changes { - if *weight > 0 { - result_map.insert(row.rowid, row.values.clone()); - } else if *weight < 0 { - result_map.remove(&row.rowid); - } - } - result_map.into_iter().collect() - } - Err(e) => { - // Return error or panic - no fallback - panic!("Failed to execute circuit with uncommitted data: {e:?}"); - } - } - } else { - // No transaction state: return committed records - self.records.clone().into_iter().collect() - } - } - /// Merge a delta of changes into the view's current state - pub fn merge_delta(&mut 
self, delta: &Delta) { + pub fn merge_delta( + &mut self, + delta: &Delta, + pager: std::rc::Rc<Pager>, + ) -> crate::Result<IOResult<()>> { // Early return if delta is empty if delta.is_empty() { - return; + return Ok(IOResult::Done(())); } - // Use the circuit to process the delta + // Use the circuit to process the delta and write to btree let mut input_data = HashMap::new(); input_data.insert(self.base_table.name.clone(), delta.clone()); - // If circuit hasn't been initialized yet, initialize it first - // This happens during populate_from_table - if !self.circuit_initialized { - // Initialize the circuit with empty state - self.circuit - .initialize(HashMap::new()) - .expect("Failed to initialize circuit"); - self.circuit_initialized = true; - } - - // Execute the circuit to process the delta - let current_delta = match self.circuit.execute(input_data.clone(), DeltaSet::empty()) { - Ok(output) => { - // Commit the changes to the circuit's internal state - self.circuit - .commit(input_data) - .expect("Failed to commit to circuit"); - output - } - Err(e) => { - panic!("Failed to execute circuit: {e:?}"); - } - }; - - // Update records and stream with the processed delta - let mut zset_delta = RowKeyZSet::new(); - - for (row, weight) in &current_delta.changes { - if *weight > 0 { - self.records.insert(row.rowid, row.values.clone()); - zset_delta.insert(row.clone(), 1); - } else if *weight < 0 { - self.records.remove(&row.rowid); - zset_delta.insert(row.clone(), -1); - } - } - - self.stream.apply_delta(&zset_delta); + // The circuit now handles all btree I/O internally with the provided pager + let _delta = return_if_io!(self.circuit.commit(input_data, pager)); + Ok(IOResult::Done(())) } } diff --git a/core/lib.rs b/core/lib.rs index 2ca949e76..2f32774df 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -32,7 +32,6 @@ mod uuid; mod vdbe; mod vector; mod vtab; -mod vtab_view; #[cfg(feature = "fuzz")] pub mod numeric; @@ -40,7 +39,7 @@ pub mod numeric; #[cfg(not(feature = "fuzz"))] mod numeric; -use crate::incremental::view::ViewTransactionState; +use crate::incremental::view::AllViewsTxState; use crate::storage::encryption::CipherMode; use crate::translate::optimizer::optimize_plan; use crate::translate::pragma::TURSO_CDC_DEFAULT_TABLE_NAME; @@ -441,13 +440,6 @@ impl Database { Ok(()) })?; } - // FIXME: the correct way to do this is to just materialize the view. - // But this will allow us to keep going. - let conn = db.connect()?; - let pager = conn.pager.borrow().clone(); - pager - .io - .block(|| conn.schema.borrow().populate_materialized_views(&conn))?; Ok(db) } @@ -489,7 +481,7 @@ impl Database { attached_databases: RefCell::new(DatabaseCatalog::new()), query_only: Cell::new(false), mv_tx_id: Cell::new(None), - view_transaction_states: RefCell::new(HashMap::new()), + view_transaction_states: AllViewsTxState::new(), metrics: RefCell::new(ConnectionMetrics::new()), is_nested_stmt: Cell::new(false), encryption_key: RefCell::new(None), @@ -926,7 +918,7 @@ pub struct Connection { /// Per-connection view transaction states for uncommitted changes. This represents /// one entry per view that was touched in the transaction. - view_transaction_states: RefCell<HashMap<String, ViewTransactionState>>, + view_transaction_states: AllViewsTxState, /// Connection-level metrics aggregation pub metrics: RefCell<ConnectionMetrics>, /// Whether the connection is executing a statement initiated by another statement. @@ -1072,7 +1064,7 @@ impl Connection { // Preserve existing views to avoid expensive repopulation. // TODO: We may not need to do this if we materialize our views.
- let existing_views = self.schema.borrow().materialized_views.clone(); + let existing_views = self.schema.borrow().incremental_views.clone(); // TODO: this is hack to avoid a cyclical problem with schema reprepare // The problem here is that we prepare a statement here, but when the statement tries @@ -1096,13 +1088,6 @@ impl Connection { self.with_schema_mut(|schema| { *schema = fresh; }); - - { - let schema = self.schema.borrow(); - pager - .io - .block(|| schema.populate_materialized_views(self))?; - } Result::Ok(()) } @@ -1716,7 +1701,7 @@ impl Connection { .expect("query must be parsed to statement"); let syms = self.syms.borrow(); self.with_schema_mut(|schema| { - let existing_views = schema.materialized_views.clone(); + let existing_views = schema.incremental_views.clone(); if let Err(LimboError::ExtensionError(e)) = parse_schema_rows(rows, schema, &syms, None, existing_views) { diff --git a/core/schema.rs b/core/schema.rs index 53a2a55e0..142101966 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1,8 +1,4 @@ use crate::incremental::view::IncrementalView; -use crate::types::IOResult; - -/// Type alias for the materialized views collection -pub type MaterializedViewsMap = HashMap<String, Arc<Mutex<IncrementalView>>>; /// Simple view structure for non-materialized views #[derive(Debug, Clone)] @@ -23,12 +19,12 @@ use crate::translate::plan::SelectPlan; use crate::util::{ module_args_from_sql, module_name_from_sql, type_from_name, IOExt, UnparsedFromSqlIndex, }; -use crate::{return_if_io, LimboError, MvCursor, Pager, RefValue, SymbolTable, VirtualTable}; use crate::{util::normalize_ident, Result}; +use crate::{LimboError, MvCursor, Pager, RefValue, SymbolTable, VirtualTable}; use core::fmt; use std::cell::RefCell; use std::collections::hash_map::Entry; -use std::collections::{BTreeSet, HashMap}; +use std::collections::{BTreeSet, HashMap, HashSet}; use std::ops::Deref; use std::rc::Rc; use std::sync::Arc; @@ -42,6 +38,7 @@ use turso_parser::{ const SCHEMA_TABLE_NAME: &str = "sqlite_schema"; const SCHEMA_TABLE_NAME_ALT: &str = "sqlite_master"; +pub const DBSP_TABLE_PREFIX: &str = "__turso_internal_dbsp_state_"; /// Check if a table name refers to a system table that should be protected from direct writes pub fn is_system_table(table_name: &str) -> bool { @@ -52,7 +49,14 @@ pub fn is_system_table(table_name: &str) -> bool { #[derive(Debug)] pub struct Schema { pub tables: HashMap<String, Arc<Table>>, - pub materialized_views: MaterializedViewsMap, + + /// Track which tables are actually materialized views + pub materialized_view_names: HashSet<String>, + /// Store original SQL for materialized views (for .schema command) + pub materialized_view_sql: HashMap<String, String>, + /// The incremental view objects (DBSP circuits) + pub incremental_views: HashMap<String, Arc<Mutex<IncrementalView>>>, + pub views: ViewsMap, /// table_name to list of indexes for the table @@ -81,12 +85,16 @@ impl Schema { Arc::new(Table::Virtual(Arc::new((*function).clone()))), ); } - let materialized_views: MaterializedViewsMap = HashMap::new(); + let materialized_view_names = HashSet::new(); + let materialized_view_sql = HashMap::new(); + let incremental_views = HashMap::new(); let views: ViewsMap = HashMap::new(); let table_to_materialized_views: HashMap<String, Vec<String>> = HashMap::new(); Self { tables, - materialized_views, + materialized_view_names, + materialized_view_sql, + incremental_views, views, indexes, has_indexes, @@ -102,41 +110,51 @@ impl Schema { .iter() .any(|idx| idx.1.iter().any(|i| i.name == name)) } - pub fn add_materialized_view(&mut self, view: IncrementalView) { + pub fn add_materialized_view(&mut self, view:
IncrementalView, table: Arc<Table>, sql: String) { let name = normalize_ident(view.name()); - self.materialized_views + + // Add to tables (so it appears as a regular table) + self.tables.insert(name.clone(), table); + + // Track that this is a materialized view + self.materialized_view_names.insert(name.clone()); + self.materialized_view_sql.insert(name.clone(), sql); + + // Store the incremental view (DBSP circuit) + self.incremental_views .insert(name, Arc::new(Mutex::new(view))); } pub fn get_materialized_view(&self, name: &str) -> Option<Arc<Mutex<IncrementalView>>> { let name = normalize_ident(name); - self.materialized_views.get(&name).cloned() + self.incremental_views.get(&name).cloned() + } + + pub fn is_materialized_view(&self, name: &str) -> bool { + let name = normalize_ident(name); + self.materialized_view_names.contains(&name) } pub fn remove_view(&mut self, name: &str) -> Result<()> { let name = normalize_ident(name); - // Check if we have both a regular view and a materialized view with the same name - // It should be impossible to have both - let has_regular_view = self.views.contains_key(&name); - let has_materialized_view = self.materialized_views.contains_key(&name); - - assert!( - !(has_regular_view && has_materialized_view), - "Found both regular view and materialized view with name: {name}" - ); - - if has_regular_view { + if self.views.contains_key(&name) { self.views.remove(&name); Ok(()) - } else if has_materialized_view { + } else if self.materialized_view_names.contains(&name) { + // Remove from tables + self.tables.remove(&name); + + // Remove from materialized view tracking + self.materialized_view_names.remove(&name); + self.materialized_view_sql.remove(&name); + self.incremental_views.remove(&name); + // Remove from table_to_materialized_views dependencies for views in self.table_to_materialized_views.values_mut() { views.retain(|v| v != &name); } - // Remove the materialized view itself - self.materialized_views.remove(&name); Ok(()) } else { Err(crate::LimboError::ParseError(format!( @@ -165,30 +183,6 @@ impl Schema { .unwrap_or_default() } - /// Get all materialized views that depend on a given table, skip normalizing ident. - /// We are basically assuming we already normalized the ident.
- pub fn get_dependent_materialized_views_unnormalized( - &self, - table_name: &str, - ) -> Option<&Vec<String>> { - self.table_to_materialized_views.get(table_name) - } - - /// Populate all materialized views by scanning their source tables - /// Returns IOResult to support async execution - pub fn populate_materialized_views( - &self, - conn: &Arc<Connection>, - ) -> Result<IOResult<()>> { - for view in self.materialized_views.values() { - let mut view = view - .lock() - .map_err(|_| LimboError::InternalError("Failed to lock view".to_string()))?; - return_if_io!(view.populate_from_table(conn)); - } - Ok(IOResult::Done(())) - } - /// Add a regular (non-materialized) view pub fn add_view(&mut self, view: View) { let name = normalize_ident(&view.name); @@ -224,6 +218,12 @@ impl Schema { pub fn remove_table(&mut self, table_name: &str) { let name = normalize_ident(table_name); self.tables.remove(&name); + + // If this was a materialized view, also clean up the metadata + if self.materialized_view_names.remove(&name) { + self.incremental_views.remove(&name); + self.materialized_view_sql.remove(&name); + } } pub fn get_btree_table(&self, name: &str) -> Option<Arc<BTreeTable>> { @@ -297,8 +297,10 @@ impl Schema { let mut automatic_indices: HashMap<String, Vec<(String, usize)>> = HashMap::with_capacity(10); - // Collect materialized views for second pass to populate table_to_materialized_views mapping - let mut materialized_views_to_process: Vec<(String, Vec<String>)> = Vec::new(); + // Store DBSP state table root pages: view_name -> dbsp_state_root_page + let mut dbsp_state_roots: HashMap<String, usize> = HashMap::new(); + // Store materialized view info (SQL and root page) for later creation + let mut materialized_view_info: HashMap<String, (String, usize)> = HashMap::new(); if matches!(pager.begin_read_tx()?, LimboResult::Busy) { return Err(LimboError::Busy); } @@ -357,6 +359,18 @@ impl Schema { } let table = BTreeTable::from_sql(sql, root_page as usize)?; + + // Check if this is a DBSP state table + if table.name.starts_with(DBSP_TABLE_PREFIX) { + // Extract the view name from _dbsp_state_ + let view_name = table + .name + .strip_prefix(DBSP_TABLE_PREFIX) + .unwrap() + .to_string(); + dbsp_state_roots.insert(view_name, root_page as usize); + } + self.add_btree_table(Arc::new(table)); } "index" => { @@ -418,6 +432,14 @@ impl Schema { }; let name = name_text.as_str(); + // Get the root page (column 3) to determine if this is a materialized view + // Regular views have rootpage = 0, materialized views have rootpage != 0 + let root_page_value = record_cursor.get_value(&row, 3)?; + let RefValue::Integer(root_page_int) = root_page_value else { + return Err(LimboError::ConversionError("Expected integer value".into())); + }; + let root_page = root_page_int as usize; + let sql_value = record_cursor.get_value(&row, 4)?; let RefValue::Text(sql_text) = sql_value else { return Err(LimboError::ConversionError("Expected text value".into())); }; @@ -429,15 +451,12 @@ impl Schema { if let Ok(Some(Cmd::Stmt(stmt))) = parser.next_cmd() { match stmt { Stmt::CreateMaterializedView { ..
} => { - // Create IncrementalView for materialized views - if let Ok(incremental_view) = IncrementalView::from_sql(sql, self) { - let referenced_tables = - incremental_view.get_referenced_table_names(); - let view_name = name.to_string(); - self.add_materialized_view(incremental_view); - materialized_views_to_process - .push((view_name, referenced_tables)); - } + // Store materialized view info for later creation + // We'll create the actual IncrementalView in a later pass + // when we have both the main root page and DBSP state root + let view_name = name.to_string(); + materialized_view_info + .insert(view_name, (sql.to_string(), root_page)); } Stmt::CreateView { view_name: _, @@ -481,14 +500,6 @@ impl Schema { pager.end_read_tx()?; - // Second pass: populate table_to_materialized_views mapping - for (view_name, referenced_tables) in materialized_views_to_process { - // Register this view as dependent on each referenced table - for table_name in referenced_tables { - self.add_materialized_view_dependency(&table_name, &view_name); - } - } - for unparsed_sql_from_index in from_sql_indexes { if !self.indexes_enabled() { self.table_set_has_index(&unparsed_sql_from_index.table_name); @@ -520,6 +531,39 @@ impl Schema { } } + // Third pass: Create materialized views now that we have both root pages + for (view_name, (sql, main_root)) in materialized_view_info { + // Look up the DBSP state root for this view - must exist for materialized views + let dbsp_state_root = dbsp_state_roots.get(&view_name).ok_or_else(|| { + LimboError::InternalError(format!( + "Materialized view {view_name} is missing its DBSP state table" + )) + })?; + + // Create the IncrementalView with both root pages + let incremental_view = + IncrementalView::from_sql(&sql, self, main_root, *dbsp_state_root)?; + let referenced_tables = incremental_view.get_referenced_table_names(); + + // Create a BTreeTable for the materialized view + let table = Arc::new(Table::BTree(Arc::new(BTreeTable { + name: view_name.clone(), + root_page: main_root, + columns: incremental_view.columns.clone(), + primary_key_columns: Vec::new(), + has_rowid: true, + is_strict: false, + unique_sets: None, + }))); + + self.add_materialized_view(incremental_view, table, sql); + + // Register dependencies + for table_name in referenced_tables { + self.add_materialized_view_dependency(&table_name, &view_name); + } + } + Ok(()) } } @@ -565,15 +609,19 @@ impl Clone for Schema { (name.clone(), indexes) }) .collect(); - let materialized_views = self - .materialized_views + let materialized_view_names = self.materialized_view_names.clone(); + let materialized_view_sql = self.materialized_view_sql.clone(); + let incremental_views = self + .incremental_views .iter() .map(|(name, view)| (name.clone(), view.clone())) .collect(); let views = self.views.clone(); Self { tables, - materialized_views, + materialized_view_names, + materialized_view_sql, + incremental_views, views, indexes, has_indexes: self.has_indexes.clone(), diff --git a/core/translate/delete.rs b/core/translate/delete.rs index 4a534b25e..dee30b2af 100644 --- a/core/translate/delete.rs +++ b/core/translate/delete.rs @@ -82,6 +82,12 @@ pub fn prepare_delete_plan( Some(table) => table, None => crate::bail_parse_error!("no such table: {}", tbl_name), }; + + // Check if this is a materialized view + if schema.is_materialized_view(&tbl_name) { + crate::bail_parse_error!("cannot modify materialized view {}", tbl_name); + } + let table = if let Some(table) = table.virtual_table() { Table::Virtual(table.clone()) 
} else if let Some(table) = table.btree() { diff --git a/core/translate/insert.rs b/core/translate/insert.rs index ae21991a6..04ce1ef4f 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -63,6 +63,7 @@ pub fn translate_insert( if with.is_some() { crate::bail_parse_error!("WITH clause is not supported"); } + if on_conflict.is_some() { crate::bail_parse_error!("ON CONFLICT clause is not supported"); } @@ -86,6 +87,11 @@ pub fn translate_insert( None => crate::bail_parse_error!("no such table: {}", table_name), }; + // Check if this is a materialized view + if schema.is_materialized_view(table_name.as_str()) { + crate::bail_parse_error!("cannot modify materialized view {}", table_name); + } + let resolver = Resolver::new(schema, syms); if let Some(virtual_table) = &table.virtual_table() { diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 6774c04d2..3e44ff0bd 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -196,7 +196,8 @@ pub fn init_loop( t_ctx.meta_left_joins[table_index] = Some(lj_metadata); } } - let (table_cursor_id, index_cursor_id) = table.open_cursors(program, mode)?; + let (table_cursor_id, index_cursor_id) = + table.open_cursors(program, mode, t_ctx.resolver.schema)?; match &table.op { Operation::Scan(Scan::BTreeTable { index, .. }) => match (mode, &table.table) { (OperationMode::SELECT, Table::BTree(btree)) => { diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 082e39f96..e96dc4a1a 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -3,7 +3,7 @@ use turso_parser::ast::{self, SortOrder}; use crate::{ function::AggFunc, - schema::{BTreeTable, Column, FromClauseSubquery, Index, Table}, + schema::{BTreeTable, Column, FromClauseSubquery, Index, Schema, Table}, vdbe::{ builder::{CursorKey, CursorType, ProgramBuilder}, insn::{IdxInsertFlags, Insn}, @@ -852,6 +852,7 @@ impl JoinedTable { &self, program: &mut ProgramBuilder, mode: OperationMode, + schema: &Schema, ) -> Result<(Option<usize>, Option<usize>)> { let index = self.op.index(); match &self.table { @@ -863,10 +864,17 @@ impl JoinedTable { let table_cursor_id = if table_not_required { None } else { - Some(program.alloc_cursor_id_keyed( - CursorKey::table(self.internal_id), - CursorType::BTreeTable(btree.clone()), - )) + // Check if this is a materialized view + let cursor_type = + if let Some(view_mutex) = schema.get_materialized_view(&btree.name) { + CursorType::MaterializedView(btree.clone(), view_mutex) + } else { + CursorType::BTreeTable(btree.clone()) + }; + Some( + program + .alloc_cursor_id_keyed(CursorKey::table(self.internal_id), cursor_type), + ) }; let index_cursor_id = index.map(|index| { program.alloc_cursor_id_keyed( diff --git a/core/translate/planner.rs b/core/translate/planner.rs index cf5a4314d..ea4cc8f53 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -3,9 +3,9 @@ use std::sync::Arc; use super::{ expr::walk_expr, plan::{ - Aggregate, ColumnUsedMask, Distinctness, EvalAt, JoinInfo, JoinOrderMember, JoinedTable, - Operation, OuterQueryReference, Plan, QueryDestination, ResultSetColumn, Scan, - TableReferences, WhereTerm, + Aggregate, ColumnUsedMask, Distinctness, EvalAt, IterationDirection, JoinInfo, + JoinOrderMember, JoinedTable, Operation, OuterQueryReference, Plan, QueryDestination, + ResultSetColumn, Scan, TableReferences, WhereTerm, }, select::prepare_select_plan, SymbolTable, @@ -529,12 +529,29 @@ fn parse_table( schema.get_materialized_view(table_name.as_str()) }); if let Some(view) =
diff --git a/core/translate/planner.rs b/core/translate/planner.rs
index cf5a4314d..ea4cc8f53 100644
--- a/core/translate/planner.rs
+++ b/core/translate/planner.rs
@@ -3,9 +3,9 @@ use std::sync::Arc;
 use super::{
     expr::walk_expr,
     plan::{
-        Aggregate, ColumnUsedMask, Distinctness, EvalAt, JoinInfo, JoinOrderMember, JoinedTable,
-        Operation, OuterQueryReference, Plan, QueryDestination, ResultSetColumn, Scan,
-        TableReferences, WhereTerm,
+        Aggregate, ColumnUsedMask, Distinctness, EvalAt, IterationDirection, JoinInfo,
+        JoinOrderMember, JoinedTable, Operation, OuterQueryReference, Plan, QueryDestination,
+        ResultSetColumn, Scan, TableReferences, WhereTerm,
     },
     select::prepare_select_plan,
     SymbolTable,
@@ -529,12 +529,29 @@ fn parse_table(
             schema.get_materialized_view(table_name.as_str())
         });
     if let Some(view) = view {
-        // Create a virtual table wrapper for the view
-        // We'll use the view's columns from the schema
-        let vtab = crate::vtab_view::create_view_virtual_table(
-            normalize_ident(table_name.as_str()).as_str(),
-            view.clone(),
-        )?;
+        // Check if this materialized view has persistent storage
+        let view_guard = view.lock().unwrap();
+        let root_page = view_guard.get_root_page();
+
+        if root_page == 0 {
+            drop(view_guard);
+            return Err(crate::LimboError::InternalError(
+                "Materialized view has no storage allocated".to_string(),
+            ));
+        }
+
+        // This is a materialized view with storage - treat it as a regular BTree table
+        // Create a BTreeTable from the view's metadata
+        let btree_table = Arc::new(crate::schema::BTreeTable {
+            name: view_guard.name().to_string(),
+            root_page,
+            columns: view_guard.columns.clone(),
+            primary_key_columns: Vec::new(),
+            has_rowid: true,
+            is_strict: false,
+            unique_sets: None,
+        });
+        drop(view_guard);
 
         let alias = maybe_alias
             .map(|a| match a {
@@ -544,12 +561,11 @@ fn parse_table(
             .map(|a| normalize_ident(a.as_str()));
 
         table_references.add_joined_table(JoinedTable {
-            op: Operation::Scan(Scan::VirtualTable {
-                idx_num: -1,
-                idx_str: None,
-                constraints: Vec::new(),
+            op: Operation::Scan(Scan::BTreeTable {
+                iter_dir: IterationDirection::Forwards,
+                index: None,
             }),
-            table: Table::Virtual(vtab),
+            table: Table::BTree(btree_table),
             identifier: alias.unwrap_or(normalized_qualified_name),
             internal_id: table_ref_counter.next(),
             join_info: None,
diff --git a/core/translate/schema.rs b/core/translate/schema.rs
index 03025651c..5356e5a8a 100644
--- a/core/translate/schema.rs
+++ b/core/translate/schema.rs
@@ -690,6 +690,14 @@ pub fn translate_drop_table(
     }
     let table = table.unwrap(); // safe since we just checked for None
 
+    // Check if this is a materialized view - if so, refuse to drop it with DROP TABLE
+    if schema.is_materialized_view(tbl_name.name.as_str()) {
+        bail_parse_error!(
+            "Cannot DROP TABLE on materialized view {}. Use DROP VIEW instead.",
+            tbl_name.name.as_str()
+        );
+    }
+
     let cdc_table = prepare_cdc_if_necessary(&mut program, schema, SQLITE_TABLEID)?;
 
     let null_reg = program.alloc_register(); // r1
diff --git a/core/translate/update.rs b/core/translate/update.rs
index 6f9c5af23..961046eea 100644
--- a/core/translate/update.rs
+++ b/core/translate/update.rs
@@ -140,6 +140,12 @@ pub fn prepare_update_plan(
         Some(table) => table,
         None => bail_parse_error!("Parse error: no such table: {}", table_name),
     };
+
+    // Check if this is a materialized view
+    if schema.is_materialized_view(table_name.as_str()) {
+        bail_parse_error!("cannot modify materialized view {}", table_name);
+    }
+
     let table_name = table.get_name();
     let iter_dir = body
         .order_by
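With delete.rs, insert.rs and update.rs all guarded, every DML path now fails at prepare time instead of silently desynchronizing view state at runtime. A hypothetical smoke test of the three guards (the Connection/prepare API names here are assumptions, not part of the patch):

    // Hypothetical check; `conn` and `prepare` are assumed, and `mv` is any
    // existing materialized view.
    fn dml_on_matview_is_rejected(conn: &Connection) {
        for sql in [
            "INSERT INTO mv VALUES (1)",
            "UPDATE mv SET x = 1",
            "DELETE FROM mv",
        ] {
            let err = conn.prepare(sql).unwrap_err();
            assert!(err.to_string().contains("cannot modify materialized view"));
        }
    }
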
diff --git a/core/translate/view.rs b/core/translate/view.rs
index fcb12df01..78e9c6a63 100644
--- a/core/translate/view.rs
+++ b/core/translate/view.rs
@@ -1,69 +1,14 @@
-use crate::schema::Schema;
+use crate::schema::{Schema, DBSP_TABLE_PREFIX};
+use crate::storage::pager::CreateBTreeFlags;
 use crate::translate::emitter::Resolver;
 use crate::translate::schema::{emit_schema_entry, SchemaEntryType, SQLITE_TABLEID};
 use crate::util::normalize_ident;
 use crate::vdbe::builder::{CursorType, ProgramBuilder};
-use crate::vdbe::insn::{CmpInsFlags, Cookie, Insn};
+use crate::vdbe::insn::{CmpInsFlags, Cookie, Insn, RegisterOrLiteral};
 use crate::{Connection, Result, SymbolTable};
 use std::sync::Arc;
 use turso_parser::ast;
 
-/// Common logic for creating views (both regular and materialized)
-fn emit_create_view_program(
-    schema: &Schema,
-    view_name: &str,
-    sql: String,
-    syms: &SymbolTable,
-    program: &mut ProgramBuilder,
-    populate_materialized: bool,
-) -> Result<()> {
-    let normalized_view_name = normalize_ident(view_name);
-
-    // Open cursor to sqlite_schema table
-    let table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
-    let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone()));
-    program.emit_insn(Insn::OpenWrite {
-        cursor_id: sqlite_schema_cursor_id,
-        root_page: 1usize.into(),
-        db: 0,
-    });
-
-    // Add the view entry to sqlite_schema
-    let resolver = Resolver::new(schema, syms);
-    emit_schema_entry(
-        program,
-        &resolver,
-        sqlite_schema_cursor_id,
-        None, // cdc_table_cursor_id, no cdc for views
-        SchemaEntryType::View,
-        &normalized_view_name,
-        &normalized_view_name, // for views, tbl_name is same as name
-        0, // views don't have a root page
-        Some(sql),
-    )?;
-
-    // Parse schema to load the new view
-    program.emit_insn(Insn::ParseSchema {
-        db: sqlite_schema_cursor_id,
-        where_clause: Some(format!("name = '{normalized_view_name}'")),
-    });
-
-    program.emit_insn(Insn::SetCookie {
-        db: 0,
-        cookie: Cookie::SchemaVersion,
-        value: (schema.schema_version + 1) as i32,
-        p5: 0,
-    });
-
-    // Populate materialized views if needed
-    // Note: This must come after SetCookie since it may do I/O operations
-    if populate_materialized {
-        program.emit_insn(Insn::PopulateMaterializedViews);
-    }
-
-    Ok(())
-}
-
 pub fn translate_create_materialized_view(
     schema: &Schema,
     view_name: &str,
@@ -92,17 +37,144 @@ pub fn translate_create_materialized_view(
         )));
     }
 
-    // Validate that this view can be created as an IncrementalView
+    // Validate the view can be created and extract its columns
     // This validation happens before updating sqlite_master to prevent
     // storing invalid view definitions
     use crate::incremental::view::IncrementalView;
-    IncrementalView::can_create_view(select_stmt)?;
+    use crate::schema::BTreeTable;
+    let view_columns = IncrementalView::validate_and_extract_columns(select_stmt, schema)?;
 
-    // Reconstruct the SQL string
+    // Reconstruct the SQL string for storage
     let sql = create_materialized_view_to_str(view_name, select_stmt);
 
-    // Use common logic to emit the view creation program
-    emit_create_view_program(schema, view_name, sql, syms, &mut program, true)?;
+    // Create a btree for storing the materialized view state
+    // This btree will hold the materialized rows (row_id -> values)
+    let view_root_reg = program.alloc_register();
+
+    program.emit_insn(Insn::CreateBtree {
+        db: 0,
+        root: view_root_reg,
+        flags: CreateBTreeFlags::new_table(),
+    });
+
+    // Create a second btree for DBSP operator state (e.g., aggregate state)
+    // This is stored as a hidden table: __turso_internal_dbsp_state_<view_name>
+    let dbsp_state_root_reg = program.alloc_register();
+
+    program.emit_insn(Insn::CreateBtree {
+        db: 0,
+        root: dbsp_state_root_reg,
+        flags: CreateBTreeFlags::new_table(),
+    });
+
+    // Create a proper BTreeTable for the cursor with the actual view columns
+    let view_table = Arc::new(BTreeTable {
+        root_page: 0, // Will be set to actual root page after creation
+        name: normalized_view_name.clone(),
+        columns: view_columns.clone(),
+        primary_key_columns: vec![], // Materialized views use implicit rowid
+        has_rowid: true,
+        is_strict: false,
+        unique_sets: None,
+    });
+
+    // Allocate a cursor for writing to the view's btree during population
+    let view_cursor_id = program.alloc_cursor_id(crate::vdbe::builder::CursorType::BTreeTable(
+        view_table.clone(),
+    ));
+
+    // Open the cursor to the view's btree
+    program.emit_insn(Insn::OpenWrite {
+        cursor_id: view_cursor_id,
+        root_page: RegisterOrLiteral::Register(view_root_reg),
+        db: 0,
+    });
+
+    // Clear any existing data in the btree
+    // This is important because if we're reusing a page that previously held
+    // a materialized view, there might be old data still there
+    // We need to start with a clean slate
+    let clear_loop_label = program.allocate_label();
+    let clear_done_label = program.allocate_label();
+
+    // Rewind to the beginning of the btree
+    program.emit_insn(Insn::Rewind {
+        cursor_id: view_cursor_id,
+        pc_if_empty: clear_done_label,
+    });
+
+    // Loop to delete all rows
+    program.preassign_label_to_next_insn(clear_loop_label);
+    program.emit_insn(Insn::Delete {
+        cursor_id: view_cursor_id,
+        table_name: normalized_view_name.clone(),
+    });
+    program.emit_insn(Insn::Next {
+        cursor_id: view_cursor_id,
+        pc_if_next: clear_loop_label,
+    });
+
+    program.preassign_label_to_next_insn(clear_done_label);
+
+    // Open cursor to sqlite_schema table
+    let table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
+    let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone()));
+    program.emit_insn(Insn::OpenWrite {
+        cursor_id: sqlite_schema_cursor_id,
+        root_page: 1usize.into(),
+        db: 0,
+    });
+
+    // Add the materialized view entry to sqlite_schema
+    let resolver = Resolver::new(schema, syms);
+    emit_schema_entry(
+        &mut program,
+        &resolver,
+        sqlite_schema_cursor_id,
+        None, // cdc_table_cursor_id, no cdc for views
+        SchemaEntryType::View,
+        &normalized_view_name,
+        &normalized_view_name,
+        view_root_reg, // btree root for materialized view data
+        Some(sql),
+    )?;
+
+    // Add the DBSP state table to sqlite_master (required for materialized views)
+    let dbsp_table_name = format!("{DBSP_TABLE_PREFIX}{normalized_view_name}");
+    let dbsp_sql = format!("CREATE TABLE {dbsp_table_name} (key INTEGER PRIMARY KEY, state BLOB)");
+
+    emit_schema_entry(
+        &mut program,
+        &resolver,
+        sqlite_schema_cursor_id,
+        None, // cdc_table_cursor_id
+        SchemaEntryType::Table,
+        &dbsp_table_name,
+        &dbsp_table_name,
+        dbsp_state_root_reg, // Root for DBSP state table
+        Some(dbsp_sql),
+    )?;
+
+    // Parse schema to load the new view and DBSP state table
+    program.emit_insn(Insn::ParseSchema {
+        db: sqlite_schema_cursor_id,
+        where_clause: Some(format!(
+            "name = '{normalized_view_name}' OR name = '{dbsp_table_name}'"
+        )),
+    });
+
+    program.emit_insn(Insn::SetCookie {
+        db: 0,
+        cookie: Cookie::SchemaVersion,
+        value: (schema.schema_version + 1) as i32,
+        p5: 0,
+    });
+
+    // Populate the materialized view
+    let cursor_info = vec![(normalized_view_name.clone(), view_cursor_id)];
+    program.emit_insn(Insn::PopulateMaterializedViews {
+        cursors: cursor_info,
+    });
 
     program.epilogue(schema);
     Ok(program)
@@ -137,8 +209,41 @@ pub fn translate_create_view(
     // Reconstruct the SQL string
     let sql = create_view_to_str(view_name, select_stmt);
 
-    // Use common logic to emit the view creation program
-    emit_create_view_program(schema, view_name, sql, syms, &mut program, false)?;
+    // Open cursor to sqlite_schema table
+    let table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
+    let sqlite_schema_cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(table.clone()));
+    program.emit_insn(Insn::OpenWrite {
+        cursor_id: sqlite_schema_cursor_id,
+        root_page: 1usize.into(),
+        db: 0,
+    });
+
+    // Add the view entry to sqlite_schema
+    let resolver = Resolver::new(schema, syms);
+    emit_schema_entry(
+        &mut program,
+        &resolver,
+        sqlite_schema_cursor_id,
+        None, // cdc_table_cursor_id, no cdc for views
+        SchemaEntryType::View,
+        &normalized_view_name,
+        &normalized_view_name,
+        0, // Regular views don't have a btree
+        Some(sql),
+    )?;
+
+    // Parse schema to load the new view
+    program.emit_insn(Insn::ParseSchema {
+        db: sqlite_schema_cursor_id,
+        where_clause: Some(format!("name = '{normalized_view_name}'")),
+    });
+
+    program.emit_insn(Insn::SetCookie {
+        db: 0,
+        cookie: Cookie::SchemaVersion,
+        value: (schema.schema_version + 1) as i32,
+        p5: 0,
+    });
 
     Ok(program)
 }
@@ -156,10 +261,9 @@ pub fn translate_drop_view(
     let normalized_view_name = normalize_ident(view_name);
 
     // Check if view exists (either regular or materialized)
-    let view_exists = schema.get_view(&normalized_view_name).is_some()
-        || schema
-            .get_materialized_view(&normalized_view_name)
-            .is_some();
+    let is_regular_view = schema.get_view(&normalized_view_name).is_some();
+    let is_materialized_view = schema.is_materialized_view(&normalized_view_name);
+    let view_exists = is_regular_view || is_materialized_view;
 
     if !view_exists && !if_exists {
         return Err(crate::LimboError::ParseError(format!(
@@ -172,6 +276,20 @@ pub fn translate_drop_view(
         return Ok(program);
     }
 
+    // If this is a materialized view, we need to destroy its btree as well
+    if is_materialized_view {
+        if let Some(table) = schema.get_table(&normalized_view_name) {
+            if let Some(btree_table) = table.btree() {
+                // Destroy the btree for the materialized view
+                program.emit_insn(Insn::Destroy {
+                    root: btree_table.root_page,
+                    former_root_reg: 0, // No autovacuum
+                    is_temp: 0,
+                });
+            }
+        }
+    }
+
     // Open cursor to sqlite_schema table
     let schema_table = schema.get_btree_table(SQLITE_TABLEID).unwrap();
     let sqlite_schema_cursor_id =
@@ -217,6 +335,8 @@ pub fn translate_drop_view(
 
     // Check if type == 'view' and name == view_name
     let skip_delete_label = program.allocate_label();
+
+    // Both regular and materialized views are stored as type='view' in sqlite_schema
     program.emit_insn(Insn::Ne {
         lhs: col0_reg,
         rhs: type_reg,
         target_pc: skip_delete_label,
         flags: CmpInsFlags::default(),
         collation: program.curr_collation(),
     });
+
     program.emit_insn(Insn::Ne {
         lhs: col1_reg,
         rhs: view_name_reg,
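For orientation, the program emitted by translate_create_materialized_view now has the following overall shape (a schematic outline of the instructions above, not literal bytecode):

    // 1. CreateBtree              -> view_root_reg        (materialized rows)
    // 2. CreateBtree              -> dbsp_state_root_reg  (DBSP operator state)
    // 3. OpenWrite                view cursor on view_root_reg
    // 4. Rewind / Delete / Next   clear loop, in case a freed page is reused
    // 5. OpenWrite                sqlite_schema cursor
    // 6. emit_schema_entry x2     view row + __turso_internal_dbsp_state_* row
    // 7. ParseSchema              reload both entries
    // 8. SetCookie                bump schema version
    // 9. PopulateMaterializedViews { cursors: [(view_name, view_cursor_id)] }
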
diff --git a/core/types.rs b/core/types.rs
index 25ad4655e..597f20cdd 100644
--- a/core/types.rs
+++ b/core/types.rs
@@ -2430,6 +2430,7 @@ pub enum Cursor {
     Pseudo(PseudoCursor),
     Sorter(Sorter),
     Virtual(VirtualTableCursor),
+    MaterializedView(Box<crate::incremental::cursor::MaterializedViewCursor>),
 }
 
 impl Cursor {
@@ -2445,6 +2446,12 @@ impl Cursor {
         Self::Sorter(cursor)
     }
 
+    pub fn new_materialized_view(
+        cursor: crate::incremental::cursor::MaterializedViewCursor,
+    ) -> Self {
+        Self::MaterializedView(Box::new(cursor))
+    }
+
     pub fn as_btree_mut(&mut self) -> &mut BTreeCursor {
         match self {
             Self::BTree(cursor) => cursor,
@@ -2472,6 +2479,15 @@ impl Cursor {
             _ => panic!("Cursor is not a virtual cursor"),
         }
     }
+
+    pub fn as_materialized_view_mut(
+        &mut self,
+    ) -> &mut crate::incremental::cursor::MaterializedViewCursor {
+        match self {
+            Self::MaterializedView(cursor) => cursor,
+            _ => panic!("Cursor is not a materialized view cursor"),
+        }
+    }
 }
 
 #[derive(Debug)]
@@ -2549,6 +2565,23 @@ macro_rules! return_if_io {
     };
 }
 
+#[macro_export]
+macro_rules! return_and_restore_if_io {
+    ($field:expr, $saved_state:expr, $e:expr) => {
+        match $e {
+            Ok(IOResult::Done(v)) => v,
+            Ok(IOResult::IO(io)) => {
+                let _ = std::mem::replace($field, $saved_state);
+                return Ok(IOResult::IO(io));
+            }
+            Err(e) => {
+                let _ = std::mem::replace($field, $saved_state);
+                return Err(e);
+            }
+        }
+    };
+}
+
 #[derive(Debug, PartialEq)]
 pub enum SeekResult {
     /// Record matching the [SeekOp] found in the B-tree and cursor was positioned to point onto that record
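return_and_restore_if_io complements return_if_io for state machines that take their state out with mem::replace before running a fallible step: on an I/O yield (or an error) the saved state must go back so the next call retries the same step. A minimal usage sketch (the ScanState machine and its fields are hypothetical):

    // Hypothetical step using the macro's contract: on IOResult::IO or Err,
    // `saved` is restored into self.state and the result is propagated;
    // on Done the machine advances normally.
    fn step(&mut self, cursor: &mut BTreeCursor) -> Result<IOResult<()>> {
        let saved = std::mem::replace(&mut self.state, ScanState::Busy);
        let seek_result = return_and_restore_if_io!(
            &mut self.state,
            saved,
            cursor.seek(SeekKey::TableRowId(self.rowid), SeekOp::GE { eq_only: true })
        );
        self.state = ScanState::Positioned(seek_result);
        Ok(IOResult::Done(()))
    }
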
diff --git a/core/util.rs b/core/util.rs
index 097139d18..b259a90f3 100644
--- a/core/util.rs
+++ b/core/util.rs
@@ -1,14 +1,16 @@
 #![allow(unused)]
+use crate::incremental::view::IncrementalView;
 use crate::translate::expr::WalkControl;
 use crate::types::IOResult;
 use crate::{
-    schema::{self, Column, MaterializedViewsMap, Schema, Type},
+    schema::{self, BTreeTable, Column, Schema, Table, Type, DBSP_TABLE_PREFIX},
     translate::{collate::CollationSeq, expr::walk_expr, plan::JoinOrderMember},
     types::{Value, ValueType},
     LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable,
 };
 use crate::{Connection, IO};
 use std::{
+    collections::HashMap,
     rc::Rc,
     sync::{Arc, Mutex},
 };
@@ -148,7 +150,7 @@ pub fn parse_schema_rows(
     schema: &mut Schema,
     syms: &SymbolTable,
     mv_tx_id: Option<u64>,
-    mut existing_views: MaterializedViewsMap,
+    mut existing_views: HashMap<String, Arc<Mutex<IncrementalView>>>,
 ) -> Result<()> {
     rows.set_mv_tx_id(mv_tx_id);
     // TODO: if we IO, this unparsed indexes is lost. Will probably need some state between
@@ -156,8 +158,12 @@ pub fn parse_schema_rows(
     let mut from_sql_indexes = Vec::with_capacity(10);
     let mut automatic_indices = std::collections::HashMap::with_capacity(10);
 
-    // Collect views for second pass to populate table_to_views mapping
-    let mut views_to_process: Vec<(String, Vec<String>)> = Vec::new();
+    // Store DBSP state table root pages: view_name -> dbsp_state_root_page
+    let mut dbsp_state_roots: std::collections::HashMap<String, usize> =
+        std::collections::HashMap::new();
+    // Store materialized view info (SQL and root page) for later creation
+    let mut materialized_view_info: std::collections::HashMap<String, (String, usize)> =
+        std::collections::HashMap::new();
 
     loop {
         match rows.step()? {
             StepResult::Row => {
@@ -189,6 +195,18 @@ pub fn parse_schema_rows(
                             schema.add_virtual_table(vtab);
                         } else {
                             let table = schema::BTreeTable::from_sql(sql, root_page as usize)?;
+
+                            // Check if this is a DBSP state table
+                            if table.name.starts_with(DBSP_TABLE_PREFIX) {
+                                // Extract the view name from __turso_internal_dbsp_state_<view_name>
+                                let view_name = table
+                                    .name
+                                    .strip_prefix(DBSP_TABLE_PREFIX)
+                                    .unwrap()
+                                    .to_string();
+                                dbsp_state_roots.insert(view_name, root_page as usize);
+                            }
+
                             schema.add_btree_table(Arc::new(table));
                         }
                     }
@@ -228,6 +246,7 @@ pub fn parse_schema_rows(
                         use turso_parser::parser::Parser;
 
                         let name: &str = row.get::<&str>(1)?;
+                        let root_page = row.get::<i64>(3)?;
                         let sql: &str = row.get::<&str>(4)?;
                         let view_name = name.to_string();
 
@@ -236,52 +255,17 @@ pub fn parse_schema_rows(
                         if let Ok(Some(Cmd::Stmt(stmt))) = parser.next_cmd() {
                             match stmt {
                                 Stmt::CreateMaterializedView { .. } => {
-                                    // Handle materialized view with potential reuse
-                                    let should_create_new = if let Some(existing_view) =
-                                        existing_views.remove(&view_name)
-                                    {
-                                        // Check if we can reuse this view (same SQL definition)
-                                        let can_reuse = if let Ok(view_guard) = existing_view.lock()
-                                        {
-                                            view_guard.has_same_sql(sql)
-                                        } else {
-                                            false
-                                        };
+                                    // Store materialized view info for later creation
+                                    // We'll handle reuse logic and create the actual IncrementalView
+                                    // in a later pass when we have both the main root page and DBSP state root
+                                    materialized_view_info.insert(
+                                        view_name.clone(),
+                                        (sql.to_string(), root_page as usize),
+                                    );
 
-                                        if can_reuse {
-                                            // Reuse the existing view - it's already populated!
-                                            let referenced_tables =
-                                                if let Ok(view_guard) = existing_view.lock() {
-                                                    view_guard.get_referenced_table_names()
-                                                } else {
-                                                    vec![]
-                                                };
-
-                                            // Add the existing view to the new schema
-                                            schema
-                                                .materialized_views
-                                                .insert(view_name.clone(), existing_view);
-
-                                            // Store for second pass processing
-                                            views_to_process
-                                                .push((view_name.clone(), referenced_tables));
-                                            false // Don't create new
-                                        } else {
-                                            true // SQL changed, need to create new
-                                        }
-                                    } else {
-                                        true // No existing view, need to create new
-                                    };
-
-                                    if should_create_new {
-                                        // Create a new IncrementalView
-                                        // If this fails, we should propagate the error so the transaction rolls back
-                                        let incremental_view =
-                                            IncrementalView::from_sql(sql, schema)?;
-                                        let referenced_tables =
-                                            incremental_view.get_referenced_table_names();
-                                        schema.add_materialized_view(incremental_view);
-                                        views_to_process.push((view_name, referenced_tables));
+                                    // Mark the existing view for potential reuse
+                                    if existing_views.contains_key(&view_name) {
+                                        // We'll check for reuse in the third pass
                                     }
                                 }
                                 Stmt::CreateView {
@@ -359,11 +343,56 @@ pub fn parse_schema_rows(
         }
     }
 
-    // Second pass: populate table_to_views mapping
-    for (view_name, referenced_tables) in views_to_process {
-        // Register this view as dependent on each referenced table
-        for table_name in referenced_tables {
-            schema.add_materialized_view_dependency(&table_name, &view_name);
+    // Third pass: Create materialized views now that we have both root pages
+    for (view_name, (sql, main_root)) in materialized_view_info {
+        // Look up the DBSP state root for this view - must exist for materialized views
+        let dbsp_state_root = dbsp_state_roots.get(&view_name).ok_or_else(|| {
+            LimboError::InternalError(format!(
+                "Materialized view {view_name} is missing its DBSP state table"
+            ))
+        })?;
+
+        // Check if we can reuse the existing view
+        let mut reuse_view = false;
+        if let Some(existing_view_mutex) = schema.get_materialized_view(&view_name) {
+            let existing_view = existing_view_mutex.lock().unwrap();
+            if let Some(existing_sql) = schema.materialized_view_sql.get(&view_name) {
+                if existing_sql == &sql {
+                    reuse_view = true;
+                }
+            }
+        }
+
+        if reuse_view {
+            // View already exists with same SQL, just update dependencies
+            let existing_view_mutex = schema.get_materialized_view(&view_name).unwrap();
+            let existing_view = existing_view_mutex.lock().unwrap();
+            let referenced_tables = existing_view.get_referenced_table_names();
+            drop(existing_view); // Release lock before modifying schema
+            for table_name in referenced_tables {
+                schema.add_materialized_view_dependency(&table_name, &view_name);
+            }
+        } else {
+            // Create new IncrementalView with both root pages
+            let incremental_view =
+                IncrementalView::from_sql(&sql, schema, main_root, *dbsp_state_root)?;
+            let referenced_tables = incremental_view.get_referenced_table_names();
+
+            // Create a Table for the materialized view
+            let table = Arc::new(schema::Table::BTree(Arc::new(schema::BTreeTable {
+                root_page: main_root,
+                name: view_name.clone(),
+                columns: incremental_view.columns.clone(), // Use the view's columns, not the base table's
+                primary_key_columns: vec![],
+                has_rowid: true,
+                is_strict: false,
+                unique_sets: None,
+            })));
+
+            schema.add_materialized_view(incremental_view, table, sql.clone());
+            for table_name in referenced_tables {
+                schema.add_materialized_view_dependency(&table_name, &view_name);
+            }
         }
     }
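The pairing between a view and its DBSP state table is purely name-based via DBSP_TABLE_PREFIX, which is why the first pass can record state roots before the views themselves are constructed:

    // Name-based pairing used by the passes above (prefix per this patch).
    const DBSP_TABLE_PREFIX: &str = "__turso_internal_dbsp_state_";

    fn dbsp_table_for(view: &str) -> String {
        // e.g. "mv" -> "__turso_internal_dbsp_state_mv"
        format!("{DBSP_TABLE_PREFIX}{view}")
    }

    fn view_for_dbsp_table(table: &str) -> Option<&str> {
        // None for ordinary tables, Some(view_name) for state tables
        table.strip_prefix(DBSP_TABLE_PREFIX)
    }
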
diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs
index f661d6826..ffa26c03d 100644
--- a/core/vdbe/builder.rs
+++ b/core/vdbe/builder.rs
@@ -123,6 +123,10 @@ pub enum CursorType {
     Pseudo(PseudoCursorType),
     Sorter,
     VirtualTable(Arc<VirtualTable>),
+    MaterializedView(
+        Arc<BTreeTable>,
+        Arc<Mutex<IncrementalView>>,
+    ),
 }
 
 impl CursorType {
@@ -865,6 +869,7 @@ impl ProgramBuilder {
                 let default = match cursor_type {
                     CursorType::BTreeTable(btree) => &btree.columns[column].default,
                     CursorType::BTreeIndex(index) => &index.columns[column].default,
+                    CursorType::MaterializedView(btree, _) => &btree.columns[column].default,
                     _ => break 'value None,
                 };
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 7a989d0c5..4b0c40b58 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -953,11 +953,43 @@ pub fn op_open_read(
     let num_columns = match cursor_type {
         CursorType::BTreeTable(table_rc) => table_rc.columns.len(),
         CursorType::BTreeIndex(index_arc) => index_arc.columns.len(),
+        CursorType::MaterializedView(table_rc, _) => table_rc.columns.len(),
         _ => unreachable!("This should not have happened"),
     };
     match cursor_type {
+        CursorType::MaterializedView(_, view_mutex) => {
+            // This is a materialized view with storage
+            // Create btree cursor for reading the persistent data
+            let btree_cursor = Box::new(BTreeCursor::new_table(
+                mv_cursor,
+                pager.clone(),
+                *root_page,
+                num_columns,
+            ));
+
+            // Get the view name and look up or create its transaction state
+            let view_name = view_mutex.lock().unwrap().name().to_string();
+            let tx_state = program
+                .connection
+                .view_transaction_states
+                .get_or_create(&view_name);
+
+            // Create materialized view cursor with this view's transaction state
+            let mv_cursor = crate::incremental::cursor::MaterializedViewCursor::new(
+                btree_cursor,
+                view_mutex.clone(),
+                pager.clone(),
+                tx_state,
+            )?;
+
+            cursors
+                .get_mut(*cursor_id)
+                .unwrap()
+                .replace(Cursor::new_materialized_view(mv_cursor));
+        }
         CursorType::BTreeTable(_) => {
+            // Regular table
             let cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), *root_page, num_columns);
             cursors
                 .get_mut(*cursor_id)
                 .unwrap()
@@ -1282,10 +1314,18 @@ pub fn op_rewind(
     );
     assert!(pc_if_empty.is_offset());
     let is_empty = {
-        let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Rewind");
-        let cursor = cursor.as_btree_mut();
-        return_if_io!(cursor.rewind());
-        cursor.is_empty()
+        let mut cursor = state.get_cursor(*cursor_id);
+        match &mut *cursor {
+            Cursor::BTree(btree_cursor) => {
+                return_if_io!(btree_cursor.rewind());
+                btree_cursor.is_empty()
+            }
+            Cursor::MaterializedView(mv_cursor) => {
+                return_if_io!(mv_cursor.rewind());
+                !mv_cursor.is_valid()?
+            }
+            _ => panic!("Rewind on non-btree/materialized-view cursor"),
+        }
     };
     if is_empty {
         state.pc = pc_if_empty.as_offset_int();
@@ -1430,17 +1470,43 @@ pub fn op_column(
         } => {
             {
                 let mut table_cursor = state.get_cursor(table_cursor_id);
-                let table_cursor = table_cursor.as_btree_mut();
-                return_if_io!(
-                    table_cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true })
-                );
+                // MaterializedView cursors shouldn't go through deferred seek logic
+                // but if we somehow get here, handle it appropriately
+                match &mut *table_cursor {
+                    Cursor::MaterializedView(mv_cursor) => {
+                        // Seek to the rowid in the materialized view
+                        return_if_io!(mv_cursor
+                            .seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true }));
+                    }
+                    _ => {
+                        // Regular btree cursor
+                        let table_cursor = table_cursor.as_btree_mut();
+                        return_if_io!(table_cursor
+                            .seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true }));
+                    }
+                }
             }
             state.op_column_state = OpColumnState::GetColumn;
         }
         OpColumnState::GetColumn => {
+            // First check if this is a MaterializedViewCursor
+            {
+                let mut cursor = state.get_cursor(*cursor_id);
+                if let Cursor::MaterializedView(mv_cursor) = &mut *cursor {
+                    // Handle materialized view column access
+                    let value = return_if_io!(mv_cursor.column(*column));
+                    drop(cursor);
+                    state.registers[*dest] = Register::Value(value);
+                    break 'outer;
+                }
+                // Fall back to normal handling
+            }
+
             let (_, cursor_type) = program.cursor_ref.get(*cursor_id).unwrap();
             match cursor_type {
-                CursorType::BTreeTable(_) | CursorType::BTreeIndex(_) => {
+                CursorType::BTreeTable(_)
+                | CursorType::BTreeIndex(_)
+                | CursorType::MaterializedView(_, _) => {
                     'ifnull: {
                         let mut cursor_ref = must_be_btree_cursor!(
                             *cursor_id,
@@ -1843,12 +1909,19 @@ pub fn op_next(
     );
     assert!(pc_if_next.is_offset());
     let is_empty = {
-        let mut cursor = must_be_btree_cursor!(*cursor_id, program.cursor_ref, state, "Next");
-        let cursor = cursor.as_btree_mut();
-        cursor.set_null_flag(false);
-        return_if_io!(cursor.next());
-
-        cursor.is_empty()
+        let mut cursor = state.get_cursor(*cursor_id);
+        match &mut *cursor {
+            Cursor::BTree(btree_cursor) => {
+                btree_cursor.set_null_flag(false);
+                return_if_io!(btree_cursor.next());
+                btree_cursor.is_empty()
+            }
+            Cursor::MaterializedView(mv_cursor) => {
+                let has_more = return_if_io!(mv_cursor.next());
+                !has_more
+            }
+            _ => panic!("Next on non-btree/materialized-view cursor"),
+        }
     };
     if !is_empty {
         // Increment metrics for row read
@@ -2444,9 +2517,18 @@
             } else {
                 state.registers[*dest] = Register::Value(Value::Null);
             }
+        } else if let Some(Cursor::MaterializedView(mv_cursor)) =
+            cursors.get_mut(*cursor_id).unwrap()
+        {
+            if let Some(rowid) = return_if_io!(mv_cursor.rowid()) {
+                state.registers[*dest] = Register::Value(Value::Integer(rowid));
+            } else {
+                state.registers[*dest] = Register::Value(Value::Null);
+            }
         } else {
             return Err(LimboError::InternalError(
-                "RowId: cursor is not a table or virtual cursor".to_string(),
+                "RowId: cursor is not a table, virtual, or materialized view cursor"
+                    .to_string(),
             ));
         }
         break;
@@ -2497,40 +2579,67 @@ pub fn op_seek_rowid(
     assert!(target_pc.is_offset());
     let (pc, did_seek) = {
         let mut cursor = state.get_cursor(*cursor_id);
-        let cursor = cursor.as_btree_mut();
-        let rowid = match state.registers[*src_reg].get_value() {
-            Value::Integer(rowid) => Some(*rowid),
-            Value::Null => None,
-            // For non-integer values try to apply affinity and convert them to integer.
-            other => {
-                let mut temp_reg = Register::Value(other.clone());
-                let converted = apply_affinity_char(&mut temp_reg, Affinity::Numeric);
-                if converted {
-                    match temp_reg.get_value() {
-                        Value::Integer(i) => Some(*i),
-                        Value::Float(f) => Some(*f as i64),
-                        _ => unreachable!("apply_affinity_char with Numeric should produce an integer if it returns true"),
-                    }
-                } else {
-                    None
-                }
-            }
-        };
-
-        match rowid {
-            Some(rowid) => {
-                let seek_result = return_if_io!(
-                    cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true })
-                );
-                let pc = if !matches!(seek_result, SeekResult::Found) {
-                    target_pc.as_offset_int()
-                } else {
-                    state.pc + 1
-                };
-                (pc, true)
-            }
-            None => (target_pc.as_offset_int(), false),
-        }
+
+        // Handle MaterializedView cursor
+        let (pc, did_seek) = match &mut *cursor {
+            Cursor::MaterializedView(mv_cursor) => {
+                let rowid = match state.registers[*src_reg].get_value() {
+                    Value::Integer(rowid) => Some(*rowid),
+                    Value::Null => None,
+                    _ => None,
+                };
+
+                match rowid {
+                    Some(rowid) => {
+                        let seek_result = return_if_io!(mv_cursor
+                            .seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true }));
+                        let pc = if !matches!(seek_result, SeekResult::Found) {
+                            target_pc.as_offset_int()
+                        } else {
+                            state.pc + 1
+                        };
+                        (pc, true)
+                    }
+                    None => (target_pc.as_offset_int(), false),
+                }
+            }
+            Cursor::BTree(btree_cursor) => {
+                let rowid = match state.registers[*src_reg].get_value() {
+                    Value::Integer(rowid) => Some(*rowid),
+                    Value::Null => None,
+                    // For non-integer values try to apply affinity and convert them to integer.
+                    other => {
+                        let mut temp_reg = Register::Value(other.clone());
+                        let converted = apply_affinity_char(&mut temp_reg, Affinity::Numeric);
+                        if converted {
+                            match temp_reg.get_value() {
+                                Value::Integer(i) => Some(*i),
+                                Value::Float(f) => Some(*f as i64),
+                                _ => unreachable!("apply_affinity_char with Numeric should produce an integer if it returns true"),
+                            }
+                        } else {
+                            None
+                        }
+                    }
+                };
+
+                match rowid {
+                    Some(rowid) => {
+                        let seek_result = return_if_io!(btree_cursor
+                            .seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true }));
+                        let pc = if !matches!(seek_result, SeekResult::Found) {
+                            target_pc.as_offset_int()
+                        } else {
+                            state.pc + 1
+                        };
+                        (pc, true)
+                    }
+                    None => (target_pc.as_offset_int(), false),
+                }
+            }
+            _ => panic!("SeekRowid on non-btree/materialized-view cursor"),
+        };
+        (pc, did_seek)
     };
     // Increment btree_seeks metric for SeekRowid operation after cursor is dropped
     if did_seek {
@@ -5192,12 +5301,11 @@ pub fn op_insert(
         match &state.op_insert_state.sub_state {
             OpInsertSubState::MaybeCaptureRecord => {
                 let schema = program.connection.schema.borrow();
-                let dependent_views =
-                    schema.get_dependent_materialized_views_unnormalized(table_name);
+                let dependent_views = schema.get_dependent_materialized_views(table_name);
                 // If there are no dependent views, we don't need to capture the old record.
                 // We also don't need to do it if the rowid of the UPDATEd row was changed, because that means
                 // we deleted it earlier and `op_delete` already captured the change.
-                if dependent_views.is_none() || flag.has(InsertFlags::UPDATE_ROWID_CHANGE) {
+                if dependent_views.is_empty() || flag.has(InsertFlags::UPDATE_ROWID_CHANGE) {
                     if flag.has(InsertFlags::REQUIRE_SEEK) {
                         state.op_insert_state.sub_state = OpInsertSubState::Seek;
                     } else {
@@ -5303,9 +5411,8 @@ pub fn op_insert(
                     state.op_insert_state.sub_state = OpInsertSubState::UpdateLastRowid;
                 } else {
                     let schema = program.connection.schema.borrow();
-                    let dependent_views =
-                        schema.get_dependent_materialized_views_unnormalized(table_name);
-                    if dependent_views.is_some() {
+                    let dependent_views = schema.get_dependent_materialized_views(table_name);
+                    if !dependent_views.is_empty() {
                         state.op_insert_state.sub_state = OpInsertSubState::ApplyViewChange;
                     } else {
                         break;
@@ -5325,9 +5432,8 @@ pub fn op_insert(
                     program.n_change.set(prev_changes + 1);
                 }
                 let schema = program.connection.schema.borrow();
-                let dependent_views =
-                    schema.get_dependent_materialized_views_unnormalized(table_name);
-                if dependent_views.is_some() {
+                let dependent_views = schema.get_dependent_materialized_views(table_name);
+                if !dependent_views.is_empty() {
                     state.op_insert_state.sub_state = OpInsertSubState::ApplyViewChange;
                     continue;
                 }
@@ -5335,10 +5441,8 @@ pub fn op_insert(
             }
             OpInsertSubState::ApplyViewChange => {
                 let schema = program.connection.schema.borrow();
-                let dependent_views =
-                    schema.get_dependent_materialized_views_unnormalized(table_name);
-                assert!(dependent_views.is_some());
-                let dependent_views = dependent_views.unwrap();
+                let dependent_views = schema.get_dependent_materialized_views(table_name);
+                assert!(!dependent_views.is_empty());
 
                 let (key, values) = {
                     let mut cursor = state.get_cursor(*cursor_id);
@@ -5383,17 +5487,22 @@ pub fn op_insert(
                     (key, new_values)
                 };
 
-                let mut tx_states = program.connection.view_transaction_states.borrow_mut();
                 if let Some((key, values)) = state.op_insert_state.old_record.take() {
                     for view_name in dependent_views.iter() {
-                        let tx_state = tx_states.entry(view_name.clone()).or_default();
-                        tx_state.delta.delete(key, values.clone());
+                        let tx_state = program
+                            .connection
+                            .view_transaction_states
+                            .get_or_create(view_name);
+                        tx_state.delete(key, values.clone());
                     }
                 }
 
                 for view_name in dependent_views.iter() {
-                    let tx_state = tx_states.entry(view_name.clone()).or_default();
+                    let tx_state = program
+                        .connection
+                        .view_transaction_states
+                        .get_or_create(view_name);
 
-                    tx_state.delta.insert(key, values.clone());
+                    tx_state.insert(key, values.clone());
                 }
 
                 break;
@@ -5522,10 +5631,12 @@ pub fn op_delete(
                 assert!(!dependent_views.is_empty());
                 let maybe_deleted_record = state.op_delete_state.deleted_record.take();
                 if let Some((key, values)) = maybe_deleted_record {
-                    let mut tx_states = program.connection.view_transaction_states.borrow_mut();
                     for view_name in dependent_views {
-                        let tx_state = tx_states.entry(view_name.clone()).or_default();
-                        tx_state.delta.delete(key, values.clone());
+                        let tx_state = program
+                            .connection
+                            .view_transaction_states
+                            .get_or_create(&view_name);
+                        tx_state.delete(key, values.clone());
                     }
                 }
                 break;
@@ -6232,7 +6343,10 @@ pub fn op_open_write(
     } else {
         let num_columns = match cursor_type {
             CursorType::BTreeTable(table_rc) => table_rc.columns.len(),
-            _ => unreachable!("Expected BTreeTable. This should not have happened."),
+            CursorType::MaterializedView(table_rc, _) => table_rc.columns.len(),
+            _ => unreachable!(
+                "Expected BTreeTable or MaterializedView. This should not have happened."
+            ),
         };
 
         let cursor =
@@ -6453,6 +6567,7 @@ pub fn op_parse_schema(
         },
         insn
     );
+
     let conn = program.connection.clone();
     // set auto commit to false in order for parse schema to not commit changes as transaction state is stored in connection,
     // and we use the same connection for nested query.
@@ -6464,7 +6579,7 @@ pub fn op_parse_schema(
 
         conn.with_schema_mut(|schema| {
             // TODO: This function below is synchronous, make it async
-            let existing_views = schema.materialized_views.clone();
+            let existing_views = schema.incremental_views.clone();
             conn.is_nested_stmt.set(true);
             parse_schema_rows(
                 stmt,
@@ -6479,7 +6594,7 @@ pub fn op_parse_schema(
 
         conn.with_schema_mut(|schema| {
             // TODO: This function below is synchronous, make it async
-            let existing_views = schema.materialized_views.clone();
+            let existing_views = schema.incremental_views.clone();
             conn.is_nested_stmt.set(true);
             parse_schema_rows(
                 stmt,
@@ -6500,14 +6615,75 @@ pub fn op_parse_schema(
 pub fn op_populate_materialized_views(
     program: &Program,
     state: &mut ProgramState,
-    _insn: &Insn,
-    _pager: &Rc<Pager>,
+    insn: &Insn,
+    pager: &Rc<Pager>,
     _mv_store: Option<&Arc<MvStore>>,
 ) -> Result<InsnFunctionStepResult> {
-    let conn = program.connection.clone();
-    let schema = conn.schema.borrow();
+    load_insn!(PopulateMaterializedViews { cursors }, insn);
+
+    let conn = program.connection.clone();
+
+    // For each view, get its cursor and root page
+    let mut view_info = Vec::new();
+    {
+        let cursors_ref = state.cursors.borrow();
+        for (view_name, cursor_id) in cursors {
+            // Get the cursor to find the root page
+            let cursor = cursors_ref
+                .get(*cursor_id)
+                .and_then(|c| c.as_ref())
+                .ok_or_else(|| {
+                    LimboError::InternalError(format!("Cursor {cursor_id} not found"))
+                })?;
+
+            let root_page = match cursor {
+                crate::types::Cursor::BTree(btree_cursor) => btree_cursor.root_page(),
+                _ => {
+                    return Err(LimboError::InternalError(
+                        "Expected BTree cursor for materialized view".into(),
+                    ))
+                }
+            };
+
+            view_info.push((view_name.clone(), root_page, *cursor_id));
+        }
+    }
+
+    // Now populate the views (after releasing the schema borrow)
+    for (view_name, _root_page, cursor_id) in view_info {
+        let schema = conn.schema.borrow();
+        if let Some(view) = schema.get_materialized_view(&view_name) {
+            let mut view = view.lock().unwrap();
+            // Drop the schema borrow before calling populate_from_table
+            drop(schema);
+
+            // Get a mutable reference to the cursor for writing
+            let mut cursors_ref = state.cursors.borrow_mut();
+            let cursor = cursors_ref
+                .get_mut(cursor_id)
+                .and_then(|c| c.as_mut())
+                .ok_or_else(|| {
+                    LimboError::InternalError(format!(
+                        "Cursor {cursor_id} not found for population"
+                    ))
+                })?;
+
+            // Extract the BTreeCursor
+            let btree_cursor = match cursor {
+                crate::types::Cursor::BTree(btree_cursor) => btree_cursor,
+                _ => {
+                    return Err(LimboError::InternalError(
+                        "Expected BTree cursor for materialized view population".into(),
+                    ))
+                }
+            };
+
+            // Now populate it with the cursor for writing
+            return_if_io!(view.populate_from_table(&conn, pager, btree_cursor.as_mut()));
+        }
+    }
 
-    return_if_io!(schema.populate_materialized_views(&conn));
     // All views populated, advance to next instruction
     state.pc += 1;
     Ok(InsnFunctionStepResult::Step)
@@ -6932,6 +7108,9 @@ pub fn op_open_ephemeral(
         CursorType::VirtualTable(_) => {
             panic!("OpenEphemeral on virtual table cursor, use Insn::VOpen instead");
         }
+        CursorType::MaterializedView(_, _) => {
+            panic!("OpenEphemeral on materialized view cursor");
+        }
     }
 
     state.pc += 1;
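All the opcode changes above share one dispatch shape, and one invariant worth noting: BTreeCursor reports exhaustion after the fact via is_empty(), while MaterializedViewCursor reports it directly from next()/is_valid(). Schematically:

    // Shared dispatch shape (schematic; other cursor arms elided).
    let done = match &mut *cursor {
        Cursor::BTree(c) => {
            return_if_io!(c.next());
            c.is_empty() // btree: query emptiness after stepping
        }
        Cursor::MaterializedView(c) => {
            !return_if_io!(c.next()) // mv: next() returns "has more"
        }
        _ => panic!("Next on non-btree/materialized-view cursor"),
    };
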
must_be_btree_cursor { let cursor = match cursor_type { CursorType::BTreeTable(_) => $state.get_cursor($cursor_id), CursorType::BTreeIndex(_) => $state.get_cursor($cursor_id), + CursorType::MaterializedView(_, _) => $state.get_cursor($cursor_id), CursorType::Pseudo(_) => panic!("{} on pseudo cursor", $insn_name), CursorType::Sorter => panic!("{} on sorter cursor", $insn_name), CursorType::VirtualTable(_) => panic!("{} on virtual table cursor", $insn_name), @@ -518,20 +533,97 @@ impl Program { } #[instrument(skip_all, level = Level::DEBUG)] - fn apply_view_deltas(&self, rollback: bool) { - if self.connection.view_transaction_states.borrow().is_empty() { - return; - } + fn apply_view_deltas( + &self, + state: &mut ProgramState, + rollback: bool, + pager: &Rc, + ) -> Result> { + use crate::types::IOResult; - let tx_states = self.connection.view_transaction_states.take(); + loop { + match &state.view_delta_state { + ViewDeltaCommitState::NotStarted => { + if self.connection.view_transaction_states.is_empty() { + return Ok(IOResult::Done(())); + } - if !rollback { - let schema = self.connection.schema.borrow(); + if rollback { + // On rollback, just clear and done + self.connection.view_transaction_states.clear(); + return Ok(IOResult::Done(())); + } - for (view_name, tx_state) in tx_states.iter() { - if let Some(view_mutex) = schema.get_materialized_view(view_name) { - let mut view = view_mutex.lock().unwrap(); - view.merge_delta(&tx_state.delta); + // Not a rollback - proceed with processing + let schema = self.connection.schema.borrow(); + + // Collect materialized views - they should all have storage + let mut views = Vec::new(); + for view_name in self.connection.view_transaction_states.get_view_names() { + if let Some(view_mutex) = schema.get_materialized_view(&view_name) { + let view = view_mutex.lock().unwrap(); + let root_page = view.get_root_page(); + + // Materialized views should always have storage (root_page != 0) + assert!( + root_page != 0, + "Materialized view '{view_name}' should have a root page" + ); + + views.push(view_name); + } + } + + state.view_delta_state = ViewDeltaCommitState::Processing { + views, + current_index: 0, + }; + } + + ViewDeltaCommitState::Processing { + views, + current_index, + } => { + // At this point we know it's not a rollback + if *current_index >= views.len() { + // All done, clear the transaction states + self.connection.view_transaction_states.clear(); + state.view_delta_state = ViewDeltaCommitState::Done; + return Ok(IOResult::Done(())); + } + + let view_name = &views[*current_index]; + + let delta = self + .connection + .view_transaction_states + .get(view_name) + .unwrap() + .get_delta(); + + let schema = self.connection.schema.borrow(); + if let Some(view_mutex) = schema.get_materialized_view(view_name) { + let mut view = view_mutex.lock().unwrap(); + + // Handle I/O from merge_delta - pass pager, circuit will create its own cursor + match view.merge_delta(&delta, pager.clone())? 
{ + IOResult::Done(_) => { + // Move to next view + state.view_delta_state = ViewDeltaCommitState::Processing { + views: views.clone(), + current_index: current_index + 1, + }; + } + IOResult::IO(io) => { + // Return I/O, will resume at same index + return Ok(IOResult::IO(io)); + } + } + } + } + + ViewDeltaCommitState::Done => { + return Ok(IOResult::Done(())); } } } @@ -544,7 +636,14 @@ impl Program { mv_store: Option<&Arc>, rollback: bool, ) -> Result> { - self.apply_view_deltas(rollback); + // Apply view deltas with I/O handling + match self.apply_view_deltas(program_state, rollback, &pager)? { + IOResult::IO(io) => return Ok(IOResult::IO(io)), + IOResult::Done(_) => {} + } + + // Reset state for next use + program_state.view_delta_state = ViewDeltaCommitState::NotStarted; if self.connection.transaction_state.get() == TransactionState::None && mv_store.is_none() { // No need to do any work here if not in tx. Current MVCC logic doesn't work with this assumption, diff --git a/core/vtab.rs b/core/vtab.rs index 61db382ba..fc511ba40 100644 --- a/core/vtab.rs +++ b/core/vtab.rs @@ -6,7 +6,7 @@ use crate::{Connection, LimboError, SymbolTable, Value}; use std::ffi::c_void; use std::ptr::NonNull; use std::rc::Rc; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use turso_ext::{ConstraintInfo, IndexInfo, OrderByInfo, ResultCode, VTabKind, VTabModuleImpl}; use turso_parser::{ast, parser::Parser}; @@ -14,7 +14,6 @@ use turso_parser::{ast, parser::Parser}; pub(crate) enum VirtualTableType { Pragma(PragmaVirtualTable), External(ExtVirtualTable), - View(crate::vtab_view::ViewVirtualTable), } #[derive(Clone, Debug)] @@ -30,7 +29,6 @@ impl VirtualTable { match &self.vtab_type { VirtualTableType::Pragma(_) => true, VirtualTableType::External(table) => table.readonly(), - VirtualTableType::View(_) => true, } } @@ -88,21 +86,6 @@ impl VirtualTable { Ok(Arc::new(vtab)) } - /// Create a virtual table for a view - pub(crate) fn view( - view_name: &str, - columns: Vec, - view: Arc>, - ) -> crate::Result> { - let vtab = VirtualTable { - name: view_name.to_owned(), - columns, - kind: VTabKind::VirtualTable, - vtab_type: VirtualTableType::View(crate::vtab_view::ViewVirtualTable { view }), - }; - Ok(Arc::new(vtab)) - } - fn resolve_columns(schema: String) -> crate::Result> { let mut parser = Parser::new(schema.as_bytes()); if let ast::Cmd::Stmt(ast::Stmt::CreateTable { body, .. 
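apply_view_deltas is re-entrant: every IOResult::IO return leaves view_delta_state parked on the interrupted view, so the caller simply re-invokes and current_index never skips or repeats a merge. A condensed driver loop (hypothetical harness; the completion-wait call is an assumption, not an API from this patch):

    // Hypothetical driver around the commit path above.
    loop {
        match program.apply_view_deltas(&mut state, false, &pager)? {
            IOResult::Done(()) => break, // all view deltas merged, states cleared
            IOResult::IO(io) => {
                // Assumed: wait for the pending I/O, then retry the same step.
                io.wait()?;
            }
        }
    }
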
diff --git a/core/vtab.rs b/core/vtab.rs
index 61db382ba..fc511ba40 100644
--- a/core/vtab.rs
+++ b/core/vtab.rs
@@ -6,7 +6,7 @@ use crate::{Connection, LimboError, SymbolTable, Value};
 use std::ffi::c_void;
 use std::ptr::NonNull;
 use std::rc::Rc;
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
 use turso_ext::{ConstraintInfo, IndexInfo, OrderByInfo, ResultCode, VTabKind, VTabModuleImpl};
 use turso_parser::{ast, parser::Parser};
 
@@ -14,7 +14,6 @@ use turso_parser::{ast, parser::Parser};
 pub(crate) enum VirtualTableType {
     Pragma(PragmaVirtualTable),
     External(ExtVirtualTable),
-    View(crate::vtab_view::ViewVirtualTable),
 }
 
 #[derive(Clone, Debug)]
@@ -30,7 +29,6 @@ impl VirtualTable {
         match &self.vtab_type {
             VirtualTableType::Pragma(_) => true,
             VirtualTableType::External(table) => table.readonly(),
-            VirtualTableType::View(_) => true,
         }
     }
 
@@ -88,21 +86,6 @@ impl VirtualTable {
         Ok(Arc::new(vtab))
     }
 
-    /// Create a virtual table for a view
-    pub(crate) fn view(
-        view_name: &str,
-        columns: Vec<Column>,
-        view: Arc<Mutex<IncrementalView>>,
-    ) -> crate::Result<Arc<Self>> {
-        let vtab = VirtualTable {
-            name: view_name.to_owned(),
-            columns,
-            kind: VTabKind::VirtualTable,
-            vtab_type: VirtualTableType::View(crate::vtab_view::ViewVirtualTable { view }),
-        };
-        Ok(Arc::new(vtab))
-    }
-
     fn resolve_columns(schema: String) -> crate::Result<Vec<Column>> {
         let mut parser = Parser::new(schema.as_bytes());
         if let ast::Cmd::Stmt(ast::Stmt::CreateTable { body, .. }) = parser.next_cmd()?.ok_or(
@@ -124,9 +107,6 @@ impl VirtualTable {
             VirtualTableType::External(table) => {
                 Ok(VirtualTableCursor::External(table.open(conn.clone())?))
             }
-            VirtualTableType::View(table) => {
-                Ok(VirtualTableCursor::View(Box::new(table.open(conn)?)))
-            }
         }
     }
 
@@ -134,7 +114,6 @@ impl VirtualTable {
         match &self.vtab_type {
             VirtualTableType::Pragma(_) => Err(LimboError::ReadOnly),
             VirtualTableType::External(table) => table.update(args),
-            VirtualTableType::View(_) => Err(LimboError::ReadOnly),
         }
     }
 
@@ -142,7 +121,6 @@ impl VirtualTable {
         match &self.vtab_type {
             VirtualTableType::Pragma(_) => Ok(()),
             VirtualTableType::External(table) => table.destroy(),
-            VirtualTableType::View(_) => Ok(()),
         }
     }
 
@@ -154,7 +132,6 @@ impl VirtualTable {
         match &self.vtab_type {
             VirtualTableType::Pragma(table) => table.best_index(constraints),
             VirtualTableType::External(table) => table.best_index(constraints, order_by),
-            VirtualTableType::View(view) => view.best_index(),
         }
     }
 }
@@ -162,7 +139,6 @@ impl VirtualTable {
 pub enum VirtualTableCursor {
     Pragma(Box<PragmaVirtualTableCursor>),
     External(ExtVirtualTableCursor),
-    View(Box<ViewVirtualTableCursor>),
 }
 
 impl VirtualTableCursor {
@@ -170,7 +146,6 @@ impl VirtualTableCursor {
         match self {
             VirtualTableCursor::Pragma(cursor) => cursor.next(),
             VirtualTableCursor::External(cursor) => cursor.next(),
-            VirtualTableCursor::View(cursor) => cursor.next(),
         }
     }
 
@@ -178,7 +153,6 @@ impl VirtualTableCursor {
         match self {
             VirtualTableCursor::Pragma(cursor) => cursor.rowid(),
             VirtualTableCursor::External(cursor) => cursor.rowid(),
-            VirtualTableCursor::View(cursor) => cursor.rowid(),
         }
     }
 
@@ -186,7 +160,6 @@ impl VirtualTableCursor {
         match self {
             VirtualTableCursor::Pragma(cursor) => cursor.column(column),
             VirtualTableCursor::External(cursor) => cursor.column(column),
-            VirtualTableCursor::View(cursor) => cursor.column(column),
         }
     }
 
@@ -202,7 +175,6 @@ impl VirtualTableCursor {
             VirtualTableCursor::External(cursor) => {
                 cursor.filter(idx_num, idx_str, arg_count, args)
             }
-            VirtualTableCursor::View(cursor) => cursor.filter(args),
         }
     }
 }
diff --git a/core/vtab_view.rs b/core/vtab_view.rs
deleted file mode 100644
index 4b44f0592..000000000
--- a/core/vtab_view.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-use crate::incremental::view::IncrementalView;
-use crate::{Connection, LimboError, Value, VirtualTable};
-use std::sync::{Arc, Mutex};
-
-/// Create a virtual table wrapper for a view
-pub fn create_view_virtual_table(
-    view_name: &str,
-    view: Arc<Mutex<IncrementalView>>,
-) -> crate::Result<Arc<VirtualTable>> {
-    // Use the VirtualTable::view method we added
-    let view_locked = view.lock().map_err(|_| {
-        LimboError::InternalError("Failed to lock view for virtual table creation".to_string())
-    })?;
-    let columns = view_locked.columns.clone();
-    drop(view_locked); // Release the lock before passing the Arc
-    VirtualTable::view(view_name, columns, view)
-}
-
-/// Virtual table wrapper for incremental views
-#[derive(Clone, Debug)]
-pub struct ViewVirtualTable {
-    pub view: Arc<Mutex<IncrementalView>>,
-}
-
-impl ViewVirtualTable {
-    pub fn best_index(&self) -> crate::Result<turso_ext::IndexInfo> {
-        // Views don't use indexes - return a simple index info
-        Ok(turso_ext::IndexInfo {
-            idx_num: 0,
-            idx_str: None,
-            order_by_consumed: false,
-            estimated_cost: 1000000.0,
-            estimated_rows: 1000,
-            constraint_usages: Vec::new(),
-        })
-    }
-
-    pub fn open(&self, conn: Arc<Connection>) -> crate::Result<ViewVirtualTableCursor> {
-        // Views are now populated during schema parsing (in parse_schema_rows)
-        // so we just get the current data from the view.
-
-        let view = self.view.lock().map_err(|_| {
-            LimboError::InternalError("Failed to lock view for reading".to_string())
-        })?;
-
-        let tx_states = conn.view_transaction_states.borrow();
-        let tx_state = tx_states.get(view.name());
-
-        let data: Vec<(i64, Vec<Value>)> = view.current_data(tx_state);
-        Ok(ViewVirtualTableCursor {
-            data,
-            current_pos: 0,
-        })
-    }
-}
-
-/// Cursor for iterating over view data
-pub struct ViewVirtualTableCursor {
-    data: Vec<(i64, Vec<Value>)>,
-    current_pos: usize,
-}
-
-impl ViewVirtualTableCursor {
-    pub fn next(&mut self) -> crate::Result<bool> {
-        if self.current_pos < self.data.len() {
-            self.current_pos += 1;
-            Ok(self.current_pos < self.data.len())
-        } else {
-            Ok(false)
-        }
-    }
-
-    pub fn rowid(&self) -> i64 {
-        if self.current_pos < self.data.len() {
-            self.data[self.current_pos].0
-        } else {
-            -1
-        }
-    }
-
-    pub fn column(&self, column: usize) -> crate::Result<Value> {
-        if self.current_pos >= self.data.len() {
-            return Ok(Value::Null);
-        }
-
-        let (_row_key, values) = &self.data[self.current_pos];
-
-        // Return the value at the requested column index
-        if let Some(value) = values.get(column) {
-            Ok(value.clone())
-        } else {
-            Ok(Value::Null)
-        }
-    }
-
-    pub fn filter(&mut self, _args: Vec<Value>) -> crate::Result<bool> {
-        // Reset to beginning for new filter
-        self.current_pos = 0;
-        Ok(!self.data.is_empty())
-    }
-}
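With vtab_view.rs gone, nothing snapshots a view into a Vec at open time anymore; reads stream from the view's btree through MaterializedViewCursor, with the connection's uncommitted delta overlaid per cursor as wired up in op_open_read. A rough sketch of the replacement read loop (using only the cursor calls the opcodes above exercise):

    // Sketch of the streaming read path that replaces the deleted vtab.
    let tx_state = conn.view_transaction_states.get_or_create(view_name);
    let mut cursor = MaterializedViewCursor::new(btree_cursor, view_mutex, pager, tx_state)?;
    return_if_io!(cursor.rewind());
    while cursor.is_valid()? {
        if let Some(rowid) = return_if_io!(cursor.rowid()) {
            let first_col = return_if_io!(cursor.column(0));
            // ... hand (rowid, first_col, ...) to the caller ...
        }
        if !return_if_io!(cursor.next()) {
            break;
        }
    }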