use super::compiler::{DbspCircuit, DbspCompiler, DeltaSet};
use super::dbsp::Delta;
use super::operator::{ComputationTracker, FilterPredicate};
use crate::schema::{BTreeTable, Column, Schema};
use crate::storage::btree::BTreeCursor;
use crate::translate::logical::LogicalPlanBuilder;
use crate::types::{IOResult, Value};
use crate::util::extract_view_columns;
use crate::{return_if_io, LimboError, Pager, Result, Statement};
use std::cell::RefCell;
use std::collections::HashMap;
use std::fmt;
use std::rc::Rc;
use std::sync::{Arc, Mutex};
use turso_parser::ast;
use turso_parser::{
    ast::{Cmd, Stmt},
    parser::Parser,
};

/// State machine for populating a view from its source table
pub enum PopulateState {
    /// Initial state - need to prepare the query
    Start,
    /// Actively processing rows from the query
    Processing {
        stmt: Box<Statement>,
        rows_processed: usize,
        /// If we're in the middle of processing a row (merge_delta returned I/O)
        pending_row: Option<(i64, Vec<Value>)>, // (rowid, values)
    },
    /// Population complete
    Done,
}

impl fmt::Debug for PopulateState {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PopulateState::Start => write!(f, "Start"),
            PopulateState::Processing {
                rows_processed,
                pending_row,
                ..
            } => f
                .debug_struct("Processing")
                .field("rows_processed", rows_processed)
                .field("has_pending", &pending_row.is_some())
                .finish(),
            PopulateState::Done => write!(f, "Done"),
        }
    }
}

/// Per-connection transaction state for incremental views
#[derive(Debug, Clone, Default)]
pub struct ViewTransactionState {
    // Per-table deltas for uncommitted changes.
    // Maps table_name -> Delta for that table.
    // Using RefCell for interior mutability.
    table_deltas: RefCell<HashMap<String, Delta>>,
}

impl ViewTransactionState {
    /// Create a new transaction state
    pub fn new() -> Self {
        Self {
            table_deltas: RefCell::new(HashMap::new()),
        }
    }

    /// Insert a row into the delta for a specific table
    pub fn insert(&self, table_name: &str, key: i64, values: Vec<Value>) {
        let mut deltas = self.table_deltas.borrow_mut();
        let delta = deltas.entry(table_name.to_string()).or_default();
        delta.insert(key, values);
    }

    /// Delete a row from the delta for a specific table
    pub fn delete(&self, table_name: &str, key: i64, values: Vec<Value>) {
        let mut deltas = self.table_deltas.borrow_mut();
        let delta = deltas.entry(table_name.to_string()).or_default();
        delta.delete(key, values);
    }

    /// Clear all changes in the delta
    pub fn clear(&self) {
        self.table_deltas.borrow_mut().clear();
    }

    /// Get deltas organized by table
    pub fn get_table_deltas(&self) -> HashMap<String, Delta> {
        self.table_deltas.borrow().clone()
    }

    /// Check if the delta is empty
    pub fn is_empty(&self) -> bool {
        self.table_deltas.borrow().values().all(|d| d.is_empty())
    }

    /// Returns how many elements exist in the delta.
    pub fn len(&self) -> usize {
        self.table_deltas.borrow().values().map(|d| d.len()).sum()
    }
}

/// Container for all view transaction states within a connection.
/// Provides interior mutability for the map of view states.
#[derive(Debug, Clone, Default)]
pub struct AllViewsTxState {
    states: Rc<RefCell<HashMap<String, Rc<ViewTransactionState>>>>,
}

impl AllViewsTxState {
    /// Create a new container for view transaction states
    pub fn new() -> Self {
        Self {
            states: Rc::new(RefCell::new(HashMap::new())),
        }
    }

    /// Get or create a transaction state for a view
    pub fn get_or_create(&self, view_name: &str) -> Rc<ViewTransactionState> {
        let mut states = self.states.borrow_mut();
        states
            .entry(view_name.to_string())
            .or_insert_with(|| Rc::new(ViewTransactionState::new()))
            .clone()
    }

    /// Get a transaction state for a view if it exists
    pub fn get(&self, view_name: &str) -> Option<Rc<ViewTransactionState>> {
        self.states.borrow().get(view_name).cloned()
    }

    /// Clear all transaction states
    pub fn clear(&self) {
        self.states.borrow_mut().clear();
    }

    /// Check if there are no transaction states
    pub fn is_empty(&self) -> bool {
        self.states.borrow().is_empty()
    }

    /// Get all view names that have transaction states
    pub fn get_view_names(&self) -> Vec<String> {
        self.states.borrow().keys().cloned().collect()
    }
}

/// Incremental view that maintains its state through a DBSP circuit.
///
/// This version keeps everything in memory. That is acceptable for small views, since DBSP
/// doesn't have to track the history of changes. Still, for very large views (think of the
/// result of `create view v as select * from tbl where x > 1;` containing a billion rows),
/// keeping everything in memory becomes a problem.
///
/// We should have a version of this that materializes the results. Materializing will also be
/// good for large aggregations, because then we don't have to re-compute when opening the
/// database again.
///
/// Uses DBSP circuits for incremental computation.
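///
/// A hedged sketch of the intended call flow (not a compiled doctest; `schema`, `conn`,
/// `pager`, the btree roots and `btree_cursor` are assumed to come from the surrounding
/// engine):
///
/// ```ignore
/// let mut view = IncrementalView::from_sql(
///     "CREATE MATERIALIZED VIEW v AS SELECT * FROM tbl WHERE x > 1",
///     &schema,
///     main_data_root,
///     internal_state_root,
/// )?;
/// // Initial population re-runs the defining query; it may return IOResult::IO repeatedly
/// // and is designed to be resumed until it reports Done.
/// view.populate_from_table(&conn, &pager, &mut btree_cursor)?;
/// // Afterwards, committed changes arrive as per-table deltas:
/// view.merge_delta(delta_set, pager.clone())?;
/// ```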
#[derive(Debug)]
pub struct IncrementalView {
    name: String,
    // WHERE clause predicate for filtering (kept for compatibility)
    pub where_predicate: FilterPredicate,
    // The SELECT statement that defines how to transform input data
    pub select_stmt: ast::Select,
    // DBSP circuit that encapsulates the computation
    circuit: DbspCircuit,
    // All tables referenced by this view (from FROM clause and JOINs)
    referenced_tables: Vec<Arc<BTreeTable>>,
    // The view's output columns with their types
    pub columns: Vec<Column>,
    // State machine for population
    populate_state: PopulateState,
    // Computation tracker for statistics.
    // We will use this one day to export rows_read; for now it only lets tests verify that
    // we're doing the expected amount of compute.
    #[cfg_attr(not(test), allow(dead_code))]
    pub tracker: Arc<Mutex<ComputationTracker>>,
    // Root page of the btree storing the materialized state (0 for unmaterialized)
    root_page: usize,
}

impl IncrementalView {
    /// Validate that a CREATE MATERIALIZED VIEW statement can be handled by IncrementalView.
    /// This should be called early, before updating sqlite_master.
    pub fn can_create_view(select: &ast::Select) -> Result<()> {
        // Check for JOINs
        let (join_tables, join_condition) = Self::extract_join_info(select);
        if join_tables.is_some() || join_condition.is_some() {
            return Err(LimboError::ParseError(
                "JOINs in views are not yet supported".to_string(),
            ));
        }
        Ok(())
    }

    /// Try to compile the SELECT statement into a DBSP circuit
    fn try_compile_circuit(
        select: &ast::Select,
        schema: &Schema,
        main_data_root: usize,
        internal_state_root: usize,
    ) -> Result<DbspCircuit> {
        // Build the logical plan from the SELECT statement
        let mut builder = LogicalPlanBuilder::new(schema);

        // Convert Select to a Stmt for the builder
        let stmt = ast::Stmt::Select(select.clone());
        let logical_plan = builder.build_statement(&stmt)?;

        // Compile the logical plan to a DBSP circuit with the storage roots
        let compiler = DbspCompiler::new(main_data_root, internal_state_root);
        let circuit = compiler.compile(&logical_plan)?;

        Ok(circuit)
    }

    /// Get an iterator over column names, using enumerated naming for unnamed columns
    pub fn column_names(&self) -> impl Iterator<Item = String> + '_ {
        self.columns.iter().enumerate().map(|(i, col)| {
            col.name
                .clone()
                .unwrap_or_else(|| format!("column{}", i + 1))
        })
    }

    /// Check if this view has the same SQL definition as the provided SQL string
    pub fn has_same_sql(&self, sql: &str) -> bool {
        // Parse the SQL to extract just the SELECT statement
        if let Ok(Some(Cmd::Stmt(Stmt::CreateMaterializedView { select, .. }))) =
            Parser::new(sql.as_bytes()).next_cmd()
        {
            // Compare the parsed SELECT statements
            return self.select_stmt == select;
        }
        false
    }

    /// Validate a SELECT statement and extract the columns it would produce.
    /// This is used during CREATE MATERIALIZED VIEW to validate the view before storing it.
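    ///
    /// A hedged sketch (not a compiled doctest; the parsed `select` and the `schema` are
    /// assumed to come from the caller):
    ///
    /// ```ignore
    /// let cols = IncrementalView::validate_and_extract_columns(&select, &schema)?;
    /// // For now this returns every column of the base table, regardless of the
    /// // projection list in the SELECT.
    /// assert!(!cols.is_empty());
    /// ```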
    pub fn validate_and_extract_columns(
        select: &ast::Select,
        schema: &Schema,
    ) -> Result<Vec<Column>> {
        // For now, just extract columns from a simple select.
        // This will need to be expanded to handle joins, aggregates, etc.

        // Get the base table name
        let base_table_name = Self::extract_base_table(select).ok_or_else(|| {
            LimboError::ParseError("Cannot extract base table from SELECT".to_string())
        })?;

        // Get the table from schema
        let table = schema
            .get_table(&base_table_name)
            .and_then(|t| t.btree())
            .ok_or_else(|| LimboError::ParseError(format!("Table {base_table_name} not found")))?;

        // For now, return all columns from the base table.
        // In the future, this should parse the select list and handle projections.
        Ok(table.columns.clone())
    }

    pub fn from_sql(
        sql: &str,
        schema: &Schema,
        main_data_root: usize,
        internal_state_root: usize,
    ) -> Result<Self> {
        let mut parser = Parser::new(sql.as_bytes());
        let cmd = parser.next_cmd()?;
        let cmd = cmd.expect("View is an empty statement");
        match cmd {
            Cmd::Stmt(Stmt::CreateMaterializedView {
                if_not_exists: _,
                view_name,
                columns: _,
                select,
            }) => IncrementalView::from_stmt(
                view_name,
                select,
                schema,
                main_data_root,
                internal_state_root,
            ),
            _ => Err(LimboError::ParseError(format!(
                "View is not a CREATE MATERIALIZED VIEW statement: {sql}"
            ))),
        }
    }

    pub fn from_stmt(
        view_name: ast::QualifiedName,
        select: ast::Select,
        schema: &Schema,
        main_data_root: usize,
        internal_state_root: usize,
    ) -> Result<Self> {
        let name = view_name.name.as_str().to_string();

        let where_predicate = FilterPredicate::from_select(&select)?;

        // Extract output columns using the shared function
        let view_columns = extract_view_columns(&select, schema);

        let (join_tables, join_condition) = Self::extract_join_info(&select);
        if join_tables.is_some() || join_condition.is_some() {
            return Err(LimboError::ParseError(
                "JOINs in views are not yet supported".to_string(),
            ));
        }

        // Get all tables from FROM clause and JOINs
        let referenced_tables = Self::extract_all_tables(&select, schema)?;

        Self::new(
            name,
            where_predicate,
            select.clone(),
            referenced_tables,
            view_columns,
            schema,
            main_data_root,
            internal_state_root,
        )
    }

    #[allow(clippy::too_many_arguments)]
    pub fn new(
        name: String,
        where_predicate: FilterPredicate,
        select_stmt: ast::Select,
        referenced_tables: Vec<Arc<BTreeTable>>,
        columns: Vec<Column>,
        schema: &Schema,
        main_data_root: usize,
        internal_state_root: usize,
    ) -> Result<Self> {
        // Create the tracker that will be shared by all operators
        let tracker = Arc::new(Mutex::new(ComputationTracker::new()));

        // Compile the SELECT statement into a DBSP circuit
        let circuit =
            Self::try_compile_circuit(&select_stmt, schema, main_data_root, internal_state_root)?;

        Ok(Self {
            name,
            where_predicate,
            select_stmt,
            circuit,
            referenced_tables,
            columns,
            populate_state: PopulateState::Start,
            tracker,
            root_page: main_data_root,
        })
    }

    pub fn name(&self) -> &str {
        &self.name
    }

    /// Execute the circuit with uncommitted changes to get the processed delta
    pub fn execute_with_uncommitted(
        &mut self,
        uncommitted: DeltaSet,
        pager: Rc<Pager>,
        execute_state: &mut crate::incremental::compiler::ExecuteState,
    ) -> crate::Result<IOResult<Delta>> {
        // Initialize execute_state with the input data
        *execute_state = crate::incremental::compiler::ExecuteState::Init {
            input_data: uncommitted,
        };

        self.circuit.execute(pager, execute_state)
    }

    /// Get the root page for this materialized view's btree
    pub fn get_root_page(&self) -> usize {
        self.root_page
    }

    /// Get all table names referenced by this view
    pub fn get_referenced_table_names(&self) -> Vec<String> {
        self.referenced_tables
            .iter()
            .map(|t| t.name.clone())
            .collect()
    }

    /// Get all tables referenced by this view
    pub fn get_referenced_tables(&self) -> Vec<Arc<BTreeTable>> {
        self.referenced_tables.clone()
    }

    /// Extract all table names from a SELECT statement (including JOINs)
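    ///
    /// A hedged sketch (not a compiled doctest; `select` and `schema` are assumed to exist):
    ///
    /// ```ignore
    /// // SELECT * FROM customers INNER JOIN orders ON customers.id = orders.customer_id
    /// let tables = IncrementalView::extract_all_tables(&select, &schema)?;
    /// assert_eq!(tables.len(), 2); // customers and orders, in FROM/JOIN order
    /// ```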
    fn extract_all_tables(
        select: &ast::Select,
        schema: &Schema,
    ) -> Result<Vec<Arc<BTreeTable>>> {
        let mut tables = Vec::new();

        if let ast::OneSelect::Select {
            from: Some(ref from),
            ..
        } = select.body.select
        {
            // Get the main table from FROM clause
            if let ast::SelectTable::Table(name, _, _) = from.select.as_ref() {
                let table_name = name.name.as_str();
                if let Some(table) = schema.get_btree_table(table_name) {
                    tables.push(table.clone());
                } else {
                    return Err(LimboError::ParseError(format!(
                        "Table '{table_name}' not found in schema"
                    )));
                }
            }

            // Get all tables from JOIN clauses
            for join in &from.joins {
                if let ast::SelectTable::Table(name, _, _) = join.table.as_ref() {
                    let table_name = name.name.as_str();
                    if let Some(table) = schema.get_btree_table(table_name) {
                        tables.push(table.clone());
                    } else {
                        return Err(LimboError::ParseError(format!(
                            "Table '{table_name}' not found in schema"
                        )));
                    }
                }
            }
        }

        if tables.is_empty() {
            return Err(LimboError::ParseError(
                "No tables found in SELECT statement".to_string(),
            ));
        }

        Ok(tables)
    }

    /// Extract the base table name from a SELECT statement (for non-join cases)
    fn extract_base_table(select: &ast::Select) -> Option<String> {
        if let ast::OneSelect::Select {
            from: Some(ref from),
            ..
        } = select.body.select
        {
            if let ast::SelectTable::Table(name, _, _) = from.select.as_ref() {
                return Some(name.name.as_str().to_string());
            }
        }
        None
    }

    /// Generate the SQL query for populating the view from its source table
    fn sql_for_populate(&self) -> crate::Result<String> {
        // Get the first table from referenced tables
        if self.referenced_tables.is_empty() {
            return Err(LimboError::ParseError(
                "No tables to populate from".to_string(),
            ));
        }
        let table = &self.referenced_tables[0];

        // Check if the table has a rowid alias (INTEGER PRIMARY KEY column)
        let has_rowid_alias = table.columns.iter().any(|col| col.is_rowid_alias);

        // For now, select all columns since we don't have the static operators.
        // The circuit will handle filtering and projection.
        // If there's a rowid alias, we don't need to select rowid separately.
        let select_clause = if has_rowid_alias {
            "*".to_string()
        } else {
            "*, rowid".to_string()
        };

        // Build WHERE clause from the where_predicate
        let where_clause = self.build_where_clause(&self.where_predicate)?;

        // Construct the final query
        let query = if where_clause.is_empty() {
            format!("SELECT {} FROM {}", select_clause, table.name)
        } else {
            format!(
                "SELECT {} FROM {} WHERE {}",
                select_clause, table.name, where_clause
            )
        };

        Ok(query)
    }

    /// Build a WHERE clause from a FilterPredicate
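    ///
    /// A hedged illustration of the mapping, mirroring the match arms below (not a compiled
    /// doctest):
    ///
    /// ```ignore
    /// // Equals { column: "city", value: 'NY' }     => "city = 'NY'"
    /// // GreaterThan { column: "age", value: 18 }   => "age > 18"
    /// // And(age_pred, city_pred)                   => "(age > 18 AND city = 'NY')"
    /// ```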
    fn build_where_clause(&self, predicate: &FilterPredicate) -> crate::Result<String> {
        match predicate {
            FilterPredicate::None => Ok(String::new()),
            FilterPredicate::Equals { column, value } => {
                Ok(format!("{} = {}", column, self.value_to_sql(value)))
            }
            FilterPredicate::NotEquals { column, value } => {
                Ok(format!("{} != {}", column, self.value_to_sql(value)))
            }
            FilterPredicate::GreaterThan { column, value } => {
                Ok(format!("{} > {}", column, self.value_to_sql(value)))
            }
            FilterPredicate::GreaterThanOrEqual { column, value } => {
                Ok(format!("{} >= {}", column, self.value_to_sql(value)))
            }
            FilterPredicate::LessThan { column, value } => {
                Ok(format!("{} < {}", column, self.value_to_sql(value)))
            }
            FilterPredicate::LessThanOrEqual { column, value } => {
                Ok(format!("{} <= {}", column, self.value_to_sql(value)))
            }
            FilterPredicate::And(left, right) => {
                let left_clause = self.build_where_clause(left)?;
                let right_clause = self.build_where_clause(right)?;
                Ok(format!("({left_clause} AND {right_clause})"))
            }
            FilterPredicate::Or(left, right) => {
                let left_clause = self.build_where_clause(left)?;
                let right_clause = self.build_where_clause(right)?;
                Ok(format!("({left_clause} OR {right_clause})"))
            }
        }
    }

    /// Convert a Value to its SQL literal representation
    fn value_to_sql(&self, value: &Value) -> String {
        match value {
            Value::Null => "NULL".to_string(),
            Value::Integer(i) => i.to_string(),
            Value::Float(f) => f.to_string(),
            Value::Text(t) => format!("'{}'", t.as_str().replace('\'', "''")),
            Value::Blob(_) => "NULL".to_string(), // Blob literals not supported in WHERE clause yet
        }
    }

    /// Populate the view by scanning the source table using a state machine.
    /// This can be called multiple times and will resume from where it left off.
    /// This method is only for materialized views and will persist data to the btree.
    pub fn populate_from_table(
        &mut self,
        conn: &std::sync::Arc<crate::Connection>,
        pager: &std::rc::Rc<Pager>,
        _btree_cursor: &mut BTreeCursor,
    ) -> crate::Result<IOResult<()>> {
        // If already populated, return immediately
        if matches!(self.populate_state, PopulateState::Done) {
            return Ok(IOResult::Done(()));
        }

        // Assert that this is a materialized view with a root page
        assert!(
            self.root_page != 0,
            "populate_from_table should only be called for materialized views with root_page"
        );

        loop {
            // To avoid borrow checker issues, we need to handle state transitions carefully
            let needs_start = matches!(self.populate_state, PopulateState::Start);

            if needs_start {
                // Generate the SQL query for populating the view.
                // It is better to use a standard query than a cursor for two reasons:
                // 1) A SQL query allows us to be much more efficient in cases where we only want
                //    some rows, in particular for indexed filters.
                // 2) There are two types of cursors: index and table. In some situations (for
                //    example if the table has an integer primary key), the key will be exclusively
                //    in the index btree and not in the table btree. Using cursors would force us
                //    to be aware of this distinction (and others), and ultimately lead to
                //    reimplementing the whole query machinery (the next step would be deciding
                //    which index is best to use, etc.).
                let query = self.sql_for_populate()?;

                // Prepare the statement
                let stmt = conn.prepare(&query)?;

                self.populate_state = PopulateState::Processing {
                    stmt: Box::new(stmt),
                    rows_processed: 0,
                    pending_row: None,
                };
                // Continue to next state
                continue;
            }

            // Handle Done state
            if matches!(self.populate_state, PopulateState::Done) {
                return Ok(IOResult::Done(()));
            }

            // Handle Processing state - extract state to avoid borrow issues
            let (mut stmt, mut rows_processed, pending_row) =
                match std::mem::replace(&mut self.populate_state, PopulateState::Done) {
                    PopulateState::Processing {
                        stmt,
                        rows_processed,
                        pending_row,
                    } => (stmt, rows_processed, pending_row),
                    _ => unreachable!("We already handled Start and Done states"),
                };

            // If we have a pending row from a previous I/O interruption, process it first
            if let Some((rowid, values)) = pending_row {
                // Create a single-row delta for the pending row
                let mut single_row_delta = Delta::new();
                single_row_delta.insert(rowid, values.clone());

                // Create a DeltaSet with this delta for the first table (for now)
                let mut delta_set = DeltaSet::new();
                // TODO: When we support JOINs, determine which table this row came from
                delta_set.insert(self.referenced_tables[0].name.clone(), single_row_delta);

                // Process the pending row with the pager
                match self.merge_delta(delta_set, pager.clone())? {
                    IOResult::Done(_) => {
                        // Row processed successfully, continue to next row
                        rows_processed += 1;
                        // Continue to fetch the next row from the statement
                    }
                    IOResult::IO(io) => {
                        // Still not done, save state with pending row
                        self.populate_state = PopulateState::Processing {
                            stmt,
                            rows_processed,
                            pending_row: Some((rowid, values)), // Keep the pending row
                        };
                        return Ok(IOResult::IO(io));
                    }
                }
            }

            // Process rows one at a time - no batching
            loop {
                // This step() call resumes from where the statement left off
                match stmt.step()? {
                    crate::vdbe::StepResult::Row => {
                        // Get the row
                        let row = stmt.row().unwrap();

                        // Extract values from the row
                        let all_values: Vec<Value> = row.get_values().cloned().collect();

                        // Determine how to extract the rowid.
                        // If there's a rowid alias (INTEGER PRIMARY KEY), the rowid is one of the
                        // columns. Otherwise, it's the last value we explicitly selected.
                        let (rowid, values) = if let Some((idx, _)) =
                            self.referenced_tables[0].get_rowid_alias_column()
                        {
                            // The rowid is the value at the rowid alias column index
                            let rowid = match all_values.get(idx) {
                                Some(crate::types::Value::Integer(id)) => *id,
                                _ => {
                                    // This shouldn't happen - rowid alias must be an integer
                                    rows_processed += 1;
                                    continue;
                                }
                            };
                            // All values are table columns (no separate rowid was selected)
                            (rowid, all_values)
                        } else {
                            // The last value is the explicitly selected rowid
                            let rowid = match all_values.last() {
                                Some(crate::types::Value::Integer(id)) => *id,
                                _ => {
                                    // This shouldn't happen - rowid must be an integer
                                    rows_processed += 1;
                                    continue;
                                }
                            };
                            // Get all values except the rowid
                            let values = all_values[..all_values.len() - 1].to_vec();
                            (rowid, values)
                        };

                        // Create a single-row delta and process it immediately
                        let mut single_row_delta = Delta::new();
                        single_row_delta.insert(rowid, values.clone());

                        // Create a DeltaSet with this delta for the first table (for now)
                        let mut delta_set = DeltaSet::new();
                        // TODO: When we support JOINs, determine which table this row came from
                        delta_set.insert(self.referenced_tables[0].name.clone(), single_row_delta);

                        // Process this single row through merge_delta with the pager
                        match self.merge_delta(delta_set, pager.clone())? {
                            IOResult::Done(_) => {
                                // Row processed successfully, continue to next row
                                rows_processed += 1;
                            }
                            IOResult::IO(io) => {
                                // Save state and return I/O.
                                // We'll resume at the SAME row when called again (don't increment
                                // rows_processed); the circuit still has unfinished work for this
                                // row.
                                self.populate_state = PopulateState::Processing {
                                    stmt,
                                    rows_processed, // Don't increment - row not done yet!
                                    pending_row: Some((rowid, values)), // Save the row for resumption
                                };
                                return Ok(IOResult::IO(io));
                            }
                        }
                    }
                    crate::vdbe::StepResult::Done => {
                        // All rows processed, we're done
                        self.populate_state = PopulateState::Done;
                        return Ok(IOResult::Done(()));
                    }
                    crate::vdbe::StepResult::Interrupt | crate::vdbe::StepResult::Busy => {
                        // Save state before returning error
                        self.populate_state = PopulateState::Processing {
                            stmt,
                            rows_processed,
                            pending_row: None, // No pending row when interrupted between rows
                        };
                        return Err(LimboError::Busy);
                    }
                    crate::vdbe::StepResult::IO => {
                        // Statement needs I/O - save state and return
                        self.populate_state = PopulateState::Processing {
                            stmt,
                            rows_processed,
                            pending_row: None, // No pending row when interrupted between rows
                        };
                        // TODO: Get the actual I/O completion from the statement
                        let completion = crate::io::Completion::new_dummy();
                        return Ok(IOResult::IO(crate::types::IOCompletions::Single(
                            completion,
                        )));
                    }
                }
            }
        }
    }

    /// Extract JOIN information from a SELECT statement
    #[allow(clippy::type_complexity)]
    pub fn extract_join_info(
        select: &ast::Select,
    ) -> (Option<(String, String)>, Option<(String, String)>) {
        use turso_parser::ast::*;

        if let OneSelect::Select {
            from: Some(ref from),
            ..
        } = select.body.select
        {
            // Check if there are any joins
            if !from.joins.is_empty() {
                // Get the first (left) table name
                let left_table = match from.select.as_ref() {
                    SelectTable::Table(name, _, _) => Some(name.name.as_str().to_string()),
                    _ => None,
                };

                // Get the first join (right) table and condition
                if let Some(first_join) = from.joins.first() {
                    let right_table = match &first_join.table.as_ref() {
                        SelectTable::Table(name, _, _) => Some(name.name.as_str().to_string()),
                        _ => None,
                    };

                    // Extract join condition (simplified - assumes single equality)
                    let join_condition = if let Some(ref constraint) = &first_join.constraint {
                        match constraint {
                            JoinConstraint::On(expr) => Self::extract_join_columns_from_expr(expr),
                            _ => None,
                        }
                    } else {
                        None
                    };

                    if let (Some(left), Some(right)) = (left_table, right_table) {
                        return (Some((left, right)), join_condition);
                    }
                }
            }
        }

        (None, None)
    }

    /// Extract join column names from a join condition expression
    fn extract_join_columns_from_expr(expr: &ast::Expr) -> Option<(String, String)> {
        use turso_parser::ast::*;

        // Look for expressions like: t1.col = t2.col
        if let Expr::Binary(left, op, right) = expr {
            if matches!(op, Operator::Equals) {
                // Extract column names from both sides
                let left_col = match &**left {
                    Expr::Qualified(name, _) => Some(name.as_str().to_string()),
                    Expr::Id(name) => Some(name.as_str().to_string()),
                    _ => None,
                };
                let right_col = match &**right {
                    Expr::Qualified(name, _) => Some(name.as_str().to_string()),
                    Expr::Id(name) => Some(name.as_str().to_string()),
                    _ => None,
                };

                if let (Some(l), Some(r)) = (left_col, right_col) {
                    return Some((l, r));
                }
            }
        }
        None
    }

    /// Merge a delta set of changes into the view's current state
    pub fn merge_delta(
        &mut self,
        delta_set: DeltaSet,
        pager: std::rc::Rc<Pager>,
    ) -> crate::Result<IOResult<()>> {
        // Early return if all deltas are empty
        if delta_set.is_empty() {
            return Ok(IOResult::Done(()));
        }

        // Use the circuit to process the deltas and write to the btree
        let input_data = delta_set.into_map();

        // The circuit now handles all btree I/O internally with the provided pager
        let _delta = return_if_io!(self.circuit.commit(input_data, pager));

        Ok(IOResult::Done(()))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::{BTreeTable, Column as SchemaColumn, Schema, Type};
    use std::sync::Arc;
    use turso_parser::ast;
    use turso_parser::parser::Parser;
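
    // Hedged sanity checks for the per-connection delta bookkeeping. They assume that
    // Delta::len() counts one entry per recorded change, which is how
    // ViewTransactionState::len() sums the per-table lengths.
    #[test]
    fn test_view_transaction_state_tracks_per_table_deltas() {
        let state = ViewTransactionState::new();
        assert!(state.is_empty());

        state.insert("customers", 1, vec![Value::Integer(1)]);
        assert!(!state.is_empty());
        assert_eq!(state.len(), 1);
        assert!(state.get_table_deltas().contains_key("customers"));

        state.clear();
        assert!(state.is_empty());
    }

    #[test]
    fn test_all_views_tx_state_get_or_create() {
        let all = AllViewsTxState::new();
        assert!(all.is_empty());

        let view_state = all.get_or_create("v1");
        view_state.insert("customers", 1, vec![Value::Integer(1)]);

        assert!(all.get("v1").is_some());
        assert_eq!(all.get_view_names(), vec!["v1".to_string()]);

        all.clear();
        assert!(all.get("v1").is_none());
    }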

    // Helper function to create a test schema with multiple tables
    fn create_test_schema() -> Schema {
        let mut schema = Schema::new(false);

        // Create customers table
        let customers_table = BTreeTable {
            name: "customers".to_string(),
            root_page: 2,
            primary_key_columns: vec![("id".to_string(), ast::SortOrder::Asc)],
            columns: vec![
                SchemaColumn {
                    name: Some("id".to_string()),
                    ty: Type::Integer,
                    ty_str: "INTEGER".to_string(),
                    primary_key: true,
                    is_rowid_alias: true,
                    notnull: true,
                    default: None,
                    unique: false,
                    collation: None,
                    hidden: false,
                },
                SchemaColumn {
                    name: Some("name".to_string()),
                    ty: Type::Text,
                    ty_str: "TEXT".to_string(),
                    primary_key: false,
                    is_rowid_alias: false,
                    notnull: false,
                    default: None,
                    unique: false,
                    collation: None,
                    hidden: false,
                },
            ],
            has_rowid: true,
            is_strict: false,
            unique_sets: vec![],
        };

        // Create orders table
        let orders_table = BTreeTable {
            name: "orders".to_string(),
            root_page: 3,
            primary_key_columns: vec![("id".to_string(), ast::SortOrder::Asc)],
            columns: vec![
                SchemaColumn {
                    name: Some("id".to_string()),
                    ty: Type::Integer,
                    ty_str: "INTEGER".to_string(),
                    primary_key: true,
                    is_rowid_alias: true,
                    notnull: true,
                    default: None,
                    unique: false,
                    collation: None,
                    hidden: false,
                },
                SchemaColumn {
                    name: Some("customer_id".to_string()),
                    ty: Type::Integer,
                    ty_str: "INTEGER".to_string(),
                    primary_key: false,
                    is_rowid_alias: false,
                    notnull: false,
                    default: None,
                    unique: false,
                    collation: None,
                    hidden: false,
                },
                SchemaColumn {
                    name: Some("total".to_string()),
                    ty: Type::Integer,
                    ty_str: "INTEGER".to_string(),
                    primary_key: false,
                    is_rowid_alias: false,
                    notnull: false,
                    default: None,
                    unique: false,
                    collation: None,
                    hidden: false,
                },
            ],
            has_rowid: true,
            is_strict: false,
            unique_sets: vec![],
        };

        // Create products table
        let products_table = BTreeTable {
            name: "products".to_string(),
            root_page: 4,
            primary_key_columns: vec![("id".to_string(), ast::SortOrder::Asc)],
            columns: vec![
                SchemaColumn {
                    name: Some("id".to_string()),
                    ty: Type::Integer,
                    ty_str: "INTEGER".to_string(),
                    primary_key: true,
                    is_rowid_alias: true,
                    notnull: true,
                    default: None,
                    unique: false,
                    collation: None,
                    hidden: false,
                },
                SchemaColumn {
                    name: Some("name".to_string()),
                    ty: Type::Text,
                    ty_str: "TEXT".to_string(),
                    primary_key: false,
                    is_rowid_alias: false,
                    notnull: false,
                    default: None,
                    unique: false,
                    collation: None,
                    hidden: false,
                },
            ],
            has_rowid: true,
            is_strict: false,
            unique_sets: vec![],
        };

        schema.add_btree_table(Arc::new(customers_table));
        schema.add_btree_table(Arc::new(orders_table));
        schema.add_btree_table(Arc::new(products_table));

        schema
    }

    // Helper to parse SQL and extract the SELECT statement
    fn parse_select(sql: &str) -> ast::Select {
        let mut parser = Parser::new(sql.as_bytes());
        let cmd = parser.next().unwrap().unwrap();
        match cmd {
            ast::Cmd::Stmt(ast::Stmt::Select(select)) => select,
            _ => panic!("Expected SELECT statement"),
        }
    }

    #[test]
    fn test_extract_single_table() {
        let schema = create_test_schema();
        let select = parse_select("SELECT * FROM customers");

        let tables = IncrementalView::extract_all_tables(&select, &schema).unwrap();
        assert_eq!(tables.len(), 1);
        assert_eq!(tables[0].name, "customers");
    }

    #[test]
    fn test_extract_tables_from_inner_join() {
        let schema = create_test_schema();
        let select = parse_select(
            "SELECT * FROM customers INNER JOIN orders ON customers.id = orders.customer_id",
        );

        let tables = IncrementalView::extract_all_tables(&select, &schema).unwrap();
        assert_eq!(tables.len(), 2);
        assert_eq!(tables[0].name, "customers");
        assert_eq!(tables[1].name, "orders");
    }
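
    // A hedged check of the join-metadata helper used by can_create_view; it only asserts the
    // table pair, since the column extraction is a simplified single-equality match.
    #[test]
    fn test_extract_join_info_reports_table_pair() {
        let select = parse_select(
            "SELECT * FROM customers INNER JOIN orders ON customers.id = orders.customer_id",
        );

        let (tables, _condition) = IncrementalView::extract_join_info(&select);
        assert_eq!(
            tables,
            Some(("customers".to_string(), "orders".to_string()))
        );
    }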
"orders"); } #[test] fn test_extract_tables_from_multiple_joins() { let schema = create_test_schema(); let select = parse_select( "SELECT * FROM customers INNER JOIN orders ON customers.id = orders.customer_id INNER JOIN products ON orders.id = products.id", ); let tables = IncrementalView::extract_all_tables(&select, &schema).unwrap(); assert_eq!(tables.len(), 3); assert_eq!(tables[0].name, "customers"); assert_eq!(tables[1].name, "orders"); assert_eq!(tables[2].name, "products"); } #[test] fn test_extract_tables_from_left_join() { let schema = create_test_schema(); let select = parse_select( "SELECT * FROM customers LEFT JOIN orders ON customers.id = orders.customer_id", ); let tables = IncrementalView::extract_all_tables(&select, &schema).unwrap(); assert_eq!(tables.len(), 2); assert_eq!(tables[0].name, "customers"); assert_eq!(tables[1].name, "orders"); } #[test] fn test_extract_tables_from_cross_join() { let schema = create_test_schema(); let select = parse_select("SELECT * FROM customers CROSS JOIN orders"); let tables = IncrementalView::extract_all_tables(&select, &schema).unwrap(); assert_eq!(tables.len(), 2); assert_eq!(tables[0].name, "customers"); assert_eq!(tables[1].name, "orders"); } #[test] fn test_extract_tables_with_aliases() { let schema = create_test_schema(); let select = parse_select("SELECT * FROM customers c INNER JOIN orders o ON c.id = o.customer_id"); let tables = IncrementalView::extract_all_tables(&select, &schema).unwrap(); // Should still extract the actual table names, not aliases assert_eq!(tables.len(), 2); assert_eq!(tables[0].name, "customers"); assert_eq!(tables[1].name, "orders"); } #[test] fn test_extract_tables_nonexistent_table_error() { let schema = create_test_schema(); let select = parse_select("SELECT * FROM nonexistent"); let result = IncrementalView::extract_all_tables(&select, &schema); assert!(result.is_err()); assert!(result .unwrap_err() .to_string() .contains("Table 'nonexistent' not found")); } #[test] fn test_extract_tables_nonexistent_join_table_error() { let schema = create_test_schema(); let select = parse_select( "SELECT * FROM customers INNER JOIN nonexistent ON customers.id = nonexistent.id", ); let result = IncrementalView::extract_all_tables(&select, &schema); assert!(result.is_err()); assert!(result .unwrap_err() .to_string() .contains("Table 'nonexistent' not found")); } }