// turso/core/incremental/view.rs
//
// Latest change (Glauber Costa, 9f54f60d45, 2025-09-21 21:00:27 -03:00):
// make sure that complex select statements are captured by MV populate.
// The population code extracts table information from the select statement
// so it can populate the materialized view. But the code, as written today,
// is naive: it doesn't capture table information correctly if there is more
// than one select statement (such as in the case of a union query).


use super::compiler::{DbspCircuit, DbspCompiler, DeltaSet};
use super::dbsp::Delta;
use super::operator::ComputationTracker;
use crate::schema::{BTreeTable, Schema};
use crate::storage::btree::BTreeCursor;
use crate::translate::logical::LogicalPlanBuilder;
use crate::types::{IOResult, Value};
use crate::util::{extract_view_columns, ViewColumnSchema};
use crate::{return_if_io, LimboError, Pager, Result, Statement};
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::rc::Rc;
use std::sync::{Arc, Mutex};
use turso_parser::ast;
use turso_parser::{
ast::{Cmd, Stmt},
parser::Parser,
};
/// State machine for populating a view from its source tables
pub enum PopulateState {
/// Initial state - the populate queries still need to be generated
Start,
/// Iterating over the populate queries, one per referenced table
ProcessingAllTables {
queries: Vec<String>,
current_idx: usize,
},
/// Actively processing rows from the query
ProcessingOneTable {
queries: Vec<String>,
current_idx: usize,
stmt: Box<Statement>,
rows_processed: usize,
/// If we're in the middle of processing a row (merge_delta returned I/O)
pending_row: Option<(i64, Vec<Value>)>, // (rowid, values)
},
/// Population complete
Done,
}
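// Typical flow (describing populate_from_table below): Start generates one populate
// query per referenced table; ProcessingAllTables prepares the current query on a
// fresh read connection, or finishes once every table has been processed;
// ProcessingOneTable steps through the prepared statement row by row, pushing each
// row through the circuit, and whenever that returns I/O it stashes the row in
// pending_row and yields so the same row can be resumed later; Done means population
// is complete.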
// Manual Debug impl that summarizes the query list and pending work instead of dumping full contents
impl fmt::Debug for PopulateState {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
PopulateState::Start => write!(f, "Start"),
PopulateState::ProcessingAllTables {
current_idx,
queries,
} => f
.debug_struct("ProcessingAllTables")
.field("current_idx", current_idx)
.field("num_queries", &queries.len())
.finish(),
PopulateState::ProcessingOneTable {
current_idx,
rows_processed,
pending_row,
queries,
..
} => f
.debug_struct("ProcessingOneTable")
.field("current_idx", current_idx)
.field("rows_processed", rows_processed)
.field("has_pending", &pending_row.is_some())
.field("total_queries", &queries.len())
.finish(),
PopulateState::Done => write!(f, "Done"),
}
}
}
/// Per-connection transaction state for incremental views
#[derive(Debug, Clone, Default)]
pub struct ViewTransactionState {
// Per-table deltas for uncommitted changes
// Maps table_name -> Delta for that table
// Using RefCell for interior mutability
table_deltas: RefCell<HashMap<String, Delta>>,
}
impl ViewTransactionState {
/// Create a new transaction state
pub fn new() -> Self {
Self {
table_deltas: RefCell::new(HashMap::new()),
}
}
/// Insert a row into the delta for a specific table
pub fn insert(&self, table_name: &str, key: i64, values: Vec<Value>) {
let mut deltas = self.table_deltas.borrow_mut();
let delta = deltas.entry(table_name.to_string()).or_default();
delta.insert(key, values);
}
/// Delete a row from the delta for a specific table
pub fn delete(&self, table_name: &str, key: i64, values: Vec<Value>) {
let mut deltas = self.table_deltas.borrow_mut();
let delta = deltas.entry(table_name.to_string()).or_default();
delta.delete(key, values);
}
/// Clear all changes in the delta
pub fn clear(&self) {
self.table_deltas.borrow_mut().clear();
}
/// Get deltas organized by table
pub fn get_table_deltas(&self) -> HashMap<String, Delta> {
self.table_deltas.borrow().clone()
}
/// Check if the delta is empty
pub fn is_empty(&self) -> bool {
self.table_deltas.borrow().values().all(|d| d.is_empty())
}
/// Returns how many elements exist in the delta.
pub fn len(&self) -> usize {
self.table_deltas.borrow().values().map(|d| d.len()).sum()
}
}
/// Container for all view transaction states within a connection
/// Provides interior mutability for the map of view states
#[derive(Debug, Clone, Default)]
pub struct AllViewsTxState {
states: Rc<RefCell<HashMap<String, Arc<ViewTransactionState>>>>,
}
impl AllViewsTxState {
/// Create a new container for view transaction states
pub fn new() -> Self {
Self {
states: Rc::new(RefCell::new(HashMap::new())),
}
}
/// Get or create a transaction state for a view
pub fn get_or_create(&self, view_name: &str) -> Arc<ViewTransactionState> {
let mut states = self.states.borrow_mut();
states
.entry(view_name.to_string())
.or_insert_with(|| Arc::new(ViewTransactionState::new()))
.clone()
}
/// Get a transaction state for a view if it exists
pub fn get(&self, view_name: &str) -> Option<Arc<ViewTransactionState>> {
self.states.borrow().get(view_name).cloned()
}
/// Clear all transaction states
pub fn clear(&self) {
self.states.borrow_mut().clear();
}
/// Check if there are no transaction states
pub fn is_empty(&self) -> bool {
self.states.borrow().is_empty()
}
/// Get all view names that have transaction states
pub fn get_view_names(&self) -> Vec<String> {
self.states.borrow().keys().cloned().collect()
}
}
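// A minimal, test-only sketch of how the per-connection transaction state is meant
// to be used: uncommitted changes are recorded per source table on a
// ViewTransactionState, and AllViewsTxState hands out one such state per view.
// The table and view names below are arbitrary examples.
#[cfg(test)]
mod tx_state_sketch {
    use super::{AllViewsTxState, ViewTransactionState};
    use crate::types::Value;

    #[test]
    fn records_uncommitted_changes_per_table() {
        let state = ViewTransactionState::new();
        assert!(state.is_empty());

        // Changes are grouped by the source table they came from.
        state.insert("customers", 1, vec![Value::Integer(1)]);
        state.insert("orders", 7, vec![Value::Integer(7)]);

        let deltas = state.get_table_deltas();
        assert!(deltas.contains_key("customers"));
        assert!(deltas.contains_key("orders"));

        state.clear();
        assert!(state.is_empty());
    }

    #[test]
    fn one_transaction_state_per_view() {
        let all = AllViewsTxState::new();
        assert!(all.is_empty());

        // get_or_create lazily creates (and then reuses) the per-view state.
        let view_state = all.get_or_create("my_view");
        view_state.insert("customers", 1, vec![Value::Integer(1)]);

        assert!(all.get("my_view").is_some());
        assert!(all.get_view_names().contains(&"my_view".to_string()));

        all.clear();
        assert!(all.is_empty());
    }
}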
/// Incremental view that maintains its state through a DBSP circuit
///
/// This version keeps everything in-memory. This is acceptable for small views, since DBSP
/// doesn't have to track the history of changes. Still, very large views (think of the result
/// of `CREATE MATERIALIZED VIEW v AS SELECT * FROM tbl WHERE x > 1` over a table with a billion
/// rows) would not fit.
///
/// We should have a version of this that materializes the results. Materializing will also be good
/// for large aggregations, because then we don't have to re-compute when opening the database
/// again.
///
/// Uses DBSP circuits for incremental computation.
#[derive(Debug)]
pub struct IncrementalView {
name: String,
// The SELECT statement that defines how to transform input data
pub select_stmt: ast::Select,
// DBSP circuit that encapsulates the computation
circuit: DbspCircuit,
// All tables referenced by this view (from FROM clause and JOINs)
referenced_tables: Vec<Arc<BTreeTable>>,
// Mapping from table aliases to actual table names (e.g., "c" -> "customers")
table_aliases: HashMap<String, String>,
// Mapping from table name to fully qualified name (e.g., "customers" -> "main.customers")
// This preserves database qualification from the original query
qualified_table_names: HashMap<String, String>,
// WHERE conditions for each table (accumulated from all occurrences)
// Multiple conditions from UNION branches or duplicate references are stored as a vector
table_conditions: HashMap<String, Vec<Option<ast::Expr>>>,
// The view's column schema with table relationships
pub column_schema: ViewColumnSchema,
// State machine for population
populate_state: PopulateState,
// Computation tracker for statistics
// We will use this one day to export rows_read; for now it only lets tests verify that we're doing the expected amount of compute
#[cfg_attr(not(test), allow(dead_code))]
pub tracker: Arc<Mutex<ComputationTracker>>,
// Root page of the btree storing the materialized state (0 for unmaterialized)
root_page: usize,
}
impl IncrementalView {
/// Try to compile the SELECT statement into a DBSP circuit
fn try_compile_circuit(
select: &ast::Select,
schema: &Schema,
main_data_root: usize,
internal_state_root: usize,
internal_state_index_root: usize,
) -> Result<DbspCircuit> {
// Build the logical plan from the SELECT statement
let mut builder = LogicalPlanBuilder::new(schema);
// Convert Select to a Stmt for the builder
let stmt = ast::Stmt::Select(select.clone());
let logical_plan = builder.build_statement(&stmt)?;
// Compile the logical plan to a DBSP circuit with the storage roots
let compiler = DbspCompiler::new(
main_data_root,
internal_state_root,
internal_state_index_root,
);
let circuit = compiler.compile(&logical_plan)?;
Ok(circuit)
}
/// Get an iterator over column names, using enumerated naming for unnamed columns
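/// (for example, an unnamed third column is reported as `column3`)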
pub fn column_names(&self) -> impl Iterator<Item = String> + '_ {
self.column_schema
.columns
.iter()
.enumerate()
.map(|(i, vc)| {
vc.column
.name
.clone()
.unwrap_or_else(|| format!("column{}", i + 1))
})
}
/// Check if this view has the same SQL definition as the provided SQL string
pub fn has_same_sql(&self, sql: &str) -> bool {
// Parse the SQL to extract just the SELECT statement
if let Ok(Some(Cmd::Stmt(Stmt::CreateMaterializedView { select, .. }))) =
Parser::new(sql.as_bytes()).next_cmd()
{
// The parsed SELECT statements are compared directly (AST equality)
return self.select_stmt == select;
}
false
}
/// Validate a SELECT statement and extract the columns it would produce
/// This is used during CREATE MATERIALIZED VIEW to validate the view before storing it
pub fn validate_and_extract_columns(
select: &ast::Select,
schema: &Schema,
) -> Result<ViewColumnSchema> {
// Use the shared function to extract columns with full table context
extract_view_columns(select, schema)
}
pub fn from_sql(
sql: &str,
schema: &Schema,
main_data_root: usize,
internal_state_root: usize,
internal_state_index_root: usize,
) -> Result<Self> {
let mut parser = Parser::new(sql.as_bytes());
let cmd = parser.next_cmd()?;
let cmd = cmd.expect("View is an empty statement");
match cmd {
Cmd::Stmt(Stmt::CreateMaterializedView {
if_not_exists: _,
view_name,
columns: _,
select,
}) => IncrementalView::from_stmt(
view_name,
select,
schema,
main_data_root,
internal_state_root,
internal_state_index_root,
),
_ => Err(LimboError::ParseError(format!(
"View is not a CREATE MATERIALIZED VIEW statement: {sql}"
))),
}
}
pub fn from_stmt(
view_name: ast::QualifiedName,
select: ast::Select,
schema: &Schema,
main_data_root: usize,
internal_state_root: usize,
internal_state_index_root: usize,
) -> Result<Self> {
let name = view_name.name.as_str().to_string();
// Extract output columns using the shared function
let column_schema = extract_view_columns(&select, schema)?;
let mut referenced_tables = Vec::new();
let mut table_aliases = HashMap::new();
let mut qualified_table_names = HashMap::new();
let mut table_conditions = HashMap::new();
Self::extract_all_tables(
&select,
schema,
&mut referenced_tables,
&mut table_aliases,
&mut qualified_table_names,
&mut table_conditions,
)?;
Self::new(
name,
select.clone(),
referenced_tables,
table_aliases,
qualified_table_names,
table_conditions,
column_schema,
schema,
main_data_root,
internal_state_root,
internal_state_index_root,
)
}
#[allow(clippy::too_many_arguments)]
pub fn new(
name: String,
select_stmt: ast::Select,
referenced_tables: Vec<Arc<BTreeTable>>,
table_aliases: HashMap<String, String>,
qualified_table_names: HashMap<String, String>,
table_conditions: HashMap<String, Vec<Option<ast::Expr>>>,
column_schema: ViewColumnSchema,
schema: &Schema,
main_data_root: usize,
internal_state_root: usize,
internal_state_index_root: usize,
) -> Result<Self> {
// Create the tracker that will be shared by all operators
let tracker = Arc::new(Mutex::new(ComputationTracker::new()));
// Compile the SELECT statement into a DBSP circuit
let circuit = Self::try_compile_circuit(
&select_stmt,
schema,
main_data_root,
internal_state_root,
internal_state_index_root,
)?;
Ok(Self {
name,
select_stmt,
circuit,
referenced_tables,
table_aliases,
qualified_table_names,
table_conditions,
column_schema,
populate_state: PopulateState::Start,
tracker,
root_page: main_data_root,
})
}
pub fn name(&self) -> &str {
&self.name
}
/// Execute the circuit with uncommitted changes to get processed delta
pub fn execute_with_uncommitted(
&mut self,
uncommitted: DeltaSet,
pager: Arc<Pager>,
execute_state: &mut crate::incremental::compiler::ExecuteState,
) -> crate::Result<crate::types::IOResult<Delta>> {
// Initialize execute_state with the input data
*execute_state = crate::incremental::compiler::ExecuteState::Init {
input_data: uncommitted,
};
self.circuit.execute(pager, execute_state)
}
/// Get the root page for this materialized view's btree
pub fn get_root_page(&self) -> usize {
self.root_page
}
/// Get all table names referenced by this view
pub fn get_referenced_table_names(&self) -> Vec<String> {
self.referenced_tables
.iter()
.map(|t| t.name.clone())
.collect()
}
/// Get all tables referenced by this view
pub fn get_referenced_tables(&self) -> Vec<Arc<BTreeTable>> {
self.referenced_tables.clone()
}
/// Process a single table reference from a FROM or JOIN clause
fn process_table_reference(
name: &ast::QualifiedName,
alias: &Option<ast::As>,
schema: &Schema,
table_map: &mut HashMap<String, Arc<BTreeTable>>,
aliases: &mut HashMap<String, String>,
qualified_names: &mut HashMap<String, String>,
cte_names: &HashSet<String>,
) -> Result<()> {
let table_name = name.name.as_str();
// Build the fully qualified name
let qualified_name = if let Some(ref db) = name.db_name {
format!("{db}.{table_name}")
} else {
table_name.to_string()
};
// Skip CTEs - they're not real tables
if !cte_names.contains(table_name) {
if let Some(table) = schema.get_btree_table(table_name) {
table_map.insert(table_name.to_string(), table.clone());
qualified_names.insert(table_name.to_string(), qualified_name);
// Store the alias mapping if there is an alias
if let Some(alias_enum) = alias {
let alias_name = match alias_enum {
ast::As::As(name) | ast::As::Elided(name) => match name {
ast::Name::Ident(s) | ast::Name::Quoted(s) => s,
},
};
aliases.insert(alias_name.to_string(), table_name.to_string());
}
} else {
return Err(LimboError::ParseError(format!(
"Table '{table_name}' not found in schema"
)));
}
}
Ok(())
}
fn extract_one_statement(
select: &ast::OneSelect,
schema: &Schema,
table_map: &mut HashMap<String, Arc<BTreeTable>>,
aliases: &mut HashMap<String, String>,
qualified_names: &mut HashMap<String, String>,
table_conditions: &mut HashMap<String, Vec<Option<ast::Expr>>>,
cte_names: &HashSet<String>,
) -> Result<()> {
if let ast::OneSelect::Select {
from: Some(ref from),
..
} = select
{
// Get the main table from FROM clause
if let ast::SelectTable::Table(name, alias, _) = from.select.as_ref() {
Self::process_table_reference(
name,
alias,
schema,
table_map,
aliases,
qualified_names,
cte_names,
)?;
}
// Get all tables from JOIN clauses
for join in &from.joins {
if let ast::SelectTable::Table(name, alias, _) = join.table.as_ref() {
Self::process_table_reference(
name,
alias,
schema,
table_map,
aliases,
qualified_names,
cte_names,
)?;
}
}
}
// Extract WHERE conditions for this SELECT
let where_expr = if let ast::OneSelect::Select {
where_clause: Some(ref where_expr),
..
} = select
{
Some(where_expr.as_ref().clone())
} else {
None
};
// Ensure all tables have an entry in table_conditions (even if empty)
for table_name in table_map.keys() {
table_conditions.entry(table_name.clone()).or_default();
}
// Extract and store table-specific conditions from the WHERE clause
if let Some(ref where_expr) = where_expr {
for table_name in table_map.keys() {
let all_tables: Vec<String> = table_map.keys().cloned().collect();
let table_specific_condition = Self::extract_conditions_for_table(
where_expr,
table_name,
aliases,
&all_tables,
schema,
);
// Only add if there's actually a condition for this table
if let Some(condition) = table_specific_condition {
let conditions = table_conditions.get_mut(table_name).unwrap();
conditions.push(Some(condition));
}
}
} else {
// No WHERE clause - push None for all tables in this SELECT. This is a way
// of signaling that we need all rows from the table. It is important to signal this
// explicitly, because the same table may appear in several SELECT branches, some of
// which have filters that would otherwise be applied.
for table_name in table_map.keys() {
let conditions = table_conditions.get_mut(table_name).unwrap();
conditions.push(None);
}
}
Ok(())
}
/// Extract all tables and their aliases from the SELECT statement, handling CTEs
/// Deduplicates tables and accumulates WHERE conditions
fn extract_all_tables(
select: &ast::Select,
schema: &Schema,
tables: &mut Vec<Arc<BTreeTable>>,
aliases: &mut HashMap<String, String>,
qualified_names: &mut HashMap<String, String>,
table_conditions: &mut HashMap<String, Vec<Option<ast::Expr>>>,
) -> Result<()> {
let mut table_map = HashMap::new();
Self::extract_all_tables_inner(
select,
schema,
&mut table_map,
aliases,
qualified_names,
table_conditions,
&HashSet::new(),
)?;
// Convert deduplicated table map to vector
for (_name, table) in table_map {
tables.push(table);
}
Ok(())
}
fn extract_all_tables_inner(
select: &ast::Select,
schema: &Schema,
table_map: &mut HashMap<String, Arc<BTreeTable>>,
aliases: &mut HashMap<String, String>,
qualified_names: &mut HashMap<String, String>,
table_conditions: &mut HashMap<String, Vec<Option<ast::Expr>>>,
parent_cte_names: &HashSet<String>,
) -> Result<()> {
let mut cte_names = parent_cte_names.clone();
// First, collect CTE names and process any CTEs (WITH clauses)
if let Some(ref with) = select.with {
// First pass: collect all CTE names (needed for recursive CTEs)
for cte in &with.ctes {
cte_names.insert(cte.tbl_name.as_str().to_string());
}
// Second pass: extract tables from each CTE's SELECT statement
for cte in &with.ctes {
// Recursively extract tables from each CTE's SELECT statement
Self::extract_all_tables_inner(
&cte.select,
schema,
table_map,
aliases,
qualified_names,
table_conditions,
&cte_names,
)?;
}
}
// Then process the main SELECT body
Self::extract_one_statement(
&select.body.select,
schema,
table_map,
aliases,
qualified_names,
table_conditions,
&cte_names,
)?;
// Process any compound selects (UNION, etc.)
for c in &select.body.compounds {
let ast::CompoundSelect { select, .. } = c;
Self::extract_one_statement(
select,
schema,
table_map,
aliases,
qualified_names,
table_conditions,
&cte_names,
)?;
}
Ok(())
}
/// Generate SQL queries for populating the view from each source table
/// Returns a vector of SQL statements, one for each referenced table
/// Each query includes the WHERE conditions accumulated from all occurrences
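/// For example (see the tests at the bottom of this file), a view defined over
/// `customers c JOIN orders o ON c.id = o.customer_id WHERE c.id > 10 AND o.total > 100`
/// produces one query per referenced table: `SELECT * FROM customers WHERE id > 10` and
/// `SELECT * FROM orders WHERE total > 100`. Tables without a rowid alias additionally
/// select the rowid (`SELECT *, rowid FROM ...`).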
fn sql_for_populate(&self) -> crate::Result<Vec<String>> {
Self::generate_populate_queries(
&self.select_stmt,
&self.referenced_tables,
&self.table_aliases,
&self.qualified_table_names,
&self.table_conditions,
)
}
pub fn generate_populate_queries(
select_stmt: &ast::Select,
referenced_tables: &[Arc<BTreeTable>],
table_aliases: &HashMap<String, String>,
qualified_table_names: &HashMap<String, String>,
table_conditions: &HashMap<String, Vec<Option<ast::Expr>>>,
) -> crate::Result<Vec<String>> {
if referenced_tables.is_empty() {
return Err(LimboError::ParseError(
"No tables to populate from".to_string(),
));
}
let mut queries = Vec::new();
for table in referenced_tables {
// Check if the table has a rowid alias (INTEGER PRIMARY KEY column)
let has_rowid_alias = table.columns.iter().any(|col| col.is_rowid_alias);
// Select all columns. The circuit will handle filtering and projection
// If there's a rowid alias, we don't need to select rowid separately
let select_clause = if has_rowid_alias {
"*".to_string()
} else {
"*, rowid".to_string()
};
// Get accumulated WHERE conditions for this table
let where_clause = if let Some(conditions) = table_conditions.get(&table.name) {
// Combine multiple conditions with OR if there are multiple occurrences
Self::combine_conditions(
select_stmt,
conditions,
&table.name,
referenced_tables,
table_aliases,
)?
} else {
String::new()
};
// Use the qualified table name if available, otherwise just the table name
let table_name = qualified_table_names
.get(&table.name)
.cloned()
.unwrap_or_else(|| table.name.clone());
// Construct the query for this table
let query = if where_clause.is_empty() {
format!("SELECT {select_clause} FROM {table_name}")
} else {
format!("SELECT {select_clause} FROM {table_name} WHERE {where_clause}")
};
tracing::debug!("populating materialized view with `{query}`");
queries.push(query);
}
Ok(queries)
}
fn combine_conditions(
_select_stmt: &ast::Select,
conditions: &[Option<ast::Expr>],
table_name: &str,
_referenced_tables: &[Arc<BTreeTable>],
table_aliases: &HashMap<String, String>,
) -> crate::Result<String> {
// Check if any conditions are None (SELECTs without WHERE)
let has_none = conditions.iter().any(|c| c.is_none());
let non_empty: Vec<_> = conditions.iter().filter_map(|c| c.as_ref()).collect();
// If we have both Some and None conditions, then in some of the SELECTs where this table
// appears we need all rows, so we must fetch the whole table (no WHERE clause).
if has_none && !non_empty.is_empty() {
return Ok(String::new());
}
if non_empty.is_empty() {
return Ok(String::new());
}
if non_empty.len() == 1 {
// Unqualify the expression before converting to string
let unqualified = Self::unqualify_expression(non_empty[0], table_name, table_aliases);
return Ok(unqualified.to_string());
}
// Multiple conditions - combine with OR
// This happens in UNION ALL when the same table appears multiple times
let mut combined_parts = Vec::new();
for condition in non_empty {
let unqualified = Self::unqualify_expression(condition, table_name, table_aliases);
// Wrap each condition in parentheses to preserve precedence
combined_parts.push(format!("({unqualified})"));
}
// Join all conditions with OR
Ok(combined_parts.join(" OR "))
}
/// Check if an expression is a simple comparison that can be safely extracted
/// This excludes subqueries, CASE expressions, function calls, etc.
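/// For example, `id > 10 AND name = 'John'` counts as simple, while `id IN (SELECT ...)`
/// or `CASE WHEN ... END` does not (illustrative).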
fn is_simple_comparison(expr: &ast::Expr) -> bool {
match expr {
// Simple column references and literals are OK
ast::Expr::Column { .. } | ast::Expr::Literal(_) => true,
// Simple binary operations between simple expressions are OK
ast::Expr::Binary(left, op, right) => {
match op {
// Logical operators
ast::Operator::And | ast::Operator::Or => {
Self::is_simple_comparison(left) && Self::is_simple_comparison(right)
}
// Comparison operators
ast::Operator::Equals
| ast::Operator::NotEquals
| ast::Operator::Less
| ast::Operator::LessEquals
| ast::Operator::Greater
| ast::Operator::GreaterEquals
| ast::Operator::Is
| ast::Operator::IsNot => {
Self::is_simple_comparison(left) && Self::is_simple_comparison(right)
}
// String concatenation and other operations are NOT simple
ast::Operator::Concat => false,
// Arithmetic might be OK if operands are simple
ast::Operator::Add
| ast::Operator::Subtract
| ast::Operator::Multiply
| ast::Operator::Divide
| ast::Operator::Modulus => {
Self::is_simple_comparison(left) && Self::is_simple_comparison(right)
}
_ => false,
}
}
// Unary operations might be OK
ast::Expr::Unary(
ast::UnaryOperator::Not
| ast::UnaryOperator::Negative
| ast::UnaryOperator::Positive,
inner,
) => Self::is_simple_comparison(inner),
ast::Expr::Unary(_, _) => false,
// Complex expressions are NOT simple
ast::Expr::Case { .. } => false,
ast::Expr::Cast { .. } => false,
ast::Expr::Collate { .. } => false,
ast::Expr::Exists(_) => false,
ast::Expr::FunctionCall { .. } => false,
ast::Expr::InList { .. } => false,
ast::Expr::InSelect { .. } => false,
ast::Expr::Like { .. } => false,
ast::Expr::NotNull(_) => true, // IS NOT NULL is simple enough
ast::Expr::Parenthesized(exprs) => {
// Parenthesized expression can contain multiple expressions
// Only consider it simple if it has exactly one simple expression
exprs.len() == 1 && Self::is_simple_comparison(&exprs[0])
}
ast::Expr::Subquery(_) => false,
// BETWEEN might be OK if all operands are simple
ast::Expr::Between { .. } => {
// BETWEEN has a different structure, for safety just exclude it
false
}
// Qualified references are simple
ast::Expr::DoublyQualified(..) => true,
ast::Expr::Qualified(_, _) => true,
// These are simple
ast::Expr::Id(_) => true,
ast::Expr::Name(_) => true,
// Anything else is not simple
_ => false,
}
}
/// Extract conditions from a WHERE clause that apply to a specific table
fn extract_conditions_for_table(
expr: &ast::Expr,
table_name: &str,
aliases: &HashMap<String, String>,
all_tables: &[String],
schema: &Schema,
) -> Option<ast::Expr> {
match expr {
ast::Expr::Binary(left, op, right) => {
match op {
ast::Operator::And => {
// For AND, we can extract conditions independently
let left_cond = Self::extract_conditions_for_table(
left, table_name, aliases, all_tables, schema,
);
let right_cond = Self::extract_conditions_for_table(
right, table_name, aliases, all_tables, schema,
);
match (left_cond, right_cond) {
(Some(l), Some(r)) => Some(ast::Expr::Binary(
Box::new(l),
ast::Operator::And,
Box::new(r),
)),
(Some(l), None) => Some(l),
(None, Some(r)) => Some(r),
(None, None) => None,
}
}
ast::Operator::Or => {
// For OR, both sides must reference only our table
let left_tables =
Self::get_tables_in_expr(left, aliases, all_tables, schema);
let right_tables =
Self::get_tables_in_expr(right, aliases, all_tables, schema);
if left_tables.len() == 1
&& left_tables.contains(&table_name.to_string())
&& right_tables.len() == 1
&& right_tables.contains(&table_name.to_string())
&& Self::is_simple_comparison(expr)
{
Some(expr.clone())
} else {
None
}
}
_ => {
// For comparison operators, check if this condition only references our table
let referenced_tables =
Self::get_tables_in_expr(expr, aliases, all_tables, schema);
if referenced_tables.len() == 1
&& referenced_tables.contains(&table_name.to_string())
&& Self::is_simple_comparison(expr)
{
Some(expr.clone())
} else {
None
}
}
}
}
_ => {
// For other expressions, check if they only reference our table
let referenced_tables = Self::get_tables_in_expr(expr, aliases, all_tables, schema);
if referenced_tables.len() == 1
&& referenced_tables.contains(&table_name.to_string())
&& Self::is_simple_comparison(expr)
{
Some(expr.clone())
} else {
None
}
}
}
}
/// Unqualify column references in an expression
/// Removes table/alias prefixes from qualified column names
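/// For example, with the alias mapping `c` -> `customers`, the expression `c.id > 10`
/// becomes `id > 10` when generating the per-table populate query.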
fn unqualify_expression(
expr: &ast::Expr,
table_name: &str,
aliases: &HashMap<String, String>,
) -> ast::Expr {
match expr {
ast::Expr::Binary(left, op, right) => ast::Expr::Binary(
Box::new(Self::unqualify_expression(left, table_name, aliases)),
*op,
Box::new(Self::unqualify_expression(right, table_name, aliases)),
),
ast::Expr::Qualified(table_or_alias, column) => {
// Check if this qualification refers to our table
let table_str = table_or_alias.as_str();
let actual_table = if let Some(actual) = aliases.get(table_str) {
actual.clone()
} else if table_str.contains('.') {
// Handle database.table format
table_str
.split('.')
.next_back()
.unwrap_or(table_str)
.to_string()
} else {
table_str.to_string()
};
if actual_table == table_name {
// Remove the qualification
ast::Expr::Id(column.clone())
} else {
// Keep the qualification (shouldn't happen if extraction worked correctly)
expr.clone()
}
}
ast::Expr::DoublyQualified(_database, table, column) => {
// Check if this refers to our table
if table.as_str() == table_name {
// Remove the qualification, keep just the column
ast::Expr::Id(column.clone())
} else {
// Keep the qualification (shouldn't happen if extraction worked correctly)
expr.clone()
}
}
ast::Expr::Unary(op, inner) => ast::Expr::Unary(
*op,
Box::new(Self::unqualify_expression(inner, table_name, aliases)),
),
ast::Expr::FunctionCall {
name,
args,
distinctness,
filter_over,
order_by,
} => ast::Expr::FunctionCall {
name: name.clone(),
args: args
.iter()
.map(|arg| Box::new(Self::unqualify_expression(arg, table_name, aliases)))
.collect(),
distinctness: *distinctness,
filter_over: filter_over.clone(),
order_by: order_by.clone(),
},
ast::Expr::InList { lhs, not, rhs } => ast::Expr::InList {
lhs: Box::new(Self::unqualify_expression(lhs, table_name, aliases)),
not: *not,
rhs: rhs
.iter()
.map(|item| Box::new(Self::unqualify_expression(item, table_name, aliases)))
.collect(),
},
ast::Expr::Between {
lhs,
not,
start,
end,
} => ast::Expr::Between {
lhs: Box::new(Self::unqualify_expression(lhs, table_name, aliases)),
not: *not,
start: Box::new(Self::unqualify_expression(start, table_name, aliases)),
end: Box::new(Self::unqualify_expression(end, table_name, aliases)),
},
_ => expr.clone(),
}
}
/// Get all tables referenced in an expression
fn get_tables_in_expr(
expr: &ast::Expr,
aliases: &HashMap<String, String>,
all_tables: &[String],
schema: &Schema,
) -> Vec<String> {
let mut tables = Vec::new();
Self::collect_tables_in_expr(expr, aliases, all_tables, schema, &mut tables);
tables.sort();
tables.dedup();
tables
}
/// Recursively collect table references from an expression
fn collect_tables_in_expr(
expr: &ast::Expr,
aliases: &HashMap<String, String>,
all_tables: &[String],
schema: &Schema,
tables: &mut Vec<String>,
) {
match expr {
ast::Expr::Binary(left, _, right) => {
Self::collect_tables_in_expr(left, aliases, all_tables, schema, tables);
Self::collect_tables_in_expr(right, aliases, all_tables, schema, tables);
}
ast::Expr::Qualified(table_or_alias, _) => {
// Handle database.table or just table/alias
let table_str = table_or_alias.as_str();
let table_name = if let Some(actual_table) = aliases.get(table_str) {
// It's an alias
actual_table.clone()
} else if table_str.contains('.') {
// It might be database.table format, extract just the table name
table_str
.split('.')
.next_back()
.unwrap_or(table_str)
.to_string()
} else {
// It's a direct table name
table_str.to_string()
};
tables.push(table_name);
}
ast::Expr::DoublyQualified(_database, table, _column) => {
// For database.table.column, extract the table name
tables.push(table.to_string());
}
ast::Expr::Id(column) => {
// Unqualified column - try to find which table has this column
if all_tables.len() == 1 {
tables.push(all_tables[0].clone());
} else {
// Check which table has this column
for table_name in all_tables {
if let Some(table) = schema.get_btree_table(table_name) {
if table
.columns
.iter()
.any(|col| col.name.as_deref() == Some(column.as_str()))
{
tables.push(table_name.clone());
break; // Found the table, stop looking
}
}
}
}
}
ast::Expr::FunctionCall { args, .. } => {
for arg in args {
Self::collect_tables_in_expr(arg, aliases, all_tables, schema, tables);
}
}
ast::Expr::InList { lhs, rhs, .. } => {
Self::collect_tables_in_expr(lhs, aliases, all_tables, schema, tables);
for item in rhs {
Self::collect_tables_in_expr(item, aliases, all_tables, schema, tables);
}
}
ast::Expr::InSelect { lhs, .. } => {
Self::collect_tables_in_expr(lhs, aliases, all_tables, schema, tables);
}
ast::Expr::Between {
lhs, start, end, ..
} => {
Self::collect_tables_in_expr(lhs, aliases, all_tables, schema, tables);
Self::collect_tables_in_expr(start, aliases, all_tables, schema, tables);
Self::collect_tables_in_expr(end, aliases, all_tables, schema, tables);
}
ast::Expr::Unary(_, expr) => {
Self::collect_tables_in_expr(expr, aliases, all_tables, schema, tables);
}
_ => {
// Literals, etc. don't reference tables
}
}
}
/// Populate the view by scanning the source tables using a state machine
/// This can be called multiple times and will resume from where it left off
/// This method is only for materialized views and will persist data to the btree
pub fn populate_from_table(
&mut self,
conn: &std::sync::Arc<crate::Connection>,
pager: &std::sync::Arc<crate::Pager>,
_btree_cursor: &mut BTreeCursor,
) -> crate::Result<IOResult<()>> {
// Assert that this is a materialized view with a root page
assert!(
self.root_page != 0,
"populate_from_table should only be called for materialized views with root_page"
);
'outer: loop {
match std::mem::replace(&mut self.populate_state, PopulateState::Done) {
PopulateState::Start => {
// Generate the SQL query for populating the view
// It is better to use a standard SQL query than a cursor, for two reasons:
// 1) Using a SQL query allows us to be much more efficient in cases where we only want
// some rows, in particular for indexed filters
// 2) There are two types of cursors: index and table. In some situations (for example,
// if the table has an integer primary key), the key will be exclusively in the index
// btree and not in the table btree. Using cursors would force us to be aware of this
// distinction (and others), and ultimately lead to reimplementing the whole query
// machinery (next step is deciding which index is best to use, etc.)
let queries = self.sql_for_populate()?;
self.populate_state = PopulateState::ProcessingAllTables {
queries,
current_idx: 0,
};
}
PopulateState::ProcessingAllTables {
queries,
current_idx,
} => {
if current_idx >= queries.len() {
self.populate_state = PopulateState::Done;
return Ok(IOResult::Done(()));
}
let query = queries[current_idx].clone();
// Create a new connection for reading to avoid transaction conflicts
// This allows us to read from tables while the parent transaction is writing the view
// The statement holds a reference to this connection, keeping it alive
let read_conn = conn.db.connect()?;
// Prepare the statement using the read connection
let stmt = read_conn.prepare(&query)?;
self.populate_state = PopulateState::ProcessingOneTable {
queries,
current_idx,
stmt: Box::new(stmt),
rows_processed: 0,
pending_row: None,
};
}
PopulateState::ProcessingOneTable {
queries,
current_idx,
mut stmt,
mut rows_processed,
pending_row,
} => {
// If we have a pending row from a previous I/O interruption, process it first
if let Some((rowid, values)) = pending_row {
match self.process_one_row(
rowid,
values.clone(),
current_idx,
pager.clone(),
)? {
IOResult::Done(_) => {
// Row processed successfully, continue to next row
rows_processed += 1;
}
IOResult::IO(io) => {
// Still not done, restore state with pending row and return
self.populate_state = PopulateState::ProcessingOneTable {
queries,
current_idx,
stmt,
rows_processed,
pending_row: Some((rowid, values)),
};
return Ok(IOResult::IO(io));
}
}
}
// Process rows one at a time - no batching
loop {
// This step() call resumes from where the statement left off
match stmt.step()? {
crate::vdbe::StepResult::Row => {
// Get the row
let row = stmt.row().unwrap();
// Extract values from the row
let all_values: Vec<crate::types::Value> =
row.get_values().cloned().collect();
// Extract rowid and values using helper
let (rowid, values) =
match self.extract_rowid_and_values(all_values, current_idx) {
Some(result) => result,
None => {
// Invalid rowid, skip this row
rows_processed += 1;
continue;
}
};
// Process this row
match self.process_one_row(
rowid,
values.clone(),
current_idx,
pager.clone(),
)? {
IOResult::Done(_) => {
// Row processed successfully, continue to next row
rows_processed += 1;
}
IOResult::IO(io) => {
// Save state and return I/O
// We'll resume at the SAME row when called again (don't increment rows_processed)
// The circuit still has unfinished work for this row
self.populate_state = PopulateState::ProcessingOneTable {
queries,
current_idx,
stmt,
rows_processed, // Don't increment - row not done yet!
pending_row: Some((rowid, values)), // Save the row for resumption
};
return Ok(IOResult::IO(io));
}
}
}
crate::vdbe::StepResult::Done => {
// All rows processed from this table
// Move to next table
self.populate_state = PopulateState::ProcessingAllTables {
queries,
current_idx: current_idx + 1,
};
continue 'outer;
}
crate::vdbe::StepResult::Interrupt | crate::vdbe::StepResult::Busy => {
// Save state before returning error
self.populate_state = PopulateState::ProcessingOneTable {
queries,
current_idx,
stmt,
rows_processed,
pending_row: None, // No pending row when interrupted between rows
};
return Err(LimboError::Busy);
}
crate::vdbe::StepResult::IO => {
// Statement needs I/O - save state and return
self.populate_state = PopulateState::ProcessingOneTable {
queries,
current_idx,
stmt,
rows_processed,
pending_row: None, // No pending row when interrupted between rows
};
// TODO: Get the actual I/O completion from the statement
let completion = crate::io::Completion::new_dummy();
return Ok(IOResult::IO(crate::types::IOCompletions::Single(
completion,
)));
}
}
}
}
PopulateState::Done => {
return Ok(IOResult::Done(()));
}
}
}
}
/// Process a single row through the circuit
fn process_one_row(
&mut self,
rowid: i64,
values: Vec<Value>,
table_idx: usize,
pager: Arc<crate::Pager>,
) -> crate::Result<IOResult<()>> {
// Create a single-row delta
let mut single_row_delta = Delta::new();
single_row_delta.insert(rowid, values);
// Create a DeltaSet with this delta for the current table
let mut delta_set = DeltaSet::new();
let table_name = self.referenced_tables[table_idx].name.clone();
delta_set.insert(table_name, single_row_delta);
// Process through merge_delta
self.merge_delta(delta_set, pager)
}
/// Extract rowid and values from a row
fn extract_rowid_and_values(
&self,
all_values: Vec<Value>,
table_idx: usize,
) -> Option<(i64, Vec<Value>)> {
if let Some((idx, _)) = self.referenced_tables[table_idx].get_rowid_alias_column() {
// The rowid is the value at the rowid alias column index
let rowid = match all_values.get(idx) {
Some(Value::Integer(id)) => *id,
_ => return None, // Invalid rowid
};
// All values are table columns (no separate rowid was selected)
Some((rowid, all_values))
} else {
// The last value is the explicitly selected rowid
let rowid = match all_values.last() {
Some(Value::Integer(id)) => *id,
_ => return None, // Invalid rowid
};
// Get all values except the rowid
let values = all_values[..all_values.len() - 1].to_vec();
Some((rowid, values))
}
}
/// Merge a delta set of changes into the view's current state
pub fn merge_delta(
&mut self,
delta_set: DeltaSet,
pager: Arc<crate::Pager>,
) -> crate::Result<IOResult<()>> {
// Early return if all deltas are empty
if delta_set.is_empty() {
return Ok(IOResult::Done(()));
}
// Use the circuit to process the deltas and write to btree
let input_data = delta_set.into_map();
// The circuit now handles all btree I/O internally with the provided pager
let _delta = return_if_io!(self.circuit.commit(input_data, pager));
Ok(IOResult::Done(()))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::schema::{BTreeTable, Column as SchemaColumn, Schema, Type};
use std::sync::Arc;
use turso_parser::ast;
use turso_parser::parser::Parser;
// Helper function to create a test schema with multiple tables
fn create_test_schema() -> Schema {
let mut schema = Schema::new(false);
// Create customers table
let customers_table = BTreeTable {
name: "customers".to_string(),
root_page: 2,
primary_key_columns: vec![("id".to_string(), ast::SortOrder::Asc)],
columns: vec![
SchemaColumn {
name: Some("id".to_string()),
ty: Type::Integer,
ty_str: "INTEGER".to_string(),
primary_key: true,
is_rowid_alias: true,
notnull: true,
default: None,
unique: false,
collation: None,
hidden: false,
},
SchemaColumn {
name: Some("name".to_string()),
ty: Type::Text,
ty_str: "TEXT".to_string(),
primary_key: false,
is_rowid_alias: false,
notnull: false,
default: None,
unique: false,
collation: None,
hidden: false,
},
],
has_rowid: true,
is_strict: false,
unique_sets: vec![],
};
// Create orders table
let orders_table = BTreeTable {
name: "orders".to_string(),
root_page: 3,
primary_key_columns: vec![("id".to_string(), ast::SortOrder::Asc)],
columns: vec![
SchemaColumn {
name: Some("id".to_string()),
ty: Type::Integer,
ty_str: "INTEGER".to_string(),
primary_key: true,
is_rowid_alias: true,
notnull: true,
default: None,
unique: false,
collation: None,
hidden: false,
},
SchemaColumn {
name: Some("customer_id".to_string()),
ty: Type::Integer,
ty_str: "INTEGER".to_string(),
primary_key: false,
is_rowid_alias: false,
notnull: false,
default: None,
unique: false,
collation: None,
hidden: false,
},
SchemaColumn {
name: Some("total".to_string()),
ty: Type::Integer,
ty_str: "INTEGER".to_string(),
primary_key: false,
is_rowid_alias: false,
notnull: false,
default: None,
unique: false,
collation: None,
hidden: false,
},
],
has_rowid: true,
is_strict: false,
unique_sets: vec![],
};
// Create products table
let products_table = BTreeTable {
name: "products".to_string(),
root_page: 4,
primary_key_columns: vec![("id".to_string(), ast::SortOrder::Asc)],
columns: vec![
SchemaColumn {
name: Some("id".to_string()),
ty: Type::Integer,
ty_str: "INTEGER".to_string(),
primary_key: true,
is_rowid_alias: true,
notnull: true,
default: None,
unique: false,
collation: None,
hidden: false,
},
SchemaColumn {
name: Some("name".to_string()),
ty: Type::Text,
ty_str: "TEXT".to_string(),
primary_key: false,
is_rowid_alias: false,
notnull: false,
default: None,
unique: false,
collation: None,
hidden: false,
},
SchemaColumn {
name: Some("price".to_string()),
ty: Type::Real,
ty_str: "REAL".to_string(),
primary_key: false,
is_rowid_alias: false,
notnull: false,
default: None,
unique: false,
collation: None,
hidden: false,
},
],
has_rowid: true,
is_strict: false,
unique_sets: vec![],
};
// Create logs table - without a rowid alias (no INTEGER PRIMARY KEY)
let logs_table = BTreeTable {
name: "logs".to_string(),
root_page: 5,
primary_key_columns: vec![], // No primary key, so no rowid alias
columns: vec![
SchemaColumn {
name: Some("message".to_string()),
ty: Type::Text,
ty_str: "TEXT".to_string(),
primary_key: false,
is_rowid_alias: false,
notnull: false,
default: None,
unique: false,
collation: None,
hidden: false,
},
SchemaColumn {
name: Some("level".to_string()),
ty: Type::Integer,
ty_str: "INTEGER".to_string(),
primary_key: false,
is_rowid_alias: false,
notnull: false,
default: None,
unique: false,
collation: None,
hidden: false,
},
SchemaColumn {
name: Some("timestamp".to_string()),
ty: Type::Integer,
ty_str: "INTEGER".to_string(),
primary_key: false,
is_rowid_alias: false,
notnull: false,
default: None,
unique: false,
collation: None,
hidden: false,
},
],
has_rowid: true, // Has implicit rowid but no alias
is_strict: false,
unique_sets: vec![],
};
schema.add_btree_table(Arc::new(customers_table));
schema.add_btree_table(Arc::new(orders_table));
schema.add_btree_table(Arc::new(products_table));
schema.add_btree_table(Arc::new(logs_table));
schema
}
// Helper to parse SQL and extract the SELECT statement
fn parse_select(sql: &str) -> ast::Select {
let mut parser = Parser::new(sql.as_bytes());
let cmd = parser.next().unwrap().unwrap();
match cmd {
ast::Cmd::Stmt(ast::Stmt::Select(select)) => select,
_ => panic!("Expected SELECT statement"),
}
}
// Type alias for the complex return type of extract_all_tables
type ExtractedTableInfo = (
Vec<Arc<BTreeTable>>,
HashMap<String, String>,
HashMap<String, String>,
HashMap<String, Vec<Option<ast::Expr>>>,
);
fn extract_all_tables(select: &ast::Select, schema: &Schema) -> Result<ExtractedTableInfo> {
let mut referenced_tables = Vec::new();
let mut table_aliases = HashMap::new();
let mut qualified_table_names = HashMap::new();
let mut table_conditions = HashMap::new();
IncrementalView::extract_all_tables(
select,
schema,
&mut referenced_tables,
&mut table_aliases,
&mut qualified_table_names,
&mut table_conditions,
)?;
Ok((
referenced_tables,
table_aliases,
qualified_table_names,
table_conditions,
))
}
#[test]
fn test_extract_single_table() {
let schema = create_test_schema();
let select = parse_select("SELECT * FROM customers");
let (tables, _, _, _table_conditions) = extract_all_tables(&select, &schema).unwrap();
assert_eq!(tables.len(), 1);
assert_eq!(tables[0].name, "customers");
}
#[test]
fn test_tables_from_union() {
let schema = create_test_schema();
let select = parse_select("SELECT name FROM customers union SELECT name from products");
let (tables, _, _, table_conditions) = extract_all_tables(&select, &schema).unwrap();
assert_eq!(tables.len(), 2);
assert!(table_conditions.contains_key("customers"));
assert!(table_conditions.contains_key("products"));
}
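// A hedged sketch: when the same table appears in more than one branch of a UNION,
// each branch's WHERE condition is accumulated and the populate query should OR them
// together so every row either branch needs is fetched. Only the shape of the
// generated SQL is asserted here, not its exact rendering.
#[test]
fn test_sql_for_populate_union_same_table_conditions_ored() {
    let schema = create_test_schema();
    let select = parse_select(
        "SELECT name FROM customers WHERE id > 10 \
         UNION SELECT name FROM customers WHERE id < 5",
    );
    let (tables, aliases, qualified_names, table_conditions) =
        extract_all_tables(&select, &schema).unwrap();
    // The table is deduplicated, but both branch conditions are kept.
    assert_eq!(tables.len(), 1);
    assert_eq!(table_conditions.get("customers").map(|c| c.len()), Some(2));
    let queries = IncrementalView::generate_populate_queries(
        &select,
        &tables,
        &aliases,
        &qualified_names,
        &table_conditions,
    )
    .unwrap();
    assert_eq!(queries.len(), 1);
    assert!(queries[0].starts_with("SELECT * FROM customers WHERE"));
    assert!(queries[0].contains(" OR "));
}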
#[test]
fn test_extract_tables_from_inner_join() {
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers INNER JOIN orders ON customers.id = orders.customer_id",
);
let (tables, _, _, table_conditions) = extract_all_tables(&select, &schema).unwrap();
assert_eq!(tables.len(), 2);
assert!(table_conditions.contains_key("customers"));
assert!(table_conditions.contains_key("orders"));
}
#[test]
fn test_extract_tables_from_multiple_joins() {
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers
INNER JOIN orders ON customers.id = orders.customer_id
INNER JOIN products ON orders.id = products.id",
);
let (tables, _, _, table_conditions) = extract_all_tables(&select, &schema).unwrap();
assert_eq!(tables.len(), 3);
assert!(table_conditions.contains_key("customers"));
assert!(table_conditions.contains_key("orders"));
assert!(table_conditions.contains_key("products"));
}
#[test]
fn test_extract_tables_from_left_join() {
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers LEFT JOIN orders ON customers.id = orders.customer_id",
);
let (tables, _, _, table_conditions) = extract_all_tables(&select, &schema).unwrap();
assert_eq!(tables.len(), 2);
assert!(table_conditions.contains_key("customers"));
assert!(table_conditions.contains_key("orders"));
}
#[test]
fn test_extract_tables_from_cross_join() {
let schema = create_test_schema();
let select = parse_select("SELECT * FROM customers CROSS JOIN orders");
let (tables, _, _, table_conditions) = extract_all_tables(&select, &schema).unwrap();
assert_eq!(tables.len(), 2);
assert!(table_conditions.contains_key("customers"));
assert!(table_conditions.contains_key("orders"));
}
#[test]
fn test_extract_tables_with_aliases() {
let schema = create_test_schema();
let select =
parse_select("SELECT * FROM customers c INNER JOIN orders o ON c.id = o.customer_id");
let (tables, aliases, _, _table_conditions) = extract_all_tables(&select, &schema).unwrap();
// Should still extract the actual table names, not aliases
assert_eq!(tables.len(), 2);
let table_names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
assert!(table_names.contains(&"customers"));
assert!(table_names.contains(&"orders"));
// Check that aliases are correctly mapped
assert_eq!(aliases.get("c"), Some(&"customers".to_string()));
assert_eq!(aliases.get("o"), Some(&"orders".to_string()));
}
#[test]
fn test_extract_tables_nonexistent_table_error() {
let schema = create_test_schema();
let select = parse_select("SELECT * FROM nonexistent");
let result = extract_all_tables(&select, &schema).map(|(tables, _, _, _)| tables);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("Table 'nonexistent' not found"));
}
#[test]
fn test_extract_tables_nonexistent_join_table_error() {
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers INNER JOIN nonexistent ON customers.id = nonexistent.id",
);
let result = extract_all_tables(&select, &schema).map(|(tables, _, _, _)| tables);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("Table 'nonexistent' not found"));
}
#[test]
fn test_sql_for_populate_simple_query_no_where() {
// Test simple query with no WHERE clause
let schema = create_test_schema();
let select = parse_select("SELECT * FROM customers");
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 1);
// customers has id as rowid alias, so no need for explicit rowid
assert_eq!(queries[0], "SELECT * FROM customers");
}
#[test]
fn test_sql_for_populate_simple_query_with_where() {
// Test simple query with WHERE clause
let schema = create_test_schema();
let select = parse_select("SELECT * FROM customers WHERE id > 10");
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 1);
// For single-table queries, we should get the full WHERE clause
assert_eq!(queries[0], "SELECT * FROM customers WHERE id > 10");
}
#[test]
fn test_sql_for_populate_join_with_where_on_both_tables() {
// Test JOIN query with WHERE conditions on both tables
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers c \
JOIN orders o ON c.id = o.customer_id \
WHERE c.id > 10 AND o.total > 100",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 2);
// With per-table WHERE extraction:
// - customers table gets: c.id > 10
// - orders table gets: o.total > 100
assert!(queries
.iter()
.any(|q| q == "SELECT * FROM customers WHERE id > 10"));
assert!(queries
.iter()
.any(|q| q == "SELECT * FROM orders WHERE total > 100"));
}
#[test]
fn test_sql_for_populate_complex_join_with_mixed_conditions() {
// Test complex JOIN with WHERE conditions mixing both tables
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers c \
JOIN orders o ON c.id = o.customer_id \
WHERE c.id > 10 AND o.total > 100 AND c.name = 'John' \
AND o.customer_id = 5 AND (c.id = 15 OR o.total = 200)",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 2);
// With per-table WHERE extraction:
// - customers gets: c.id > 10 AND c.name = 'John'
// - orders gets: o.total > 100 AND o.customer_id = 5
// Note: The OR condition (c.id = 15 OR o.total = 200) involves both tables,
// so it cannot be extracted to either table individually
// Check both queries exist (order doesn't matter)
assert!(queries
.contains(&"SELECT * FROM customers WHERE id > 10 AND name = 'John'".to_string()));
assert!(queries
.contains(&"SELECT * FROM orders WHERE total > 100 AND customer_id = 5".to_string()));
}
#[test]
fn test_sql_for_populate_table_without_rowid_alias() {
let schema = create_test_schema();
let select = parse_select("SELECT * FROM logs WHERE level > 2");
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 1);
// logs table has no rowid alias, so we need to explicitly select rowid
assert_eq!(queries[0], "SELECT *, rowid FROM logs WHERE level > 2");
}
#[test]
fn test_sql_for_populate_join_with_and_without_rowid_alias() {
// Test JOIN between a table with rowid alias and one without
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers c \
JOIN logs l ON c.id = l.level \
WHERE c.id > 10 AND l.level > 2",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 2);
// customers has rowid alias (id), logs doesn't
assert!(queries.contains(&"SELECT * FROM customers WHERE id > 10".to_string()));
assert!(queries.contains(&"SELECT *, rowid FROM logs WHERE level > 2".to_string()));
}
#[test]
fn test_sql_for_populate_with_database_qualified_names() {
// Test that database.table.column references are handled correctly
// The table name in FROM should keep the database prefix,
// but column names in WHERE should be unqualified
let schema = create_test_schema();
// Test with single table using database qualification
let select = parse_select("SELECT * FROM main.customers WHERE main.customers.id > 10");
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 1);
// The FROM clause should preserve the database qualification,
// but the WHERE clause should have unqualified column names
assert_eq!(queries[0], "SELECT * FROM main.customers WHERE id > 10");
}
#[test]
fn test_sql_for_populate_join_with_database_qualified_names() {
// Test JOIN with database-qualified table and column references
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM main.customers c \
JOIN main.orders o ON c.id = o.customer_id \
WHERE main.customers.id > 10 AND main.orders.total > 100",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 2);
// The FROM clauses should preserve database qualification,
// but WHERE clauses should have unqualified column names
assert!(queries.contains(&"SELECT * FROM main.customers WHERE id > 10".to_string()));
assert!(queries.contains(&"SELECT * FROM main.orders WHERE total > 100".to_string()));
}
#[test]
fn test_where_extraction_for_three_tables_with_aliases() {
// Test that WHERE clause extraction correctly separates conditions for 3+ tables
// This addresses the concern about conditions "piling up" as joins increase
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers c
JOIN orders o ON c.id = o.customer_id
JOIN products p ON p.id = o.product_id
WHERE c.id > 10 AND o.total > 100 AND p.price > 50",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
// Verify we extracted all three tables
assert_eq!(tables.len(), 3);
let table_names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
assert!(table_names.contains(&"customers"));
assert!(table_names.contains(&"orders"));
assert!(table_names.contains(&"products"));
// Verify aliases are correctly mapped
assert_eq!(aliases.get("c"), Some(&"customers".to_string()));
assert_eq!(aliases.get("o"), Some(&"orders".to_string()));
assert_eq!(aliases.get("p"), Some(&"products".to_string()));
// Generate populate queries to verify each table gets its own conditions
let queries = IncrementalView::generate_populate_queries(
&select,
&tables,
&aliases,
&qualified_names,
&table_conditions,
)
.unwrap();
assert_eq!(queries.len(), 3);
// Verify the exact queries generated for each table
// The order might vary, so check all possibilities
let expected_queries = vec![
"SELECT * FROM customers WHERE id > 10",
"SELECT * FROM orders WHERE total > 100",
"SELECT * FROM products WHERE price > 50",
];
for expected in &expected_queries {
assert!(
queries.contains(&expected.to_string()),
"Missing expected query: {expected}. Got: {queries:?}"
);
}
}
#[test]
fn test_sql_for_populate_complex_expressions_not_included() {
// Test that complex expressions (subqueries, CASE, string concat) are NOT included in populate queries
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers
WHERE id > (SELECT MAX(customer_id) FROM orders)
AND name || ' Customer' = 'John Customer'
AND CASE WHEN id > 10 THEN 1 ELSE 0 END = 1
AND EXISTS (SELECT 1 FROM orders WHERE customer_id = customers.id)",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let queries = IncrementalView::generate_populate_queries(
&select,
&tables,
&aliases,
&qualified_names,
&table_conditions,
)
.unwrap();
assert_eq!(queries.len(), 1);
// Since customers table has an INTEGER PRIMARY KEY (id), we should get SELECT *
// without rowid and without WHERE clause (all conditions are complex)
assert_eq!(queries[0], "SELECT * FROM customers");
}
#[test]
fn test_sql_for_populate_unambiguous_unqualified_column() {
// Test that unambiguous unqualified columns ARE extracted
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers c \
JOIN orders o ON c.id = o.customer_id \
WHERE total > 100", // 'total' only exists in orders table
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 2);
// 'total' is unambiguous (only in orders), so it should be extracted
assert!(queries.contains(&"SELECT * FROM customers".to_string()));
assert!(queries.contains(&"SELECT * FROM orders WHERE total > 100".to_string()));
}
#[test]
fn test_database_qualified_table_names() {
let schema = create_test_schema();
// Test with database-qualified table names
let select = parse_select(
"SELECT c.id, c.name, o.id, o.total
FROM main.customers c
JOIN main.orders o ON c.id = o.customer_id
WHERE c.id > 10",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
// Check that qualified names are preserved
assert!(qualified_names.contains_key("customers"));
assert_eq!(qualified_names.get("customers").unwrap(), "main.customers");
assert!(qualified_names.contains_key("orders"));
assert_eq!(qualified_names.get("orders").unwrap(), "main.orders");
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names.clone(),
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 2);
// The FROM clause should contain the database-qualified name
// But the WHERE clause should use unqualified column names
assert!(queries.contains(&"SELECT * FROM main.customers WHERE id > 10".to_string()));
assert!(queries.contains(&"SELECT * FROM main.orders".to_string()));
}
#[test]
fn test_mixed_qualified_unqualified_tables() {
let schema = create_test_schema();
// Test with a mix of qualified and unqualified table names
let select = parse_select(
"SELECT c.id, c.name, o.id, o.total
FROM main.customers c
JOIN orders o ON c.id = o.customer_id
WHERE c.id > 10 AND o.total < 1000",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
// Check that qualified names are preserved where specified
assert_eq!(qualified_names.get("customers").unwrap(), "main.customers");
// Unqualified tables should not have an entry (or have the bare name)
assert!(
!qualified_names.contains_key("orders")
|| qualified_names.get("orders").unwrap() == "orders"
);
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names.clone(),
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 2);
// The FROM clause should preserve qualification where specified
assert!(queries.contains(&"SELECT * FROM main.customers WHERE id > 10".to_string()));
assert!(queries.contains(&"SELECT * FROM orders WHERE total < 1000".to_string()));
}
#[test]
fn test_extract_tables_with_simple_cte() {
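        // Table extraction should look inside the CTE body: the base tables it
        // references (customers, orders) are what the view ultimately depends on.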
let schema = create_test_schema();
let select = parse_select(
"WITH customer_totals AS (
SELECT c.id, c.name, SUM(o.total) as total_spent
FROM customers c
JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.name
)
SELECT * FROM customer_totals WHERE total_spent > 1000",
);
let (tables, aliases, _qualified_names, _table_conditions) =
extract_all_tables(&select, &schema).unwrap();
// Check that we found both tables from the CTE
assert_eq!(tables.len(), 2);
let table_names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
assert!(table_names.contains(&"customers"));
assert!(table_names.contains(&"orders"));
// Check aliases from the CTE
assert_eq!(aliases.get("c"), Some(&"customers".to_string()));
assert_eq!(aliases.get("o"), Some(&"orders".to_string()));
}
#[test]
fn test_extract_tables_with_multiple_ctes() {
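        // Both CTE bodies (including the subquery inside the IN clause) reference only
        // the customers and orders base tables, so those are what should be extracted.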
let schema = create_test_schema();
let select = parse_select(
"WITH
high_value_customers AS (
SELECT id, name
FROM customers
WHERE id IN (SELECT customer_id FROM orders WHERE total > 500)
),
recent_orders AS (
SELECT id, customer_id, total
FROM orders
WHERE id > 100
)
SELECT hvc.name, ro.total
FROM high_value_customers hvc
JOIN recent_orders ro ON hvc.id = ro.customer_id",
);
let (tables, _aliases, _qualified_names, _table_conditions) =
extract_all_tables(&select, &schema).unwrap();
// Check that we found both tables from both CTEs
assert_eq!(tables.len(), 2);
let table_names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
assert!(table_names.contains(&"customers"));
assert!(table_names.contains(&"orders"));
}
#[test]
fn test_sql_for_populate_union_mixed_conditions() {
// Test UNION where same table appears with and without WHERE clause
// This should drop ALL conditions to ensure we get all rows
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM customers WHERE id > 10
UNION ALL
SELECT * FROM customers",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
let view = IncrementalView::new(
"union_view".to_string(),
select.clone(),
tables,
aliases,
qualified_names,
table_conditions,
extract_view_columns(&select, &schema).unwrap(),
&schema,
1, // main_data_root
2, // internal_state_root
3, // internal_state_index_root
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 1);
// When the same table appears with and without WHERE conditions in a UNION,
// we must fetch ALL rows (no WHERE clause) because the conditions are incompatible
assert_eq!(
queries[0], "SELECT * FROM customers",
"UNION with mixed conditions (some with WHERE, some without) should fetch ALL rows"
);
}
#[test]
fn test_extract_tables_with_nested_cte() {
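        // A recursive CTE references its own name; only the underlying base tables
        // (customers, orders) should be extracted, possibly with duplicates.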
let schema = create_test_schema();
let select = parse_select(
"WITH RECURSIVE customer_hierarchy AS (
SELECT id, name, 0 as level
FROM customers
WHERE id = 1
UNION ALL
SELECT c.id, c.name, ch.level + 1
FROM customers c
JOIN orders o ON c.id = o.customer_id
JOIN customer_hierarchy ch ON o.customer_id = ch.id
WHERE ch.level < 3
)
SELECT * FROM customer_hierarchy",
);
let (tables, _aliases, _qualified_names, _table_conditions) =
extract_all_tables(&select, &schema).unwrap();
// Check that we found the tables referenced in the recursive CTE
let table_names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
        // "customers" appears twice in the recursive CTE, so the extracted list may
        // contain duplicates; deduplicate before counting the unique tables
let unique_tables: std::collections::HashSet<&str> = table_names.iter().cloned().collect();
assert_eq!(unique_tables.len(), 2);
assert!(unique_tables.contains("customers"));
assert!(unique_tables.contains("orders"));
}
#[test]
fn test_extract_tables_with_cte_and_main_query() {
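        // Tables should be collected from both the CTE body (orders) and the outer
        // query (customers, products).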
let schema = create_test_schema();
let select = parse_select(
"WITH customer_stats AS (
SELECT customer_id, COUNT(*) as order_count
FROM orders
GROUP BY customer_id
)
SELECT c.name, cs.order_count, p.name as product_name
FROM customers c
JOIN customer_stats cs ON c.id = cs.customer_id
JOIN products p ON p.id = 1",
);
let (tables, aliases, _qualified_names, _table_conditions) =
extract_all_tables(&select, &schema).unwrap();
// Check that we found tables from both the CTE and the main query
assert_eq!(tables.len(), 3);
let table_names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
assert!(table_names.contains(&"customers"));
assert!(table_names.contains(&"orders"));
assert!(table_names.contains(&"products"));
// Check aliases from main query
assert_eq!(aliases.get("c"), Some(&"customers".to_string()));
assert_eq!(aliases.get("p"), Some(&"products".to_string()));
}
#[test]
fn test_sql_for_populate_simple_union() {
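        // UNION ALL branches over the same table should be deduplicated into a single
        // populate query whose WHERE clause combines the per-branch predicates with OR.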
let schema = create_test_schema();
let select = parse_select(
"SELECT * FROM orders WHERE total > 1000
UNION ALL
SELECT * FROM orders WHERE total < 100",
);
let (tables, aliases, qualified_names, table_conditions) =
extract_all_tables(&select, &schema).unwrap();
// Generate populate queries
let queries = IncrementalView::generate_populate_queries(
&select,
&tables,
&aliases,
&qualified_names,
&table_conditions,
)
.unwrap();
// We should have deduplicated to a single table
assert_eq!(tables.len(), 1, "Should have one unique table");
        assert_eq!(tables[0].name, "orders"); // only one table, so indexing [0] is unambiguous
// Should have collected two conditions
assert_eq!(table_conditions.get("orders").unwrap().len(), 2);
// Should combine multiple conditions with OR
assert_eq!(queries.len(), 1);
// Conditions are combined with OR
assert_eq!(
queries[0],
"SELECT * FROM orders WHERE (total > 1000) OR (total < 100)"
);
}
#[test]
fn test_sql_for_populate_with_union_and_filters() {
let schema = create_test_schema();
// Test UNION with different WHERE conditions on the same table
let select = parse_select(
"SELECT * FROM orders WHERE total > 1000
UNION ALL
SELECT * FROM orders WHERE total < 100",
);
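        // Build the view via from_stmt, which works from the parsed statement alone
        // (no pre-extracted tables or conditions, unlike IncrementalView::new above).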
let view = IncrementalView::from_stmt(
ast::QualifiedName {
db_name: None,
name: ast::Name::Ident("test_view".to_string()),
alias: None,
},
select,
&schema,
1,
2,
3,
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
// We deduplicate tables, so we get 1 query for orders
assert_eq!(queries.len(), 1);
// Multiple conditions on the same table are combined with OR
assert_eq!(
queries[0],
"SELECT * FROM orders WHERE (total > 1000) OR (total < 100)"
);
}
#[test]
fn test_sql_for_populate_with_union_mixed_tables() {
let schema = create_test_schema();
// Test UNION with different tables
let select = parse_select(
"SELECT id, name FROM customers WHERE id > 10
UNION ALL
SELECT customer_id as id, 'Order' as name FROM orders WHERE total > 500",
);
let view = IncrementalView::from_stmt(
ast::QualifiedName {
db_name: None,
name: ast::Name::Ident("test_view".to_string()),
alias: None,
},
select,
&schema,
1,
2,
3,
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
assert_eq!(queries.len(), 2, "Should have one query per table");
// Check that each table gets its appropriate WHERE clause
let customers_query = queries
.iter()
.find(|q| q.contains("FROM customers"))
.unwrap();
let orders_query = queries.iter().find(|q| q.contains("FROM orders")).unwrap();
assert!(customers_query.contains("WHERE id > 10"));
assert!(orders_query.contains("WHERE total > 500"));
}
#[test]
fn test_sql_for_populate_duplicate_tables_conflicting_filters() {
        // This tests duplicate references to the same table, whose per-reference filters could conflict
        // The view is constructed manually to simulate what would happen with CTEs
let schema = create_test_schema();
// Get the orders table twice (simulating what would happen with CTEs)
let orders_table = schema.get_btree_table("orders").unwrap();
let referenced_tables = vec![orders_table.clone(), orders_table.clone()];
        // The SELECT itself only supplies the AST; the duplicated table references above
        // are what simulate the conflicting filters
        let select = parse_select("SELECT * FROM orders WHERE total > 1000");
let view = IncrementalView::new(
"test_view".to_string(),
select.clone(),
referenced_tables,
HashMap::new(),
HashMap::new(),
HashMap::new(),
extract_view_columns(&select, &schema).unwrap(),
&schema,
1,
2,
3,
)
.unwrap();
let queries = view.sql_for_populate().unwrap();
// With duplicates, we should get 2 identical queries
assert_eq!(queries.len(), 2);
        // Both should be identical since both references point to the same table
assert_eq!(queries[0], queries[1]);
}
#[test]
fn test_table_extraction_with_nested_ctes_complex_conditions() {
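        // A CTE that selects from another CTE should still resolve down to the base
        // tables (customers, orders) rather than treating the CTE names as tables.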
let schema = create_test_schema();
let select = parse_select(
"WITH
customer_orders AS (
SELECT c.*, o.total
FROM customers c
JOIN orders o ON c.id = o.customer_id
WHERE c.name LIKE 'A%' AND o.total > 100
),
top_customers AS (
SELECT * FROM customer_orders WHERE total > 500
)
SELECT * FROM top_customers",
);
// Test table extraction directly without creating a view
let mut tables = Vec::new();
let mut aliases = HashMap::new();
let mut qualified_names = HashMap::new();
let mut table_conditions = HashMap::new();
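        // Call the extraction method directly with mutable output collections, rather
        // than the tuple-returning test helper used elsewhere in these tests.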
IncrementalView::extract_all_tables(
&select,
&schema,
&mut tables,
&mut aliases,
&mut qualified_names,
&mut table_conditions,
)
.unwrap();
let table_names: Vec<&str> = tables.iter().map(|t| t.name.as_str()).collect();
// Should have one reference to each table
assert_eq!(table_names.len(), 2, "Should have 2 table references");
assert!(table_names.contains(&"customers"));
assert!(table_names.contains(&"orders"));
// Check aliases
assert_eq!(aliases.get("c"), Some(&"customers".to_string()));
assert_eq!(aliases.get("o"), Some(&"orders".to_string()));
}
#[test]
fn test_union_all_populate_queries() {
// Test that UNION ALL generates correct populate queries
let schema = create_test_schema();
// Create a UNION ALL query that references the same table twice with different WHERE conditions
let sql = "
SELECT id, name FROM customers WHERE id < 5
UNION ALL
SELECT id, name FROM customers WHERE id > 10
";
let mut parser = Parser::new(sql.as_bytes());
let cmd = parser.next_cmd().unwrap();
let select_stmt = match cmd.unwrap() {
            ast::Cmd::Stmt(ast::Stmt::Select(select)) => select,
_ => panic!("Expected SELECT statement"),
};
// Extract tables and conditions
let (tables, aliases, qualified_names, conditions) =
extract_all_tables(&select_stmt, &schema).unwrap();
// Generate populate queries
let queries = IncrementalView::generate_populate_queries(
&select_stmt,
&tables,
&aliases,
&qualified_names,
&conditions,
)
.unwrap();
// Expected query - assuming customers table has INTEGER PRIMARY KEY
// so we don't need to select rowid separately
let expected = "SELECT * FROM customers WHERE (id < 5) OR (id > 10)";
assert_eq!(
queries.len(),
1,
"Should generate exactly 1 query for UNION ALL with same table"
);
assert_eq!(queries[0], expected, "Query should match expected format");
}
#[test]
fn test_union_all_different_tables_populate_queries() {
// Test UNION ALL with different tables
let schema = create_test_schema();
let sql = "
SELECT id, name FROM customers WHERE id < 5
UNION ALL
SELECT id, product_name FROM orders WHERE amount > 100
";
let mut parser = Parser::new(sql.as_bytes());
let cmd = parser.next_cmd().unwrap();
let select_stmt = match cmd.unwrap() {
            ast::Cmd::Stmt(ast::Stmt::Select(select)) => select,
_ => panic!("Expected SELECT statement"),
};
// Extract tables and conditions
let (tables, aliases, qualified_names, conditions) =
extract_all_tables(&select_stmt, &schema).unwrap();
// Generate populate queries
let queries = IncrementalView::generate_populate_queries(
&select_stmt,
&tables,
&aliases,
&qualified_names,
&conditions,
)
.unwrap();
// Should generate separate queries for each table
assert_eq!(
queries.len(),
2,
"Should generate 2 queries for different tables"
);
// Check we have queries for both tables
let has_customers = queries.iter().any(|q| q.contains("customers"));
let has_orders = queries.iter().any(|q| q.contains("orders"));
assert!(has_customers, "Should have a query for customers table");
assert!(has_orders, "Should have a query for orders table");
// Verify the customers query has its WHERE clause
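        // (The orders branch filters on `amount`, which presumably does not exist in the
        // test schema's orders table, so its WHERE clause is not asserted here.)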
let customers_query = queries
.iter()
.find(|q| q.contains("customers"))
.expect("Should have customers query");
assert!(
customers_query.contains("WHERE"),
"Customers query should have WHERE clause"
);
}
}