// turso/core/incremental/operator.rs
//
// Commit 6e2bd364ee (Glauber Costa, 2025-08-27): fix issue with rowids and deletions.
// The operator itself should handle deletions and updates that change the rowid by
// consolidating its state. Our current materialized views track state themselves, so
// we don't see this problem now, but it becomes apparent once we switch the views to
// use circuits.

#![allow(dead_code)]
// Operator DAG for DBSP-style incremental computation
// Based on Feldera DBSP design but adapted for Turso's architecture
use crate::incremental::expr_compiler::CompiledExpression;
use crate::incremental::hashable_row::HashableRow;
use crate::types::Text;
use crate::{Connection, Database, SymbolTable, Value};
use std::collections::{HashMap, HashSet};
use std::fmt::{self, Debug, Display};
use std::sync::Arc;
use std::sync::Mutex;
/// Tracks computation counts to verify incremental behavior (currently used in
/// tests); in the future it should also be used to provide runtime statistics.
#[derive(Debug, Default, Clone)]
pub struct ComputationTracker {
pub filter_evaluations: usize,
pub project_operations: usize,
pub join_lookups: usize,
pub aggregation_updates: usize,
pub full_scans: usize,
}
impl ComputationTracker {
pub fn new() -> Self {
Self::default()
}
pub fn record_filter(&mut self) {
self.filter_evaluations += 1;
}
pub fn record_project(&mut self) {
self.project_operations += 1;
}
pub fn record_join_lookup(&mut self) {
self.join_lookups += 1;
}
pub fn record_aggregation(&mut self) {
self.aggregation_updates += 1;
}
pub fn record_full_scan(&mut self) {
self.full_scans += 1;
}
pub fn total_computations(&self) -> usize {
self.filter_evaluations
+ self.project_operations
+ self.join_lookups
+ self.aggregation_updates
}
}
/// A delta represents ordered changes to data
#[derive(Debug, Clone, Default)]
pub struct Delta {
/// Ordered list of changes: (row, weight), where weight is +1 for an insert and -1 for a delete.
/// Ordering is crucial: an update becomes a delete followed by an insert, and applying those
/// two changes in an arbitrary order would break the view.
pub changes: Vec<(HashableRow, isize)>,
}
impl Delta {
pub fn new() -> Self {
Self {
changes: Vec::new(),
}
}
pub fn insert(&mut self, row_key: i64, values: Vec<Value>) {
let row = HashableRow::new(row_key, values);
self.changes.push((row, 1));
}
pub fn delete(&mut self, row_key: i64, values: Vec<Value>) {
let row = HashableRow::new(row_key, values);
self.changes.push((row, -1));
}
pub fn is_empty(&self) -> bool {
self.changes.is_empty()
}
pub fn len(&self) -> usize {
self.changes.len()
}
/// Merge another delta into this one
/// This preserves the order of operations - no consolidation is done
/// to maintain the full history of changes
pub fn merge(&mut self, other: &Delta) {
// Simply append all changes from other, preserving order
self.changes.extend(other.changes.iter().cloned());
}
/// Consolidate changes by combining entries with the same HashableRow
pub fn consolidate(&mut self) {
if self.changes.is_empty() {
return;
}
// Use a HashMap to accumulate weights
let mut consolidated: HashMap<HashableRow, isize> = HashMap::new();
for (row, weight) in self.changes.drain(..) {
*consolidated.entry(row).or_insert(0) += weight;
}
// Convert back to vec, filtering out zero weights
self.changes = consolidated
.into_iter()
.filter(|(_, weight)| *weight != 0)
.collect();
}
}
#[cfg(test)]
mod hashable_row_tests {
use super::*;
#[test]
fn test_hashable_row_delta_operations() {
let mut delta = Delta::new();
// Test INSERT
delta.insert(1, vec![Value::Integer(1), Value::Integer(100)]);
assert_eq!(delta.len(), 1);
// Test UPDATE (DELETE + INSERT) - order matters!
delta.delete(1, vec![Value::Integer(1), Value::Integer(100)]);
delta.insert(1, vec![Value::Integer(1), Value::Integer(200)]);
assert_eq!(delta.len(), 3); // Should have 3 operations before consolidation
// Verify order is preserved
let ops: Vec<_> = delta.changes.iter().collect();
assert_eq!(ops[0].1, 1); // First insert
assert_eq!(ops[1].1, -1); // Delete
assert_eq!(ops[2].1, 1); // Second insert
// Test consolidation
delta.consolidate();
// After consolidation, the first insert and delete should cancel out
// leaving only the second insert
assert_eq!(delta.len(), 1);
let final_row = &delta.changes[0];
assert_eq!(final_row.0.rowid, 1);
assert_eq!(
final_row.0.values,
vec![Value::Integer(1), Value::Integer(200)]
);
assert_eq!(final_row.1, 1);
}
#[test]
fn test_duplicate_row_consolidation() {
let mut delta = Delta::new();
// Insert same row twice
delta.insert(2, vec![Value::Integer(2), Value::Integer(300)]);
delta.insert(2, vec![Value::Integer(2), Value::Integer(300)]);
assert_eq!(delta.len(), 2);
delta.consolidate();
assert_eq!(delta.len(), 1);
// Weight should be 2 (sum of both inserts)
let final_row = &delta.changes[0];
assert_eq!(final_row.0.rowid, 2);
assert_eq!(final_row.1, 2);
}
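// Illustrative check: merge() appends changes in order; consolidation is
// deliberately deferred to consolidate() so the full history is preserved.
#[test]
fn test_merge_preserves_order() {
let mut a = Delta::new();
a.insert(1, vec![Value::Integer(10)]);
let mut b = Delta::new();
b.delete(1, vec![Value::Integer(10)]);
b.insert(1, vec![Value::Integer(20)]);
a.merge(&b);
// All three operations survive the merge, in order
assert_eq!(a.len(), 3);
assert_eq!(a.changes[0].1, 1);
assert_eq!(a.changes[1].1, -1);
assert_eq!(a.changes[2].1, 1);
}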
}
/// Represents an operator in the dataflow graph
#[derive(Debug, Clone)]
pub enum QueryOperator {
/// Table scan - source of data
TableScan {
table_name: String,
column_names: Vec<String>,
},
/// Filter rows based on predicate
Filter {
predicate: FilterPredicate,
input: usize, // Index of input operator
},
/// Project columns (select specific columns)
Project {
columns: Vec<ProjectColumn>,
input: usize,
},
/// Join two inputs
Join {
join_type: JoinType,
on_column: String,
left_input: usize,
right_input: usize,
},
/// Aggregate
Aggregate {
group_by: Vec<String>,
aggregates: Vec<AggregateFunction>,
input: usize,
},
}
#[derive(Debug, Clone)]
pub enum FilterPredicate {
/// Column = value
Equals { column: String, value: Value },
/// Column != value
NotEquals { column: String, value: Value },
/// Column > value
GreaterThan { column: String, value: Value },
/// Column >= value
GreaterThanOrEqual { column: String, value: Value },
/// Column < value
LessThan { column: String, value: Value },
/// Column <= value
LessThanOrEqual { column: String, value: Value },
/// Logical AND of two predicates
And(Box<FilterPredicate>, Box<FilterPredicate>),
/// Logical OR of two predicates
Or(Box<FilterPredicate>, Box<FilterPredicate>),
/// No predicate (accept all rows)
None,
}
impl FilterPredicate {
/// Parse a SQL AST expression into a FilterPredicate
/// This centralizes all SQL-to-predicate parsing logic
pub fn from_sql_expr(expr: &turso_parser::ast::Expr) -> crate::Result<Self> {
use turso_parser::ast::*;
let Expr::Binary(lhs, op, rhs) = expr else {
return Err(crate::LimboError::ParseError(
"Unsupported WHERE clause for incremental views: not a binary expression"
.to_string(),
));
};
// Handle AND/OR logical operators
match op {
Operator::And => {
let left = Self::from_sql_expr(lhs)?;
let right = Self::from_sql_expr(rhs)?;
return Ok(FilterPredicate::And(Box::new(left), Box::new(right)));
}
Operator::Or => {
let left = Self::from_sql_expr(lhs)?;
let right = Self::from_sql_expr(rhs)?;
return Ok(FilterPredicate::Or(Box::new(left), Box::new(right)));
}
_ => {}
}
// Handle comparison operators
let Expr::Id(column_name) = &**lhs else {
return Err(crate::LimboError::ParseError(
"Unsupported WHERE clause for incremental views: left-hand-side is not a column reference".to_string(),
));
};
let column = column_name.as_str().to_string();
// Parse the right-hand side value
let value = match &**rhs {
Expr::Literal(Literal::String(s)) => {
// Strip quotes from string literals
let cleaned = s.trim_matches('\'').trim_matches('"');
Value::Text(Text::new(cleaned))
}
Expr::Literal(Literal::Numeric(n)) => {
// Try to parse as integer first, then float
if let Ok(i) = n.parse::<i64>() {
Value::Integer(i)
} else if let Ok(f) = n.parse::<f64>() {
Value::Float(f)
} else {
return Err(crate::LimboError::ParseError(
"Unsupported WHERE clause for incremental views: right-hand-side is not a numeric literal".to_string(),
));
}
}
Expr::Literal(Literal::Null) => Value::Null,
Expr::Literal(Literal::Blob(_)) => {
// Blob comparison not yet supported
return Err(crate::LimboError::ParseError(
"Unsupported WHERE clause for incremental views: comparison with blob literals is not supported".to_string(),
));
}
other => {
// Complex expressions not yet supported
return Err(crate::LimboError::ParseError(
format!("Unsupported WHERE clause for incremental views: comparison with {other:?} is not supported"),
));
}
};
// Create the appropriate predicate based on operator
match op {
Operator::Equals => Ok(FilterPredicate::Equals { column, value }),
Operator::NotEquals => Ok(FilterPredicate::NotEquals { column, value }),
Operator::Greater => Ok(FilterPredicate::GreaterThan { column, value }),
Operator::GreaterEquals => Ok(FilterPredicate::GreaterThanOrEqual { column, value }),
Operator::Less => Ok(FilterPredicate::LessThan { column, value }),
Operator::LessEquals => Ok(FilterPredicate::LessThanOrEqual { column, value }),
other => Err(crate::LimboError::ParseError(
format!("Unsupported WHERE clause for incremental views: comparison operator {other:?} is not supported"),
)),
}
}
/// Parse a WHERE clause from a SELECT statement
pub fn from_select(select: &turso_parser::ast::Select) -> crate::Result<Self> {
use turso_parser::ast::*;
if let OneSelect::Select {
ref where_clause, ..
} = select.body.select
{
if let Some(where_clause) = where_clause {
Self::from_sql_expr(where_clause)
} else {
Ok(FilterPredicate::None)
}
} else {
Err(crate::LimboError::ParseError(
"Unsupported WHERE clause for incremental views: not a single SELECT statement"
.to_string(),
))
}
}
}
#[derive(Debug, Clone)]
pub struct ProjectColumn {
/// The original SQL expression (for debugging/fallback)
pub expr: turso_parser::ast::Expr,
/// Optional alias for the column
pub alias: Option<String>,
/// Compiled expression (handles both trivial columns and complex expressions)
pub compiled: CompiledExpression,
}
#[derive(Debug, Clone)]
pub enum JoinType {
Inner,
Left,
Right,
}
#[derive(Debug, Clone)]
pub enum AggregateFunction {
Count,
Sum(String),
Avg(String),
Min(String),
Max(String),
}
impl Display for AggregateFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AggregateFunction::Count => write!(f, "COUNT(*)"),
AggregateFunction::Sum(col) => write!(f, "SUM({col})"),
AggregateFunction::Avg(col) => write!(f, "AVG({col})"),
AggregateFunction::Min(col) => write!(f, "MIN({col})"),
AggregateFunction::Max(col) => write!(f, "MAX({col})"),
}
}
}
impl AggregateFunction {
/// Get the default output column name for this aggregate function
#[inline]
pub fn default_output_name(&self) -> String {
self.to_string()
}
/// Create an AggregateFunction from a SQL function and its arguments
/// Returns None if the function is not a supported aggregate
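///
/// Illustrative example: `from_sql_function(&Func::Agg(AggFunc::Sum), Some("amount".to_string()))`
/// yields `Some(AggregateFunction::Sum("amount".to_string()))`, while a
/// non-aggregate `Func` yields `None`.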
pub fn from_sql_function(
func: &crate::function::Func,
input_column: Option<String>,
) -> Option<Self> {
use crate::function::{AggFunc, Func};
match func {
Func::Agg(agg_func) => {
match agg_func {
AggFunc::Count | AggFunc::Count0 => Some(AggregateFunction::Count),
AggFunc::Sum => input_column.map(AggregateFunction::Sum),
AggFunc::Avg => input_column.map(AggregateFunction::Avg),
AggFunc::Min => input_column.map(AggregateFunction::Min),
AggFunc::Max => input_column.map(AggregateFunction::Max),
_ => None, // Other aggregate functions not yet supported in DBSP
}
}
_ => None, // Not an aggregate function
}
}
}
/// Base trait for operators in the incremental dataflow DAG (Directed Acyclic Graph)
pub trait IncrementalOperator: Debug {
/// Initialize with base data
fn initialize(&mut self, data: Delta);
/// Process a delta (incremental update)
fn process_delta(&mut self, delta: Delta) -> Delta;
/// Get current accumulated state
fn get_current_state(&self) -> Delta;
/// Set computation tracker
fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>);
}
/// Filter operator - filters rows based on predicate
#[derive(Debug)]
pub struct FilterOperator {
predicate: FilterPredicate,
current_state: Delta,
column_names: Vec<String>,
tracker: Option<Arc<Mutex<ComputationTracker>>>,
}
impl FilterOperator {
pub fn new(predicate: FilterPredicate, column_names: Vec<String>) -> Self {
Self {
predicate,
current_state: Delta::new(),
column_names,
tracker: None,
}
}
/// Get the predicate for this filter
pub fn predicate(&self) -> &FilterPredicate {
&self.predicate
}
pub fn evaluate_predicate(&self, values: &[Value]) -> bool {
match &self.predicate {
FilterPredicate::None => true,
FilterPredicate::Equals { column, value } => {
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
if let Some(v) = values.get(idx) {
return v == value;
}
}
false
}
FilterPredicate::NotEquals { column, value } => {
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
if let Some(v) = values.get(idx) {
return v != value;
}
}
false
}
FilterPredicate::GreaterThan { column, value } => {
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
if let Some(v) = values.get(idx) {
// Compare based on value types
match (v, value) {
(Value::Integer(a), Value::Integer(b)) => return a > b,
(Value::Float(a), Value::Float(b)) => return a > b,
(Value::Text(a), Value::Text(b)) => return a.as_str() > b.as_str(),
_ => {}
}
}
}
false
}
FilterPredicate::GreaterThanOrEqual { column, value } => {
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
if let Some(v) = values.get(idx) {
match (v, value) {
(Value::Integer(a), Value::Integer(b)) => return a >= b,
(Value::Float(a), Value::Float(b)) => return a >= b,
(Value::Text(a), Value::Text(b)) => return a.as_str() >= b.as_str(),
_ => {}
}
}
}
false
}
FilterPredicate::LessThan { column, value } => {
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
if let Some(v) = values.get(idx) {
match (v, value) {
(Value::Integer(a), Value::Integer(b)) => return a < b,
(Value::Float(a), Value::Float(b)) => return a < b,
(Value::Text(a), Value::Text(b)) => return a.as_str() < b.as_str(),
_ => {}
}
}
}
false
}
FilterPredicate::LessThanOrEqual { column, value } => {
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
if let Some(v) = values.get(idx) {
match (v, value) {
(Value::Integer(a), Value::Integer(b)) => return a <= b,
(Value::Float(a), Value::Float(b)) => return a <= b,
(Value::Text(a), Value::Text(b)) => return a.as_str() <= b.as_str(),
_ => {}
}
}
}
false
}
FilterPredicate::And(left, right) => {
// Build throwaway sub-filters to evaluate each side of the conjunction recursively
let left_filter = FilterOperator::new((**left).clone(), self.column_names.clone());
let right_filter =
FilterOperator::new((**right).clone(), self.column_names.clone());
left_filter.evaluate_predicate(values) && right_filter.evaluate_predicate(values)
}
FilterPredicate::Or(left, right) => {
let left_filter = FilterOperator::new((**left).clone(), self.column_names.clone());
let right_filter =
FilterOperator::new((**right).clone(), self.column_names.clone());
left_filter.evaluate_predicate(values) || right_filter.evaluate_predicate(values)
}
}
}
}
impl IncrementalOperator for FilterOperator {
fn initialize(&mut self, data: Delta) {
// Process initial data through filter
for (row, weight) in data.changes {
if let Some(tracker) = &self.tracker {
tracker.lock().unwrap().record_filter();
}
if self.evaluate_predicate(&row.values) {
self.current_state.changes.push((row, weight));
}
}
}
fn process_delta(&mut self, delta: Delta) -> Delta {
let mut output_delta = Delta::new();
// Process only the delta, not the entire state
for (row, weight) in delta.changes {
if let Some(tracker) = &self.tracker {
tracker.lock().unwrap().record_filter();
}
if self.evaluate_predicate(&row.values) {
output_delta.changes.push((row.clone(), weight));
// Update our state
self.current_state.changes.push((row, weight));
}
}
output_delta
}
fn get_current_state(&self) -> Delta {
// Return a consolidated view of the current state
let mut consolidated = self.current_state.clone();
consolidated.consolidate();
consolidated
}
fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
self.tracker = Some(tracker);
}
}
/// Project operator - selects/transforms columns
#[derive(Clone)]
pub struct ProjectOperator {
columns: Vec<ProjectColumn>,
input_column_names: Vec<String>,
output_column_names: Vec<String>,
current_state: Delta,
tracker: Option<Arc<Mutex<ComputationTracker>>>,
// Internal in-memory connection for expression evaluation
// Programs are very dependent on having a connection, so give it one.
//
// We could in theory pass the current connection, but there are a host of problems with that.
// For example: during a write transaction, where views are usually updated, we have autocommit
// on. When the program we are executing calls Halt, it will try to commit the current
// transaction, which is absolutely incorrect.
//
// There are other ways to solve this, but a read-only connection to an empty in-memory
// database gives us the closest environment we need to execute expressions.
internal_conn: Arc<Connection>,
}
impl std::fmt::Debug for ProjectOperator {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ProjectOperator")
.field("columns", &self.columns)
.field("input_column_names", &self.input_column_names)
.field("output_column_names", &self.output_column_names)
.field("current_state", &self.current_state)
.field("tracker", &self.tracker)
.finish_non_exhaustive()
}
}
impl ProjectOperator {
/// Create a new ProjectOperator from a SELECT statement, extracting projection columns
pub fn from_select(
select: &turso_parser::ast::Select,
input_column_names: Vec<String>,
schema: &crate::schema::Schema,
) -> crate::Result<Self> {
use turso_parser::ast::*;
// Set up internal connection for expression evaluation
let io = Arc::new(crate::MemoryIO::new());
let db = Database::open_file(
io, ":memory:", false, // no MVCC needed for expression evaluation
false, // no indexes needed
)?;
let internal_conn = db.connect()?;
// Set to read-only mode and disable auto-commit since we're only evaluating expressions
internal_conn.query_only.set(true);
internal_conn.auto_commit.set(false);
let temp_syms = SymbolTable::new();
// Extract columns from SELECT statement
let columns = if let OneSelect::Select {
columns: ref select_columns,
..
} = &select.body.select
{
let mut columns = Vec::new();
for result_col in select_columns {
match result_col {
ResultColumn::Expr(expr, alias) => {
let alias_str = if let Some(As::As(alias_name)) = alias {
Some(alias_name.as_str().to_string())
} else {
None
};
// Try to compile the expression (handles both columns and complex expressions)
let compiled = CompiledExpression::compile(
expr,
&input_column_names,
schema,
&temp_syms,
internal_conn.clone(),
)?;
columns.push(ProjectColumn {
expr: (**expr).clone(),
alias: alias_str,
compiled,
});
}
ResultColumn::Star => {
// Select all columns - create trivial column references
for name in &input_column_names {
// Create an Id expression for the column
let expr = Expr::Id(Name::Ident(name.clone()));
let compiled = CompiledExpression::compile(
&expr,
&input_column_names,
schema,
&temp_syms,
internal_conn.clone(),
)?;
columns.push(ProjectColumn {
expr,
alias: None,
compiled,
});
}
}
x => {
return Err(crate::LimboError::ParseError(format!(
"Unsupported {x:?} clause when compiling project operator",
)));
}
}
}
if columns.is_empty() {
return Err(crate::LimboError::ParseError(
"No columns found when compiling project operator".to_string(),
));
}
columns
} else {
return Err(crate::LimboError::ParseError(
"Expression is not a valid SELECT expression".to_string(),
));
};
// Generate output column names based on aliases or expressions
let output_column_names = columns
.iter()
.map(|c| {
c.alias.clone().unwrap_or_else(|| match &c.expr {
Expr::Id(name) => name.as_str().to_string(),
Expr::Qualified(table, column) => {
format!("{}.{}", table.as_str(), column.as_str())
}
Expr::DoublyQualified(db, table, column) => {
format!("{}.{}.{}", db.as_str(), table.as_str(), column.as_str())
}
_ => c.expr.to_string(),
})
})
.collect();
Ok(Self {
columns,
input_column_names,
output_column_names,
current_state: Delta::new(),
tracker: None,
internal_conn,
})
}
/// Get the columns for this projection
pub fn columns(&self) -> &[ProjectColumn] {
&self.columns
}
fn project_values(&self, values: &[Value]) -> Vec<Value> {
let mut output = Vec::new();
for col in &self.columns {
// Use the internal connection's pager for expression evaluation
let internal_pager = self.internal_conn.pager.borrow().clone();
// Execute the compiled expression (handles both columns and complex expressions)
let result = col
.compiled
.execute(values, internal_pager)
.expect("Failed to execute compiled expression for the Project operator");
output.push(result);
}
output
}
fn evaluate_expression(&self, expr: &turso_parser::ast::Expr, values: &[Value]) -> Value {
use turso_parser::ast::*;
match expr {
Expr::Id(name) => {
if let Some(idx) = self
.input_column_names
.iter()
.position(|c| c == name.as_str())
{
if let Some(v) = values.get(idx) {
return v.clone();
}
}
Value::Null
}
Expr::Literal(lit) => {
match lit {
Literal::Numeric(n) => {
if let Ok(i) = n.parse::<i64>() {
Value::Integer(i)
} else if let Ok(f) = n.parse::<f64>() {
Value::Float(f)
} else {
Value::Null
}
}
Literal::String(s) => {
let cleaned = s.trim_matches('\'').trim_matches('"');
Value::Text(Text::new(cleaned))
}
Literal::Null => Value::Null,
Literal::Blob(_)
| Literal::Keyword(_)
| Literal::CurrentDate
| Literal::CurrentTime
| Literal::CurrentTimestamp => Value::Null, // Not supported yet
}
}
Expr::Binary(left, op, right) => {
let left_val = self.evaluate_expression(left, values);
let right_val = self.evaluate_expression(right, values);
match op {
Operator::Add => match (&left_val, &right_val) {
(Value::Integer(a), Value::Integer(b)) => Value::Integer(a + b),
(Value::Float(a), Value::Float(b)) => Value::Float(a + b),
(Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 + b),
(Value::Float(a), Value::Integer(b)) => Value::Float(a + *b as f64),
_ => Value::Null,
},
Operator::Subtract => match (&left_val, &right_val) {
(Value::Integer(a), Value::Integer(b)) => Value::Integer(a - b),
(Value::Float(a), Value::Float(b)) => Value::Float(a - b),
(Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 - b),
(Value::Float(a), Value::Integer(b)) => Value::Float(a - *b as f64),
_ => Value::Null,
},
Operator::Multiply => match (&left_val, &right_val) {
(Value::Integer(a), Value::Integer(b)) => Value::Integer(a * b),
(Value::Float(a), Value::Float(b)) => Value::Float(a * b),
(Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 * b),
(Value::Float(a), Value::Integer(b)) => Value::Float(a * *b as f64),
_ => Value::Null,
},
Operator::Divide => match (&left_val, &right_val) {
(Value::Integer(a), Value::Integer(b)) => {
if *b != 0 {
Value::Integer(a / b)
} else {
Value::Null
}
}
(Value::Float(a), Value::Float(b)) => {
if *b != 0.0 {
Value::Float(a / b)
} else {
Value::Null
}
}
(Value::Integer(a), Value::Float(b)) => {
if *b != 0.0 {
Value::Float(*a as f64 / b)
} else {
Value::Null
}
}
(Value::Float(a), Value::Integer(b)) => {
if *b != 0 {
Value::Float(a / *b as f64)
} else {
Value::Null
}
}
_ => Value::Null,
},
_ => Value::Null, // Other operators not supported yet
}
}
Expr::FunctionCall { name, args, .. } => {
match name.as_str().to_lowercase().as_str() {
"hex" => {
if args.len() == 1 {
let arg_val = self.evaluate_expression(&args[0], values);
match arg_val {
Value::Integer(i) => Value::Text(Text::new(&format!("{i:X}"))),
_ => Value::Null,
}
} else {
Value::Null
}
}
_ => Value::Null, // Other functions not supported yet
}
}
Expr::Parenthesized(inner) => {
assert!(
inner.len() <= 1,
"Parenthesized expressions with multiple elements are not supported"
);
if !inner.is_empty() {
self.evaluate_expression(&inner[0], values)
} else {
Value::Null
}
}
_ => Value::Null, // Other expression types not supported yet
}
}
}
impl IncrementalOperator for ProjectOperator {
fn initialize(&mut self, data: Delta) {
for (row, weight) in &data.changes {
if let Some(tracker) = &self.tracker {
tracker.lock().unwrap().record_project();
}
let projected = self.project_values(&row.values);
let projected_row = HashableRow::new(row.rowid, projected);
self.current_state.changes.push((projected_row, *weight));
}
}
fn process_delta(&mut self, delta: Delta) -> Delta {
let mut output_delta = Delta::new();
for (row, weight) in &delta.changes {
if let Some(tracker) = &self.tracker {
tracker.lock().unwrap().record_project();
}
let projected = self.project_values(&row.values);
let projected_row = HashableRow::new(row.rowid, projected);
output_delta.changes.push((projected_row.clone(), *weight));
self.current_state.changes.push((projected_row, *weight));
}
output_delta
}
fn get_current_state(&self) -> Delta {
// Return a consolidated view of the current state
let mut consolidated = self.current_state.clone();
consolidated.consolidate();
consolidated
}
fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
self.tracker = Some(tracker);
}
}
/// Join operator - performs incremental joins using DBSP formula
/// ∂(A ⋈ B) = A ⋈ ∂B + ∂A ⋈ B + ∂A ⋈ ∂B
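///
/// Illustrative walk-through: with states A = {(1, a1)}, B = {(1, b1)} and
/// deltas ∂A = {(2, a2)}, ∂B = {(1, b1')}, the three terms produce
/// A ⋈ ∂B = {(1, a1, b1')}, ∂A ⋈ B = {} and ∂A ⋈ ∂B = {}, so exactly one
/// joined row is emitted without rescanning the full states.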
#[derive(Debug)]
pub struct JoinOperator {
join_type: JoinType,
pub left_on_column: String,
pub right_on_column: String,
left_column_names: Vec<String>,
right_column_names: Vec<String>,
// Current accumulated state for both sides
left_state: Delta,
right_state: Delta,
// Index for efficient lookups: column_value_as_string -> vec of row_keys
// We use String representation of values since Value doesn't implement Hash
left_index: HashMap<String, Vec<i64>>,
right_index: HashMap<String, Vec<i64>>,
// Result state
current_state: Delta,
tracker: Option<Arc<Mutex<ComputationTracker>>>,
// For generating unique keys for join results
next_result_key: i64,
}
impl JoinOperator {
pub fn new(
join_type: JoinType,
left_on_column: String,
right_on_column: String,
left_column_names: Vec<String>,
right_column_names: Vec<String>,
) -> Self {
Self {
join_type,
left_on_column,
right_on_column,
left_column_names,
right_column_names,
left_state: Delta::new(),
right_state: Delta::new(),
left_index: HashMap::new(),
right_index: HashMap::new(),
current_state: Delta::new(),
tracker: None,
next_result_key: 0,
}
}
pub fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
self.tracker = Some(tracker);
}
/// Build index for a side of the join
fn build_index(
state: &Delta,
column_names: &[String],
on_column: &str,
) -> HashMap<String, Vec<i64>> {
let mut index = HashMap::new();
// Find the column index
let col_idx = column_names.iter().position(|c| c == on_column);
if col_idx.is_none() {
return index;
}
let col_idx = col_idx.unwrap();
// Build the index
for (row, weight) in &state.changes {
// Include rows with positive weight in the index
if *weight > 0 {
if let Some(key_value) = row.values.get(col_idx) {
// Convert value to string for indexing
let key_str = format!("{key_value:?}");
index
.entry(key_str)
.or_insert_with(Vec::new)
.push(row.rowid);
}
}
}
index
}
/// Join two deltas
fn join_deltas(&self, left_delta: &Delta, right_delta: &Delta, next_key: &mut i64) -> Delta {
let mut result = Delta::new();
// Find column indices
let left_col_idx = self
.left_column_names
.iter()
.position(|c| c == &self.left_on_column)
.unwrap_or(0);
let right_col_idx = self
.right_column_names
.iter()
.position(|c| c == &self.right_on_column)
.unwrap_or(0);
// For each row in left_delta
for (left_row, left_weight) in &left_delta.changes {
// Process both inserts and deletes
let left_join_value = left_row.values.get(left_col_idx);
if left_join_value.is_none() {
continue;
}
let left_join_value = left_join_value.unwrap();
// Look up matching rows in right_delta
for (right_row, right_weight) in &right_delta.changes {
// Process both inserts and deletes
let right_join_value = right_row.values.get(right_col_idx);
if right_join_value.is_none() {
continue;
}
let right_join_value = right_join_value.unwrap();
// Check if values match
if left_join_value == right_join_value {
// Record the join lookup
if let Some(tracker) = &self.tracker {
tracker.lock().unwrap().record_join_lookup();
}
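// Note: the output weight below is the product of the input weights, so a
// delete (-1) matching an existing row (+1) yields -1, which retracts the
// previously emitted join result.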
// Create joined row
let mut joined_values = left_row.values.clone();
joined_values.extend(right_row.values.clone());
// Generate a unique key for the result
let result_key = *next_key;
*next_key += 1;
let joined_row = HashableRow::new(result_key, joined_values);
result
.changes
.push((joined_row, left_weight * right_weight));
}
}
}
result
}
/// Join a delta with the full state using the index
fn join_delta_with_state(
&self,
delta: &Delta,
state: &Delta,
delta_on_left: bool,
next_key: &mut i64,
) -> Delta {
let mut result = Delta::new();
let (delta_col_idx, state_col_names) = if delta_on_left {
(
self.left_column_names
.iter()
.position(|c| c == &self.left_on_column)
.unwrap_or(0),
&self.right_column_names,
)
} else {
(
self.right_column_names
.iter()
.position(|c| c == &self.right_on_column)
.unwrap_or(0),
&self.left_column_names,
)
};
// Use index for efficient lookup
let state_index = Self::build_index(
state,
state_col_names,
if delta_on_left {
&self.right_on_column
} else {
&self.left_on_column
},
);
for (delta_row, delta_weight) in &delta.changes {
// Process both inserts and deletes
let delta_join_value = delta_row.values.get(delta_col_idx);
if delta_join_value.is_none() {
continue;
}
let delta_join_value = delta_join_value.unwrap();
// Use index to find matching rows
let delta_key_str = format!("{delta_join_value:?}");
if let Some(matching_keys) = state_index.get(&delta_key_str) {
for state_key in matching_keys {
// Look up in the state - find the row with this rowid
let state_row_opt = state
.changes
.iter()
.find(|(row, weight)| row.rowid == *state_key && *weight > 0);
if let Some((state_row, state_weight)) = state_row_opt {
// Record the join lookup
if let Some(tracker) = &self.tracker {
tracker.lock().unwrap().record_join_lookup();
}
// Create joined row
let joined_values = if delta_on_left {
let mut v = delta_row.values.clone();
v.extend(state_row.values.clone());
v
} else {
let mut v = state_row.values.clone();
v.extend(delta_row.values.clone());
v
};
let result_key = *next_key;
*next_key += 1;
let joined_row = HashableRow::new(result_key, joined_values);
result
.changes
.push((joined_row, delta_weight * state_weight));
}
}
}
}
result
}
/// Initialize both sides of the join
pub fn initialize_both(&mut self, left_data: Delta, right_data: Delta) {
self.left_state = left_data.clone();
self.right_state = right_data.clone();
// Build indices
self.left_index = Self::build_index(
&self.left_state,
&self.left_column_names,
&self.left_on_column,
);
self.right_index = Self::build_index(
&self.right_state,
&self.right_column_names,
&self.right_on_column,
);
// Perform initial join
let mut next_key = self.next_result_key;
self.current_state = self.join_deltas(&self.left_state, &self.right_state, &mut next_key);
self.next_result_key = next_key;
}
/// Process deltas for both sides using DBSP formula
/// ∂(A ⋈ B) = A ⋈ ∂B + ∂A ⋈ B + ∂A ⋈ ∂B
pub fn process_both_deltas(&mut self, left_delta: Delta, right_delta: Delta) -> Delta {
let mut result = Delta::new();
let mut next_key = self.next_result_key;
// A ⋈ ∂B (existing left with new right)
let a_join_db =
self.join_delta_with_state(&right_delta, &self.left_state, false, &mut next_key);
result.merge(&a_join_db);
// ∂A ⋈ B (new left with existing right)
let da_join_b =
self.join_delta_with_state(&left_delta, &self.right_state, true, &mut next_key);
result.merge(&da_join_b);
// ∂A ⋈ ∂B (new left with new right)
let da_join_db = self.join_deltas(&left_delta, &right_delta, &mut next_key);
result.merge(&da_join_db);
// Update the next key counter
self.next_result_key = next_key;
// Update state
self.left_state.merge(&left_delta);
self.right_state.merge(&right_delta);
self.current_state.merge(&result);
// Rebuild indices from the merged state (rebuilt from scratch for now,
// rather than updated incrementally)
self.left_index = Self::build_index(
&self.left_state,
&self.left_column_names,
&self.left_on_column,
);
self.right_index = Self::build_index(
&self.right_state,
&self.right_column_names,
&self.right_on_column,
);
result
}
pub fn get_current_state(&self) -> &Delta {
&self.current_state
}
/// Process a delta from the left table only
pub fn process_left_delta(&mut self, left_delta: Delta) -> Delta {
let empty_delta = Delta::new();
self.process_both_deltas(left_delta, empty_delta)
}
/// Process a delta from the right table only
pub fn process_right_delta(&mut self, right_delta: Delta) -> Delta {
let empty_delta = Delta::new();
self.process_both_deltas(empty_delta, right_delta)
}
}
/// Aggregate operator - performs incremental aggregation with GROUP BY
/// Maintains running totals/counts that are updated incrementally
#[derive(Debug, Clone)]
pub struct AggregateOperator {
// GROUP BY columns
group_by: Vec<String>,
// Aggregate functions to compute
aggregates: Vec<AggregateFunction>,
// Column names from input
pub input_column_names: Vec<String>,
// Aggregation state: group_key_str -> aggregate values
// For each group, we store the aggregate results
// We use String representation of group keys since Value doesn't implement Hash
group_states: HashMap<String, AggregateState>,
// Map to keep track of actual group key values for output
group_key_values: HashMap<String, Vec<Value>>,
// Current output state as a Delta
current_state: Delta,
tracker: Option<Arc<Mutex<ComputationTracker>>>,
}
/// State for a single group's aggregates
#[derive(Debug, Clone)]
struct AggregateState {
// For COUNT: just the count
count: i64,
// For SUM: column_name -> sum value
sums: HashMap<String, f64>,
// For AVG: column_name -> (sum, count) for computing average
avgs: HashMap<String, (f64, i64)>,
// For MIN: column_name -> min value
mins: HashMap<String, Value>,
// For MAX: column_name -> max value
maxs: HashMap<String, Value>,
}
impl AggregateState {
fn new() -> Self {
Self {
count: 0,
sums: HashMap::new(),
avgs: HashMap::new(),
mins: HashMap::new(),
maxs: HashMap::new(),
}
}
/// Apply a delta to this aggregate state
fn apply_delta(
&mut self,
values: &[Value],
weight: isize,
aggregates: &[AggregateFunction],
column_names: &[String],
) {
// Update COUNT
self.count += weight as i64;
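// weight is +1 per inserted row and -1 per deleted row, so COUNT (and the
// SUM/AVG accumulators below) are maintained by signed accumulation alone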
// Update other aggregates
for agg in aggregates {
match agg {
AggregateFunction::Count => {
// Already handled above
}
AggregateFunction::Sum(col_name) => {
if let Some(idx) = column_names.iter().position(|c| c == col_name) {
if let Some(val) = values.get(idx) {
let num_val = match val {
Value::Integer(i) => *i as f64,
Value::Float(f) => *f,
_ => 0.0,
};
*self.sums.entry(col_name.clone()).or_insert(0.0) +=
num_val * weight as f64;
}
}
}
AggregateFunction::Avg(col_name) => {
if let Some(idx) = column_names.iter().position(|c| c == col_name) {
if let Some(val) = values.get(idx) {
let num_val = match val {
Value::Integer(i) => *i as f64,
Value::Float(f) => *f,
_ => 0.0,
};
let (sum, count) =
self.avgs.entry(col_name.clone()).or_insert((0.0, 0));
*sum += num_val * weight as f64;
*count += weight as i64;
}
}
}
AggregateFunction::Min(col_name) => {
// MIN/MAX cannot be maintained incrementally under deletions without
// extra state: we only update on inserts, so deleting the current
// min/max leaves a stale value. This is a limitation we can improve later.
if weight > 0 {
// Only update on insert
if let Some(idx) = column_names.iter().position(|c| c == col_name) {
if let Some(val) = values.get(idx) {
self.mins
.entry(col_name.clone())
.and_modify(|existing| {
if val < existing {
*existing = val.clone();
}
})
.or_insert_with(|| val.clone());
}
}
}
}
AggregateFunction::Max(col_name) => {
if weight > 0 {
// Only update on insert
if let Some(idx) = column_names.iter().position(|c| c == col_name) {
if let Some(val) = values.get(idx) {
self.maxs
.entry(col_name.clone())
.and_modify(|existing| {
if val > existing {
*existing = val.clone();
}
})
.or_insert_with(|| val.clone());
}
}
}
}
}
}
}
/// Convert aggregate state to output values
fn to_values(&self, aggregates: &[AggregateFunction]) -> Vec<Value> {
let mut result = Vec::new();
for agg in aggregates {
match agg {
AggregateFunction::Count => {
result.push(Value::Integer(self.count));
}
AggregateFunction::Sum(col_name) => {
let sum = self.sums.get(col_name).copied().unwrap_or(0.0);
// Return as integer if it's a whole number, otherwise as float
if sum.fract() == 0.0 {
result.push(Value::Integer(sum as i64));
} else {
result.push(Value::Float(sum));
}
}
AggregateFunction::Avg(col_name) => {
if let Some((sum, count)) = self.avgs.get(col_name) {
if *count > 0 {
result.push(Value::Float(sum / *count as f64));
} else {
result.push(Value::Null);
}
} else {
result.push(Value::Null);
}
}
AggregateFunction::Min(col_name) => {
result.push(self.mins.get(col_name).cloned().unwrap_or(Value::Null));
}
AggregateFunction::Max(col_name) => {
result.push(self.maxs.get(col_name).cloned().unwrap_or(Value::Null));
}
}
}
result
}
}
impl AggregateOperator {
pub fn new(
group_by: Vec<String>,
aggregates: Vec<AggregateFunction>,
input_column_names: Vec<String>,
) -> Self {
Self {
group_by,
aggregates,
input_column_names,
group_states: HashMap::new(),
group_key_values: HashMap::new(),
current_state: Delta::new(),
tracker: None,
}
}
pub fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
self.tracker = Some(tracker);
}
/// Extract group key values from a row
fn extract_group_key(&self, values: &[Value]) -> Vec<Value> {
let mut key = Vec::new();
for group_col in &self.group_by {
if let Some(idx) = self.input_column_names.iter().position(|c| c == group_col) {
if let Some(val) = values.get(idx) {
key.push(val.clone());
} else {
key.push(Value::Null);
}
} else {
key.push(Value::Null);
}
}
key
}
/// Convert group key to string for indexing (since Value doesn't implement Hash)
fn group_key_to_string(key: &[Value]) -> String {
key.iter()
.map(|v| format!("{v:?}"))
.collect::<Vec<_>>()
.join(",")
}
/// Process a delta and update aggregate state incrementally
pub fn process_delta(&mut self, delta: Delta) -> Delta {
let mut output_delta = Delta::new();
// Track which groups were modified and their old values
let mut modified_groups = HashSet::new();
let mut old_values: HashMap<String, Vec<Value>> = HashMap::new();
// Process each change in the delta
for (row, weight) in &delta.changes {
if let Some(tracker) = &self.tracker {
tracker.lock().unwrap().record_aggregation();
}
// Extract group key
let group_key = self.extract_group_key(&row.values);
let group_key_str = Self::group_key_to_string(&group_key);
// Store old aggregate values BEFORE applying the delta
// (only for the first time we see this group in this batch)
if !modified_groups.contains(&group_key_str) {
if let Some(state) = self.group_states.get(&group_key_str) {
let mut old_row = group_key.clone();
old_row.extend(state.to_values(&self.aggregates));
old_values.insert(group_key_str.clone(), old_row);
}
}
modified_groups.insert(group_key_str.clone());
// Store the actual group key values
self.group_key_values
.insert(group_key_str.clone(), group_key.clone());
// Get or create aggregate state for this group
let state = self
.group_states
.entry(group_key_str.clone())
.or_insert_with(AggregateState::new);
// Apply the delta to the aggregate state
state.apply_delta(
&row.values,
*weight,
&self.aggregates,
&self.input_column_names,
);
}
// Generate output delta for modified groups
for group_key_str in modified_groups {
// Get the actual group key values
let group_key = self
.group_key_values
.get(&group_key_str)
.cloned()
.unwrap_or_default();
// Derive a deterministic row key for this group by hashing the group key
// string, so the retraction and the re-insertion target the same row id
// (hash collisions between groups are possible but unlikely)
let result_key = group_key_str
.bytes()
.fold(0i64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as i64));
// Emit retraction for old value if it existed
if let Some(old_row_values) = old_values.get(&group_key_str) {
let old_row = HashableRow::new(result_key, old_row_values.clone());
output_delta.changes.push((old_row.clone(), -1));
// Also remove from current state
self.current_state.changes.push((old_row, -1));
}
if let Some(state) = self.group_states.get(&group_key_str) {
// Build output row: group_by columns + aggregate values
let mut output_values = group_key.clone();
output_values.extend(state.to_values(&self.aggregates));
// Check if group should be removed (count is 0)
if state.count > 0 {
// Add to output delta with positive weight
let output_row = HashableRow::new(result_key, output_values.clone());
output_delta.changes.push((output_row.clone(), 1));
// Update current state
self.current_state.changes.push((output_row, 1));
} else {
// Group has count=0, remove from state
// (we already emitted the retraction above if needed)
self.group_states.remove(&group_key_str);
self.group_key_values.remove(&group_key_str);
}
}
}
// Consolidate current state to handle removals
self.current_state.consolidate();
output_delta
}
pub fn get_current_state(&self) -> &Delta {
&self.current_state
}
}
impl IncrementalOperator for AggregateOperator {
fn initialize(&mut self, data: Delta) {
// Process all initial data
self.process_delta(data);
}
fn process_delta(&mut self, delta: Delta) -> Delta {
self.process_delta(delta)
}
fn get_current_state(&self) -> Delta {
// Return a consolidated view of the current state
let mut consolidated = self.current_state.clone();
consolidated.consolidate();
consolidated
}
fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
self.tracker = Some(tracker);
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::Text;
use crate::Value;
use std::sync::{Arc, Mutex};
/// Assert that we're doing incremental work, not full recomputation
fn assert_incremental(tracker: &ComputationTracker, expected_ops: usize, data_size: usize) {
assert!(
tracker.total_computations() <= expected_ops,
"Expected <= {} operations for incremental update, got {}",
expected_ops,
tracker.total_computations()
);
assert!(
tracker.total_computations() < data_size,
"Computation count {} suggests full recomputation (data size: {})",
tracker.total_computations(),
data_size
);
assert_eq!(
tracker.full_scans, 0,
"Incremental computation should not perform full scans"
);
}
// Aggregate tests
#[test]
fn test_aggregate_incremental_update_emits_retraction() {
// This test verifies that when an aggregate value changes,
// the operator emits both a retraction (-1) of the old value
// and an insertion (+1) of the new value.
// Create an aggregate operator for SUM(age) with no GROUP BY
let mut agg = AggregateOperator::new(
vec![], // No GROUP BY
vec![AggregateFunction::Sum("age".to_string())],
vec!["id".to_string(), "name".to_string(), "age".to_string()],
);
// Initial data: 3 users
let mut initial_delta = Delta::new();
initial_delta.insert(
1,
vec![
Value::Integer(1),
Value::Text("Alice".to_string().into()),
Value::Integer(25),
],
);
initial_delta.insert(
2,
vec![
Value::Integer(2),
Value::Text("Bob".to_string().into()),
Value::Integer(30),
],
);
initial_delta.insert(
3,
vec![
Value::Integer(3),
Value::Text("Charlie".to_string().into()),
Value::Integer(35),
],
);
// Initialize with initial data
agg.initialize(initial_delta);
// Verify initial state: SUM(age) = 25 + 30 + 35 = 90
let state = agg.get_current_state();
assert_eq!(state.changes.len(), 1, "Should have one aggregate row");
let (row, weight) = &state.changes[0];
assert_eq!(*weight, 1, "Aggregate row should have weight 1");
assert_eq!(row.values[0], Value::Float(90.0), "SUM should be 90");
// Now add a new user (incremental update)
let mut update_delta = Delta::new();
update_delta.insert(
4,
vec![
Value::Integer(4),
Value::Text("David".to_string().into()),
Value::Integer(40),
],
);
// Process the incremental update
let output_delta = agg.process_delta(update_delta);
// CRITICAL: The output delta should contain TWO changes:
// 1. Retraction of old aggregate value (90) with weight -1
// 2. Insertion of new aggregate value (130) with weight +1
assert_eq!(
output_delta.changes.len(),
2,
"Expected 2 changes (retraction + insertion), got {}: {:?}",
output_delta.changes.len(),
output_delta.changes
);
// Verify the retraction comes first
let (retraction_row, retraction_weight) = &output_delta.changes[0];
assert_eq!(
*retraction_weight, -1,
"First change should be a retraction"
);
assert_eq!(
retraction_row.values[0],
Value::Float(90.0),
"Retracted value should be the old sum (90)"
);
// Verify the insertion comes second
let (insertion_row, insertion_weight) = &output_delta.changes[1];
assert_eq!(*insertion_weight, 1, "Second change should be an insertion");
assert_eq!(
insertion_row.values[0],
Value::Float(130.0),
"Inserted value should be the new sum (130)"
);
// Both changes should have the same row ID (since it's the same aggregate group)
assert_eq!(
retraction_row.rowid, insertion_row.rowid,
"Retraction and insertion should have the same row ID"
);
}
#[test]
fn test_aggregate_with_group_by_emits_retractions() {
// This test verifies that when aggregate values change for grouped data,
// the operator emits both retractions and insertions correctly for each group.
// Create an aggregate operator for SUM(score) GROUP BY team
let mut agg = AggregateOperator::new(
vec!["team".to_string()], // GROUP BY team
vec![AggregateFunction::Sum("score".to_string())],
vec![
"id".to_string(),
"team".to_string(),
"player".to_string(),
"score".to_string(),
],
);
// Initial data: players on different teams
let mut initial_delta = Delta::new();
initial_delta.insert(
1,
vec![
Value::Integer(1),
Value::Text("red".to_string().into()),
Value::Text("Alice".to_string().into()),
Value::Integer(10),
],
);
initial_delta.insert(
2,
vec![
Value::Integer(2),
Value::Text("blue".to_string().into()),
Value::Text("Bob".to_string().into()),
Value::Integer(15),
],
);
initial_delta.insert(
3,
vec![
Value::Integer(3),
Value::Text("red".to_string().into()),
Value::Text("Charlie".to_string().into()),
Value::Integer(20),
],
);
// Initialize with initial data
agg.initialize(initial_delta);
// Verify initial state: red team = 30, blue team = 15
let state = agg.get_current_state();
assert_eq!(state.changes.len(), 2, "Should have two groups");
// Find the red and blue team aggregates
let mut red_sum = None;
let mut blue_sum = None;
for (row, weight) in &state.changes {
assert_eq!(*weight, 1);
if let Value::Text(team) = &row.values[0] {
if team.as_str() == "red" {
red_sum = Some(&row.values[1]);
} else if team.as_str() == "blue" {
blue_sum = Some(&row.values[1]);
}
}
}
assert_eq!(
red_sum,
Some(&Value::Float(30.0)),
"Red team sum should be 30"
);
assert_eq!(
blue_sum,
Some(&Value::Float(15.0)),
"Blue team sum should be 15"
);
// Now add a new player to the red team (incremental update)
let mut update_delta = Delta::new();
update_delta.insert(
4,
vec![
Value::Integer(4),
Value::Text("red".to_string().into()),
Value::Text("David".to_string().into()),
Value::Integer(25),
],
);
// Process the incremental update
let output_delta = agg.process_delta(update_delta);
// Should have 2 changes: retraction of old red team sum, insertion of new red team sum
// Blue team should NOT be affected
assert_eq!(
output_delta.changes.len(),
2,
"Expected 2 changes for red team only, got {}: {:?}",
output_delta.changes.len(),
output_delta.changes
);
// Both changes should be for the red team
let mut found_retraction = false;
let mut found_insertion = false;
for (row, weight) in &output_delta.changes {
if let Value::Text(team) = &row.values[0] {
assert_eq!(team.as_str(), "red", "Only red team should have changes");
if *weight == -1 {
// Retraction of old value
assert_eq!(
row.values[1],
Value::Float(30.0),
"Should retract old sum of 30"
);
found_retraction = true;
} else if *weight == 1 {
// Insertion of new value
assert_eq!(
row.values[1],
Value::Float(55.0),
"Should insert new sum of 55"
);
found_insertion = true;
}
}
}
assert!(found_retraction, "Should have found retraction");
assert!(found_insertion, "Should have found insertion");
}
// Join tests
#[test]
fn test_join_uses_delta_formula() {
let tracker = Arc::new(Mutex::new(ComputationTracker::new()));
// Create join operator
let mut join = JoinOperator::new(
JoinType::Inner,
"user_id".to_string(),
"user_id".to_string(),
vec!["user_id".to_string(), "email".to_string()],
vec![
"login_id".to_string(),
"user_id".to_string(),
"timestamp".to_string(),
],
);
join.set_tracker(tracker.clone());
// Initial data: emails table
let mut emails = Delta::new();
emails.insert(
1,
vec![
Value::Integer(1),
Value::Text(Text::new("alice@example.com")),
],
);
emails.insert(
2,
vec![Value::Integer(2), Value::Text(Text::new("bob@example.com"))],
);
// Initial data: logins table
let mut logins = Delta::new();
logins.insert(
1,
vec![Value::Integer(1), Value::Integer(1), Value::Integer(1000)],
);
logins.insert(
2,
vec![Value::Integer(2), Value::Integer(1), Value::Integer(2000)],
);
// Initialize join
join.initialize_both(emails.clone(), logins.clone());
// Reset tracker for delta processing
tracker.lock().unwrap().join_lookups = 0;
// Add one login for bob (user_id=2)
let mut delta_logins = Delta::new();
delta_logins.insert(
3,
vec![Value::Integer(3), Value::Integer(2), Value::Integer(3000)],
);
// Process delta - should use incremental formula
let empty_delta = Delta::new();
let output = join.process_both_deltas(empty_delta, delta_logins);
// Should have one join result (bob's new login)
assert_eq!(output.len(), 1);
// Verify we used index lookups, not nested loops
// Should have done 1 lookup (finding bob's email for the new login)
let lookups = tracker.lock().unwrap().join_lookups;
assert_eq!(lookups, 1, "Should use index lookup, not scan all emails");
// Verify incremental behavior - we processed only the delta
let t = tracker.lock().unwrap();
assert_incremental(&t, 1, 3); // 1 operation for 3 total rows
}
#[test]
fn test_join_maintains_index() {
// Create join operator
let mut join = JoinOperator::new(
JoinType::Inner,
"id".to_string(),
"ref_id".to_string(),
vec!["id".to_string(), "name".to_string()],
vec!["ref_id".to_string(), "value".to_string()],
);
// Initial data
let mut left = Delta::new();
left.insert(1, vec![Value::Integer(1), Value::Text(Text::new("A"))]);
left.insert(2, vec![Value::Integer(2), Value::Text(Text::new("B"))]);
let mut right = Delta::new();
right.insert(1, vec![Value::Integer(1), Value::Integer(100)]);
// Initialize - should build index
join.initialize_both(left.clone(), right.clone());
// Verify initial join worked
let state = join.get_current_state();
assert_eq!(state.changes.len(), 1); // One match: id=1
// Add new item to left
let mut delta_left = Delta::new();
delta_left.insert(3, vec![Value::Integer(3), Value::Text(Text::new("C"))]);
// Add matching item to right
let mut delta_right = Delta::new();
delta_right.insert(2, vec![Value::Integer(3), Value::Integer(300)]);
// Process deltas
let output = join.process_both_deltas(delta_left, delta_right);
// Should have new join result
assert_eq!(output.len(), 1);
// Verify the join result has the expected values
assert!(!output.changes.is_empty());
let (result, _weight) = &output.changes[0];
assert_eq!(result.values.len(), 4); // id, name, ref_id, value
}
#[test]
fn test_join_formula_correctness() {
// Test the DBSP formula: ∂(A ⋈ B) = A ⋈ ∂B + ∂A ⋈ B + ∂A ⋈ ∂B
let tracker = Arc::new(Mutex::new(ComputationTracker::new()));
let mut join = JoinOperator::new(
JoinType::Inner,
"x".to_string(),
"x".to_string(),
vec!["x".to_string(), "a".to_string()],
vec!["x".to_string(), "b".to_string()],
);
join.set_tracker(tracker.clone());
// Initial state A
let mut a = Delta::new();
a.insert(1, vec![Value::Integer(1), Value::Text(Text::new("a1"))]);
a.insert(2, vec![Value::Integer(2), Value::Text(Text::new("a2"))]);
// Initial state B
let mut b = Delta::new();
b.insert(1, vec![Value::Integer(1), Value::Text(Text::new("b1"))]);
b.insert(2, vec![Value::Integer(2), Value::Text(Text::new("b2"))]);
join.initialize_both(a.clone(), b.clone());
// Reset tracker
tracker.lock().unwrap().join_lookups = 0;
// Delta for A (add x=3)
let mut delta_a = Delta::new();
delta_a.insert(3, vec![Value::Integer(3), Value::Text(Text::new("a3"))]);
// Delta for B (add x=3 and x=1)
let mut delta_b = Delta::new();
delta_b.insert(3, vec![Value::Integer(3), Value::Text(Text::new("b3"))]);
delta_b.insert(4, vec![Value::Integer(1), Value::Text(Text::new("b1_new"))]);
let output = join.process_both_deltas(delta_a, delta_b);
// Expected results:
// A ⋈ ∂B: (1,a1) ⋈ (1,b1_new) = 1 result
// ∂A ⋈ B: (3,a3) ⋈ nothing = 0 results
// ∂A ⋈ ∂B: (3,a3) ⋈ (3,b3) = 1 result
// Total: 2 results
assert_eq!(output.len(), 2);
// Verify we're doing incremental work
let lookups = tracker.lock().unwrap().join_lookups;
assert!(lookups <= 4, "Should use efficient index lookups");
}
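// Illustrative check: a delete joins against the other side's state with
// weight (-1) * (+1) = -1, retracting the previously emitted join result.
#[test]
fn test_join_delete_retracts_result() {
let mut join = JoinOperator::new(
JoinType::Inner,
"id".to_string(),
"ref_id".to_string(),
vec!["id".to_string(), "name".to_string()],
vec!["ref_id".to_string(), "value".to_string()],
);
let mut left = Delta::new();
left.insert(1, vec![Value::Integer(1), Value::Text(Text::new("A"))]);
let mut right = Delta::new();
right.insert(1, vec![Value::Integer(1), Value::Integer(100)]);
join.initialize_both(left, right);
// Delete the only right-side row; the joined row must be retracted
let mut delta_right = Delta::new();
delta_right.delete(1, vec![Value::Integer(1), Value::Integer(100)]);
let output = join.process_right_delta(delta_right);
assert_eq!(output.changes.len(), 1);
assert_eq!(output.changes[0].1, -1); // retraction of the join result
}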
// Aggregation tests
#[test]
fn test_count_increments_not_recounts() {
let tracker = Arc::new(Mutex::new(ComputationTracker::new()));
// Create COUNT(*) GROUP BY category
let mut agg = AggregateOperator::new(
vec!["category".to_string()],
vec![AggregateFunction::Count],
vec![
"item_id".to_string(),
"category".to_string(),
"price".to_string(),
],
);
agg.set_tracker(tracker.clone());
// Initial: 100 items in 10 categories (10 items each)
let mut initial = Delta::new();
for i in 0..100 {
let category = format!("cat_{}", i / 10);
initial.insert(
i,
vec![
Value::Integer(i),
Value::Text(Text::new(&category)),
Value::Integer(i * 10),
],
);
}
agg.initialize(initial);
// Reset tracker for delta processing
tracker.lock().unwrap().aggregation_updates = 0;
// Add one item to category 'cat_0'
let mut delta = Delta::new();
delta.insert(
100,
vec![
Value::Integer(100),
Value::Text(Text::new("cat_0")),
Value::Integer(1000),
],
);
let output = agg.process_delta(delta);
// Should only update one group (cat_0), not recount all groups
assert_eq!(tracker.lock().unwrap().aggregation_updates, 1);
// Output should show cat_0 now has count 11
assert_eq!(output.len(), 1);
assert!(!output.changes.is_empty());
let (change_row, _weight) = &output.changes[0];
assert_eq!(change_row.values[0], Value::Text(Text::new("cat_0")));
assert_eq!(change_row.values[1], Value::Integer(11));
// Verify incremental behavior
let t = tracker.lock().unwrap();
assert_incremental(&t, 1, 101);
}
#[test]
fn test_sum_updates_incrementally() {
let tracker = Arc::new(Mutex::new(ComputationTracker::new()));
// Create SUM(amount) GROUP BY product
let mut agg = AggregateOperator::new(
vec!["product".to_string()],
vec![AggregateFunction::Sum("amount".to_string())],
vec![
"sale_id".to_string(),
"product".to_string(),
"amount".to_string(),
],
);
agg.set_tracker(tracker.clone());
// Initial sales
let mut initial = Delta::new();
initial.insert(
1,
vec![
Value::Integer(1),
Value::Text(Text::new("Widget")),
Value::Integer(100),
],
);
initial.insert(
2,
vec![
Value::Integer(2),
Value::Text(Text::new("Gadget")),
Value::Integer(200),
],
);
initial.insert(
3,
vec![
Value::Integer(3),
Value::Text(Text::new("Widget")),
Value::Integer(150),
],
);
agg.initialize(initial);
// Check initial state: Widget=250, Gadget=200
let state = agg.get_current_state();
let widget_sum = state
.changes
.iter()
.find(|(c, _)| c.values[0] == Value::Text(Text::new("Widget")))
.map(|(c, _)| c)
.unwrap();
assert_eq!(widget_sum.values[1], Value::Integer(250));
// Reset tracker
tracker.lock().unwrap().aggregation_updates = 0;
// Add sale of 50 for Widget
let mut delta = Delta::new();
delta.insert(
4,
vec![
Value::Integer(4),
Value::Text(Text::new("Widget")),
Value::Integer(50),
],
);
let output = agg.process_delta(delta);
// Should only update Widget group
assert_eq!(tracker.lock().unwrap().aggregation_updates, 1);
assert_eq!(output.len(), 1);
// Widget should now be 300 (250 + 50)
assert!(!output.changes.is_empty());
let (change, _weight) = &output.changes[0];
assert_eq!(change.values[0], Value::Text(Text::new("Widget")));
assert_eq!(change.values[1], Value::Integer(300));
}
#[test]
fn test_count_and_sum_together() {
// Test the example from DBSP_ROADMAP: COUNT(*) and SUM(amount) GROUP BY user_id
let mut agg = AggregateOperator::new(
vec!["user_id".to_string()],
vec![
AggregateFunction::Count,
AggregateFunction::Sum("amount".to_string()),
],
vec![
"order_id".to_string(),
"user_id".to_string(),
"amount".to_string(),
],
);
// Initial orders
let mut initial = Delta::new();
initial.insert(
1,
vec![Value::Integer(1), Value::Integer(1), Value::Integer(100)],
);
initial.insert(
2,
vec![Value::Integer(2), Value::Integer(1), Value::Integer(200)],
);
initial.insert(
3,
vec![Value::Integer(3), Value::Integer(2), Value::Integer(150)],
);
agg.initialize(initial);
// Check initial state
// User 1: count=2, sum=300
// User 2: count=1, sum=150
let state = agg.get_current_state();
assert_eq!(state.changes.len(), 2);
let user1 = state
.changes
.iter()
.find(|(c, _)| c.values[0] == Value::Integer(1))
.map(|(c, _)| c)
.unwrap();
assert_eq!(user1.values[1], Value::Integer(2)); // count
assert_eq!(user1.values[2], Value::Integer(300)); // sum
let user2 = state
.changes
.iter()
.find(|(c, _)| c.values[0] == Value::Integer(2))
.map(|(c, _)| c)
.unwrap();
assert_eq!(user2.values[1], Value::Integer(1)); // count
assert_eq!(user2.values[2], Value::Integer(150)); // sum
// Add order for user 1
let mut delta = Delta::new();
delta.insert(
4,
vec![Value::Integer(4), Value::Integer(1), Value::Integer(50)],
);
let output = agg.process_delta(delta);
// Should only update user 1
assert_eq!(output.len(), 1);
assert!(!output.changes.is_empty());
let (change, _weight) = &output.changes[0];
assert_eq!(change.values[0], Value::Integer(1)); // user_id
assert_eq!(change.values[1], Value::Integer(3)); // count: 2 + 1
assert_eq!(change.values[2], Value::Integer(350)); // sum: 300 + 50
}
#[test]
fn test_avg_maintains_sum_and_count() {
// Test AVG aggregation
let mut agg = AggregateOperator::new(
vec!["category".to_string()],
vec![AggregateFunction::Avg("value".to_string())],
vec![
"id".to_string(),
"category".to_string(),
"value".to_string(),
],
);
// Initial data
let mut initial = Delta::new();
initial.insert(
1,
vec![
Value::Integer(1),
Value::Text(Text::new("A")),
Value::Integer(10),
],
);
initial.insert(
2,
vec![
Value::Integer(2),
Value::Text(Text::new("A")),
Value::Integer(20),
],
);
initial.insert(
3,
vec![
Value::Integer(3),
Value::Text(Text::new("B")),
Value::Integer(30),
],
);
agg.initialize(initial);
// Check initial averages
// Category A: avg = (10 + 20) / 2 = 15
// Category B: avg = 30 / 1 = 30
let state = agg.get_current_state();
let cat_a = state
.changes
.iter()
.find(|(c, _)| c.values[0] == Value::Text(Text::new("A")))
.map(|(c, _)| c)
.unwrap();
assert_eq!(cat_a.values[1], Value::Float(15.0));
let cat_b = state
.changes
.iter()
.find(|(c, _)| c.values[0] == Value::Text(Text::new("B")))
.map(|(c, _)| c)
.unwrap();
assert_eq!(cat_b.values[1], Value::Float(30.0));
// Add value to category A
let mut delta = Delta::new();
delta.insert(
4,
vec![
Value::Integer(4),
Value::Text(Text::new("A")),
Value::Integer(30),
],
);
let output = agg.process_delta(delta);
// Category A avg should now be (10 + 20 + 30) / 3 = 20
assert!(!output.changes.is_empty());
let (change, _weight) = &output.changes[0];
assert_eq!(change.values[0], Value::Text(Text::new("A")));
assert_eq!(change.values[1], Value::Float(20.0));
}
#[test]
fn test_delete_updates_aggregates() {
// Test that deletes (negative weights) properly update aggregates
let mut agg = AggregateOperator::new(
vec!["category".to_string()],
vec![
AggregateFunction::Count,
AggregateFunction::Sum("value".to_string()),
],
vec![
"id".to_string(),
"category".to_string(),
"value".to_string(),
],
);
// Initial data
let mut initial = Delta::new();
initial.insert(
1,
vec![
Value::Integer(1),
Value::Text(Text::new("A")),
Value::Integer(100),
],
);
initial.insert(
2,
vec![
Value::Integer(2),
Value::Text(Text::new("A")),
Value::Integer(200),
],
);
agg.initialize(initial);
// Check initial state: count=2, sum=300
let state = agg.get_current_state();
assert!(!state.changes.is_empty());
let (row, _weight) = &state.changes[0];
assert_eq!(row.values[1], Value::Integer(2)); // count
assert_eq!(row.values[2], Value::Integer(300)); // sum
// Delete one row
let mut delta = Delta::new();
delta.delete(
1,
vec![
Value::Integer(1),
Value::Text(Text::new("A")),
Value::Integer(100),
],
);
let output = agg.process_delta(delta);
// Should update to count=1, sum=200
assert!(!output.changes.is_empty());
let (change_row, _weight) = &output.changes[0];
assert_eq!(change_row.values[0], Value::Text(Text::new("A")));
assert_eq!(change_row.values[1], Value::Integer(1)); // count: 2 - 1
assert_eq!(change_row.values[2], Value::Integer(200)); // sum: 300 - 100
}
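// Illustrative check: deleting the last row of a group emits only a
// retraction and drops the group from the consolidated state entirely.
#[test]
fn test_group_removed_when_count_reaches_zero() {
let mut agg = AggregateOperator::new(
vec!["category".to_string()],
vec![AggregateFunction::Count],
vec!["id".to_string(), "category".to_string()],
);
let mut initial = Delta::new();
initial.insert(1, vec![Value::Integer(1), Value::Text(Text::new("A"))]);
agg.initialize(initial);
let mut delta = Delta::new();
delta.delete(1, vec![Value::Integer(1), Value::Text(Text::new("A"))]);
let output = agg.process_delta(delta);
// Only the retraction of the old aggregate row (count=1) is emitted
assert_eq!(output.changes.len(), 1);
assert_eq!(output.changes[0].1, -1);
// The group with count=0 is gone from the consolidated state
assert!(agg.get_current_state().changes.is_empty());
}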
#[test]
fn test_filter_operator_rowid_update() {
// When a row's rowid changes (e.g., UPDATE t SET a=1 WHERE a=3 on INTEGER PRIMARY KEY),
// the operator should properly consolidate the state
let mut filter = FilterOperator::new(
FilterPredicate::GreaterThan {
column: "b".to_string(),
value: Value::Integer(2),
},
vec!["a".to_string(), "b".to_string()],
);
// Initialize with a row (rowid=3, values=[3, 3])
let mut init_data = Delta::new();
init_data.insert(3, vec![Value::Integer(3), Value::Integer(3)]);
filter.initialize(init_data);
// Check initial state
let state = filter.get_current_state();
assert_eq!(state.changes.len(), 1);
assert_eq!(state.changes[0].0.rowid, 3);
assert_eq!(
state.changes[0].0.values,
vec![Value::Integer(3), Value::Integer(3)]
);
// Simulate an UPDATE that changes rowid from 3 to 1
// This is sent as: delete(3) + insert(1)
let mut update_delta = Delta::new();
update_delta.delete(3, vec![Value::Integer(3), Value::Integer(3)]);
update_delta.insert(1, vec![Value::Integer(1), Value::Integer(3)]);
let output = filter.process_delta(update_delta);
// The output delta should have both changes (both pass the filter b > 2)
assert_eq!(output.changes.len(), 2);
assert_eq!(output.changes[0].1, -1); // delete weight
assert_eq!(output.changes[1].1, 1); // insert weight
// The current state should be consolidated to only have rows with positive weight
let final_state = filter.get_current_state();
// After consolidation, we should have only one row with rowid=1
assert_eq!(
final_state.changes.len(),
1,
"State should be consolidated to have only one row"
);
assert_eq!(final_state.changes[0].0.rowid, 1);
assert_eq!(
final_state.changes[0].0.values,
vec![Value::Integer(1), Value::Integer(3)]
);
assert_eq!(final_state.changes[0].1, 1); // positive weight
}
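// Illustrative check: compound AND predicates evaluate both sides
// recursively through temporary sub-filters, as evaluate_predicate does.
#[test]
fn test_filter_compound_predicate() {
let filter = FilterOperator::new(
FilterPredicate::And(
Box::new(FilterPredicate::GreaterThan {
column: "a".to_string(),
value: Value::Integer(1),
}),
Box::new(FilterPredicate::LessThan {
column: "a".to_string(),
value: Value::Integer(10),
}),
),
vec!["a".to_string()],
);
// 5 satisfies both 'a > 1' and 'a < 10'
assert!(filter.evaluate_predicate(&[Value::Integer(5)]));
// 10 fails 'a < 10'
assert!(!filter.evaluate_predicate(&[Value::Integer(10)]));
}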
}