mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-18 09:04:19 +01:00
The operator itself should handle deletions and updates that change the rowid by consolidating its state. Our current materialized views track state themselves, so we don't see this problem now. But it becomes apparent once we switch the views to use circuits.
2460 lines
85 KiB
Rust
2460 lines
85 KiB
Rust
#![allow(dead_code)]
|
|
// Operator DAG for DBSP-style incremental computation
|
|
// Based on Feldera DBSP design but adapted for Turso's architecture
|
|
|
|
use crate::incremental::expr_compiler::CompiledExpression;
|
|
use crate::incremental::hashable_row::HashableRow;
|
|
use crate::types::Text;
|
|
use crate::{Connection, Database, SymbolTable, Value};
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::fmt::{self, Debug, Display};
|
|
use std::sync::Arc;
|
|
use std::sync::Mutex;
|
|
|
|
/// Counters describing how much work the incremental operators performed.
///
/// Used by tests today to verify that processing is incremental; in the future it
/// should also be used to provide statistics.
#[derive(Debug, Default, Clone)]
pub struct ComputationTracker {
    /// Bumped by [`ComputationTracker::record_filter`].
    pub filter_evaluations: usize,
    /// Bumped by [`ComputationTracker::record_project`].
    pub project_operations: usize,
    /// Bumped by [`ComputationTracker::record_join_lookup`].
    pub join_lookups: usize,
    /// Bumped by [`ComputationTracker::record_aggregation`].
    pub aggregation_updates: usize,
    /// Bumped by [`ComputationTracker::record_full_scan`].
    pub full_scans: usize,
}

impl ComputationTracker {
    /// Creates a tracker with every counter at zero.
    pub fn new() -> Self {
        Self {
            filter_evaluations: 0,
            project_operations: 0,
            join_lookups: 0,
            aggregation_updates: 0,
            full_scans: 0,
        }
    }

    /// Counts one filter evaluation.
    pub fn record_filter(&mut self) {
        self.filter_evaluations += 1;
    }

    /// Counts one projection.
    pub fn record_project(&mut self) {
        self.project_operations += 1;
    }

    /// Counts one join lookup.
    pub fn record_join_lookup(&mut self) {
        self.join_lookups += 1;
    }

    /// Counts one aggregation update.
    pub fn record_aggregation(&mut self) {
        self.aggregation_updates += 1;
    }

    /// Counts one full scan.
    pub fn record_full_scan(&mut self) {
        self.full_scans += 1;
    }

    /// Sum of the filter, project, join and aggregation counters.
    ///
    /// `full_scans` is not part of this total.
    pub fn total_computations(&self) -> usize {
        [
            self.filter_evaluations,
            self.project_operations,
            self.join_lookups,
            self.aggregation_updates,
        ]
        .iter()
        .sum::<usize>()
    }
}
|
|
|
|
/// A delta represents ordered changes to data
|
|
#[derive(Debug, Clone, Default)]
|
|
pub struct Delta {
|
|
/// Ordered list of changes: (row, weight) where weight is +1 for insert, -1 for delete
|
|
/// It is crucial that this is ordered. Imagine the case of an update, which becomes a delete +
|
|
/// insert. If this is not ordered, it would be applied in arbitrary order and break the view.
|
|
pub changes: Vec<(HashableRow, isize)>,
|
|
}
|
|
|
|
impl Delta {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
changes: Vec::new(),
|
|
}
|
|
}
|
|
|
|
pub fn insert(&mut self, row_key: i64, values: Vec<Value>) {
|
|
let row = HashableRow::new(row_key, values);
|
|
self.changes.push((row, 1));
|
|
}
|
|
|
|
pub fn delete(&mut self, row_key: i64, values: Vec<Value>) {
|
|
let row = HashableRow::new(row_key, values);
|
|
self.changes.push((row, -1));
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.changes.is_empty()
|
|
}
|
|
|
|
pub fn len(&self) -> usize {
|
|
self.changes.len()
|
|
}
|
|
|
|
/// Merge another delta into this one
|
|
/// This preserves the order of operations - no consolidation is done
|
|
/// to maintain the full history of changes
|
|
pub fn merge(&mut self, other: &Delta) {
|
|
// Simply append all changes from other, preserving order
|
|
self.changes.extend(other.changes.iter().cloned());
|
|
}
|
|
|
|
/// Consolidate changes by combining entries with the same HashableRow
|
|
pub fn consolidate(&mut self) {
|
|
if self.changes.is_empty() {
|
|
return;
|
|
}
|
|
|
|
// Use a HashMap to accumulate weights
|
|
let mut consolidated: HashMap<HashableRow, isize> = HashMap::new();
|
|
|
|
for (row, weight) in self.changes.drain(..) {
|
|
*consolidated.entry(row).or_insert(0) += weight;
|
|
}
|
|
|
|
// Convert back to vec, filtering out zero weights
|
|
self.changes = consolidated
|
|
.into_iter()
|
|
.filter(|(_, weight)| *weight != 0)
|
|
.collect();
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod hashable_row_tests {
    use super::*;

    /// An insert followed by an update (delete + insert) keeps its order in the raw
    /// delta and collapses to the final row once consolidated.
    #[test]
    fn test_hashable_row_delta_operations() {
        let before = vec![Value::Integer(1), Value::Integer(100)];
        let after = vec![Value::Integer(1), Value::Integer(200)];
        let mut delta = Delta::new();

        // INSERT
        delta.insert(1, before.clone());
        assert_eq!(delta.len(), 1);

        // UPDATE is DELETE + INSERT - order matters!
        delta.delete(1, before);
        delta.insert(1, after.clone());
        // Nothing is merged before consolidation, so all 3 operations are present.
        assert_eq!(delta.len(), 3);

        // The weights must appear exactly in the order the operations were issued:
        // first insert, delete, second insert.
        let weights: Vec<isize> = delta.changes.iter().map(|(_, w)| *w).collect();
        assert_eq!(weights, vec![1, -1, 1]);

        // Consolidation cancels the first insert against the delete,
        // leaving only the second insert.
        delta.consolidate();
        assert_eq!(delta.len(), 1);

        let (row, weight) = &delta.changes[0];
        assert_eq!(row.rowid, 1);
        assert_eq!(row.values, after);
        assert_eq!(*weight, 1);
    }

    /// Inserting the identical row twice consolidates into one entry of weight 2.
    #[test]
    fn test_duplicate_row_consolidation() {
        let mut delta = Delta::new();

        for _ in 0..2 {
            delta.insert(2, vec![Value::Integer(2), Value::Integer(300)]);
        }
        assert_eq!(delta.len(), 2);

        delta.consolidate();
        assert_eq!(delta.len(), 1);

        // Weight is the sum of both inserts.
        let (row, weight) = &delta.changes[0];
        assert_eq!(row.rowid, 2);
        assert_eq!(*weight, 2);
    }
}
|
|
|
|
/// Represents an operator in the dataflow graph
|
|
#[derive(Debug, Clone)]
|
|
pub enum QueryOperator {
|
|
/// Table scan - source of data
|
|
TableScan {
|
|
table_name: String,
|
|
column_names: Vec<String>,
|
|
},
|
|
|
|
/// Filter rows based on predicate
|
|
Filter {
|
|
predicate: FilterPredicate,
|
|
input: usize, // Index of input operator
|
|
},
|
|
|
|
/// Project columns (select specific columns)
|
|
Project {
|
|
columns: Vec<ProjectColumn>,
|
|
input: usize,
|
|
},
|
|
|
|
/// Join two inputs
|
|
Join {
|
|
join_type: JoinType,
|
|
on_column: String,
|
|
left_input: usize,
|
|
right_input: usize,
|
|
},
|
|
|
|
/// Aggregate
|
|
Aggregate {
|
|
group_by: Vec<String>,
|
|
aggregates: Vec<AggregateFunction>,
|
|
input: usize,
|
|
},
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub enum FilterPredicate {
|
|
/// Column = value
|
|
Equals { column: String, value: Value },
|
|
/// Column != value
|
|
NotEquals { column: String, value: Value },
|
|
/// Column > value
|
|
GreaterThan { column: String, value: Value },
|
|
/// Column >= value
|
|
GreaterThanOrEqual { column: String, value: Value },
|
|
/// Column < value
|
|
LessThan { column: String, value: Value },
|
|
/// Column <= value
|
|
LessThanOrEqual { column: String, value: Value },
|
|
/// Logical AND of two predicates
|
|
And(Box<FilterPredicate>, Box<FilterPredicate>),
|
|
/// Logical OR of two predicates
|
|
Or(Box<FilterPredicate>, Box<FilterPredicate>),
|
|
/// No predicate (accept all rows)
|
|
None,
|
|
}
|
|
|
|
impl FilterPredicate {
|
|
/// Parse a SQL AST expression into a FilterPredicate
|
|
/// This centralizes all SQL-to-predicate parsing logic
|
|
pub fn from_sql_expr(expr: &turso_parser::ast::Expr) -> crate::Result<Self> {
|
|
use turso_parser::ast::*;
|
|
|
|
let Expr::Binary(lhs, op, rhs) = expr else {
|
|
return Err(crate::LimboError::ParseError(
|
|
"Unsupported WHERE clause for incremental views: not a binary expression"
|
|
.to_string(),
|
|
));
|
|
};
|
|
|
|
// Handle AND/OR logical operators
|
|
match op {
|
|
Operator::And => {
|
|
let left = Self::from_sql_expr(lhs)?;
|
|
let right = Self::from_sql_expr(rhs)?;
|
|
return Ok(FilterPredicate::And(Box::new(left), Box::new(right)));
|
|
}
|
|
Operator::Or => {
|
|
let left = Self::from_sql_expr(lhs)?;
|
|
let right = Self::from_sql_expr(rhs)?;
|
|
return Ok(FilterPredicate::Or(Box::new(left), Box::new(right)));
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
// Handle comparison operators
|
|
let Expr::Id(column_name) = &**lhs else {
|
|
return Err(crate::LimboError::ParseError(
|
|
"Unsupported WHERE clause for incremental views: left-hand-side is not a column reference".to_string(),
|
|
));
|
|
};
|
|
|
|
let column = column_name.as_str().to_string();
|
|
|
|
// Parse the right-hand side value
|
|
let value = match &**rhs {
|
|
Expr::Literal(Literal::String(s)) => {
|
|
// Strip quotes from string literals
|
|
let cleaned = s.trim_matches('\'').trim_matches('"');
|
|
Value::Text(Text::new(cleaned))
|
|
}
|
|
Expr::Literal(Literal::Numeric(n)) => {
|
|
// Try to parse as integer first, then float
|
|
if let Ok(i) = n.parse::<i64>() {
|
|
Value::Integer(i)
|
|
} else if let Ok(f) = n.parse::<f64>() {
|
|
Value::Float(f)
|
|
} else {
|
|
return Err(crate::LimboError::ParseError(
|
|
"Unsupported WHERE clause for incremental views: right-hand-side is not a numeric literal".to_string(),
|
|
));
|
|
}
|
|
}
|
|
Expr::Literal(Literal::Null) => Value::Null,
|
|
Expr::Literal(Literal::Blob(_)) => {
|
|
// Blob comparison not yet supported
|
|
return Err(crate::LimboError::ParseError(
|
|
"Unsupported WHERE clause for incremental views: comparison with blob literals is not supported".to_string(),
|
|
));
|
|
}
|
|
other => {
|
|
// Complex expressions not yet supported
|
|
return Err(crate::LimboError::ParseError(
|
|
format!("Unsupported WHERE clause for incremental views: comparison with {other:?} is not supported"),
|
|
));
|
|
}
|
|
};
|
|
|
|
// Create the appropriate predicate based on operator
|
|
match op {
|
|
Operator::Equals => Ok(FilterPredicate::Equals { column, value }),
|
|
Operator::NotEquals => Ok(FilterPredicate::NotEquals { column, value }),
|
|
Operator::Greater => Ok(FilterPredicate::GreaterThan { column, value }),
|
|
Operator::GreaterEquals => Ok(FilterPredicate::GreaterThanOrEqual { column, value }),
|
|
Operator::Less => Ok(FilterPredicate::LessThan { column, value }),
|
|
Operator::LessEquals => Ok(FilterPredicate::LessThanOrEqual { column, value }),
|
|
other => Err(crate::LimboError::ParseError(
|
|
format!("Unsupported WHERE clause for incremental views: comparison operator {other:?} is not supported"),
|
|
)),
|
|
}
|
|
}
|
|
|
|
/// Parse a WHERE clause from a SELECT statement
|
|
pub fn from_select(select: &turso_parser::ast::Select) -> crate::Result<Self> {
|
|
use turso_parser::ast::*;
|
|
|
|
if let OneSelect::Select {
|
|
ref where_clause, ..
|
|
} = select.body.select
|
|
{
|
|
if let Some(where_clause) = where_clause {
|
|
Self::from_sql_expr(where_clause)
|
|
} else {
|
|
Ok(FilterPredicate::None)
|
|
}
|
|
} else {
|
|
Err(crate::LimboError::ParseError(
|
|
"Unsupported WHERE clause for incremental views: not a single SELECT statement"
|
|
.to_string(),
|
|
))
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct ProjectColumn {
|
|
/// The original SQL expression (for debugging/fallback)
|
|
pub expr: turso_parser::ast::Expr,
|
|
/// Optional alias for the column
|
|
pub alias: Option<String>,
|
|
/// Compiled expression (handles both trivial columns and complex expressions)
|
|
pub compiled: CompiledExpression,
|
|
}
|
|
|
|
/// Kind of join performed by a join operator.
#[derive(Debug, Clone)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left join.
    Left,
    /// Right join.
    Right,
}
|
|
|
|
/// Aggregate functions supported by the aggregate operator.
///
/// Every variant except `Count` carries the name of the column it aggregates.
#[derive(Debug, Clone)]
pub enum AggregateFunction {
    Count,
    Sum(String),
    Avg(String),
    Min(String),
    Max(String),
}

/// Renders the aggregate the way it is written in SQL, e.g. `COUNT(*)` or `SUM(col)`.
impl Display for AggregateFunction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // COUNT takes no column; everything else is NAME(column).
        let (function, column) = match self {
            AggregateFunction::Count => return f.write_str("COUNT(*)"),
            AggregateFunction::Sum(col) => ("SUM", col),
            AggregateFunction::Avg(col) => ("AVG", col),
            AggregateFunction::Min(col) => ("MIN", col),
            AggregateFunction::Max(col) => ("MAX", col),
        };
        write!(f, "{function}({column})")
    }
}
|
|
|
|
impl AggregateFunction {
|
|
/// Get the default output column name for this aggregate function
|
|
#[inline]
|
|
pub fn default_output_name(&self) -> String {
|
|
self.to_string()
|
|
}
|
|
|
|
/// Create an AggregateFunction from a SQL function and its arguments
|
|
/// Returns None if the function is not a supported aggregate
|
|
pub fn from_sql_function(
|
|
func: &crate::function::Func,
|
|
input_column: Option<String>,
|
|
) -> Option<Self> {
|
|
use crate::function::{AggFunc, Func};
|
|
|
|
match func {
|
|
Func::Agg(agg_func) => {
|
|
match agg_func {
|
|
AggFunc::Count | AggFunc::Count0 => Some(AggregateFunction::Count),
|
|
AggFunc::Sum => input_column.map(AggregateFunction::Sum),
|
|
AggFunc::Avg => input_column.map(AggregateFunction::Avg),
|
|
AggFunc::Min => input_column.map(AggregateFunction::Min),
|
|
AggFunc::Max => input_column.map(AggregateFunction::Max),
|
|
_ => None, // Other aggregate functions not yet supported in DBSP
|
|
}
|
|
}
|
|
_ => None, // Not an aggregate function
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Operator DAG (Directed Acyclic Graph)
|
|
/// Base trait for incremental operators
|
|
pub trait IncrementalOperator: Debug {
|
|
/// Initialize with base data
|
|
fn initialize(&mut self, data: Delta);
|
|
|
|
/// Process a delta (incremental update)
|
|
fn process_delta(&mut self, delta: Delta) -> Delta;
|
|
|
|
/// Get current accumulated state
|
|
fn get_current_state(&self) -> Delta;
|
|
|
|
/// Set computation tracker
|
|
fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>);
|
|
}
|
|
|
|
/// Filter operator - filters rows based on predicate
|
|
#[derive(Debug)]
|
|
pub struct FilterOperator {
|
|
predicate: FilterPredicate,
|
|
current_state: Delta,
|
|
column_names: Vec<String>,
|
|
tracker: Option<Arc<Mutex<ComputationTracker>>>,
|
|
}
|
|
|
|
impl FilterOperator {
|
|
pub fn new(predicate: FilterPredicate, column_names: Vec<String>) -> Self {
|
|
Self {
|
|
predicate,
|
|
current_state: Delta::new(),
|
|
column_names,
|
|
tracker: None,
|
|
}
|
|
}
|
|
|
|
/// Get the predicate for this filter
|
|
pub fn predicate(&self) -> &FilterPredicate {
|
|
&self.predicate
|
|
}
|
|
|
|
pub fn evaluate_predicate(&self, values: &[Value]) -> bool {
|
|
match &self.predicate {
|
|
FilterPredicate::None => true,
|
|
FilterPredicate::Equals { column, value } => {
|
|
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
|
|
if let Some(v) = values.get(idx) {
|
|
return v == value;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
FilterPredicate::NotEquals { column, value } => {
|
|
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
|
|
if let Some(v) = values.get(idx) {
|
|
return v != value;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
FilterPredicate::GreaterThan { column, value } => {
|
|
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
|
|
if let Some(v) = values.get(idx) {
|
|
// Compare based on value types
|
|
match (v, value) {
|
|
(Value::Integer(a), Value::Integer(b)) => return a > b,
|
|
(Value::Float(a), Value::Float(b)) => return a > b,
|
|
(Value::Text(a), Value::Text(b)) => return a.as_str() > b.as_str(),
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
false
|
|
}
|
|
FilterPredicate::GreaterThanOrEqual { column, value } => {
|
|
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
|
|
if let Some(v) = values.get(idx) {
|
|
match (v, value) {
|
|
(Value::Integer(a), Value::Integer(b)) => return a >= b,
|
|
(Value::Float(a), Value::Float(b)) => return a >= b,
|
|
(Value::Text(a), Value::Text(b)) => return a.as_str() >= b.as_str(),
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
false
|
|
}
|
|
FilterPredicate::LessThan { column, value } => {
|
|
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
|
|
if let Some(v) = values.get(idx) {
|
|
match (v, value) {
|
|
(Value::Integer(a), Value::Integer(b)) => return a < b,
|
|
(Value::Float(a), Value::Float(b)) => return a < b,
|
|
(Value::Text(a), Value::Text(b)) => return a.as_str() < b.as_str(),
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
false
|
|
}
|
|
FilterPredicate::LessThanOrEqual { column, value } => {
|
|
if let Some(idx) = self.column_names.iter().position(|c| c == column) {
|
|
if let Some(v) = values.get(idx) {
|
|
match (v, value) {
|
|
(Value::Integer(a), Value::Integer(b)) => return a <= b,
|
|
(Value::Float(a), Value::Float(b)) => return a <= b,
|
|
(Value::Text(a), Value::Text(b)) => return a.as_str() <= b.as_str(),
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
false
|
|
}
|
|
FilterPredicate::And(left, right) => {
|
|
// Temporarily create sub-filters to evaluate
|
|
let left_filter = FilterOperator::new((**left).clone(), self.column_names.clone());
|
|
let right_filter =
|
|
FilterOperator::new((**right).clone(), self.column_names.clone());
|
|
left_filter.evaluate_predicate(values) && right_filter.evaluate_predicate(values)
|
|
}
|
|
FilterPredicate::Or(left, right) => {
|
|
let left_filter = FilterOperator::new((**left).clone(), self.column_names.clone());
|
|
let right_filter =
|
|
FilterOperator::new((**right).clone(), self.column_names.clone());
|
|
left_filter.evaluate_predicate(values) || right_filter.evaluate_predicate(values)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl IncrementalOperator for FilterOperator {
|
|
fn initialize(&mut self, data: Delta) {
|
|
// Process initial data through filter
|
|
for (row, weight) in data.changes {
|
|
if let Some(tracker) = &self.tracker {
|
|
tracker.lock().unwrap().record_filter();
|
|
}
|
|
|
|
if self.evaluate_predicate(&row.values) {
|
|
self.current_state.changes.push((row, weight));
|
|
}
|
|
}
|
|
}
|
|
|
|
fn process_delta(&mut self, delta: Delta) -> Delta {
|
|
let mut output_delta = Delta::new();
|
|
|
|
// Process only the delta, not the entire state
|
|
for (row, weight) in delta.changes {
|
|
if let Some(tracker) = &self.tracker {
|
|
tracker.lock().unwrap().record_filter();
|
|
}
|
|
|
|
if self.evaluate_predicate(&row.values) {
|
|
output_delta.changes.push((row.clone(), weight));
|
|
|
|
// Update our state
|
|
self.current_state.changes.push((row, weight));
|
|
}
|
|
}
|
|
|
|
output_delta
|
|
}
|
|
|
|
fn get_current_state(&self) -> Delta {
|
|
// Return a consolidated view of the current state
|
|
let mut consolidated = self.current_state.clone();
|
|
consolidated.consolidate();
|
|
consolidated
|
|
}
|
|
|
|
fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
|
|
self.tracker = Some(tracker);
|
|
}
|
|
}
|
|
|
|
/// Project operator - selects/transforms columns
|
|
#[derive(Clone)]
|
|
pub struct ProjectOperator {
|
|
columns: Vec<ProjectColumn>,
|
|
input_column_names: Vec<String>,
|
|
output_column_names: Vec<String>,
|
|
current_state: Delta,
|
|
tracker: Option<Arc<Mutex<ComputationTracker>>>,
|
|
// Internal in-memory connection for expression evaluation
|
|
// Programs are very dependent on having a connection, so give it one.
|
|
//
|
|
// We could in theory pass the current connection, but there are a host of problems with that.
|
|
// For example: during a write transaction, where views are usually updated, we have autocommit
|
|
// on. When the program we are executing calls Halt, it will try to commit the current
|
|
// transaction, which is absolutely incorrect.
|
|
//
|
|
// There are other ways to solve this, but a read-only connection to an empty in-memory
|
|
// database gives us the closest environment we need to execute expressions.
|
|
internal_conn: Arc<Connection>,
|
|
}
|
|
|
|
impl std::fmt::Debug for ProjectOperator {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_struct("ProjectOperator")
|
|
.field("columns", &self.columns)
|
|
.field("input_column_names", &self.input_column_names)
|
|
.field("output_column_names", &self.output_column_names)
|
|
.field("current_state", &self.current_state)
|
|
.field("tracker", &self.tracker)
|
|
.finish_non_exhaustive()
|
|
}
|
|
}
|
|
|
|
impl ProjectOperator {
|
|
/// Create a new ProjectOperator from a SELECT statement, extracting projection columns
|
|
pub fn from_select(
|
|
select: &turso_parser::ast::Select,
|
|
input_column_names: Vec<String>,
|
|
schema: &crate::schema::Schema,
|
|
) -> crate::Result<Self> {
|
|
use turso_parser::ast::*;
|
|
|
|
// Set up internal connection for expression evaluation
|
|
let io = Arc::new(crate::MemoryIO::new());
|
|
let db = Database::open_file(
|
|
io, ":memory:", false, // no MVCC needed for expression evaluation
|
|
false, // no indexes needed
|
|
)?;
|
|
let internal_conn = db.connect()?;
|
|
// Set to read-only mode and disable auto-commit since we're only evaluating expressions
|
|
internal_conn.query_only.set(true);
|
|
internal_conn.auto_commit.set(false);
|
|
|
|
let temp_syms = SymbolTable::new();
|
|
|
|
// Extract columns from SELECT statement
|
|
let columns = if let OneSelect::Select {
|
|
columns: ref select_columns,
|
|
..
|
|
} = &select.body.select
|
|
{
|
|
let mut columns = Vec::new();
|
|
for result_col in select_columns {
|
|
match result_col {
|
|
ResultColumn::Expr(expr, alias) => {
|
|
let alias_str = if let Some(As::As(alias_name)) = alias {
|
|
Some(alias_name.as_str().to_string())
|
|
} else {
|
|
None
|
|
};
|
|
// Try to compile the expression (handles both columns and complex expressions)
|
|
let compiled = CompiledExpression::compile(
|
|
expr,
|
|
&input_column_names,
|
|
schema,
|
|
&temp_syms,
|
|
internal_conn.clone(),
|
|
)?;
|
|
columns.push(ProjectColumn {
|
|
expr: (**expr).clone(),
|
|
alias: alias_str,
|
|
compiled,
|
|
});
|
|
}
|
|
ResultColumn::Star => {
|
|
// Select all columns - create trivial column references
|
|
for name in &input_column_names {
|
|
// Create an Id expression for the column
|
|
let expr = Expr::Id(Name::Ident(name.clone()));
|
|
let compiled = CompiledExpression::compile(
|
|
&expr,
|
|
&input_column_names,
|
|
schema,
|
|
&temp_syms,
|
|
internal_conn.clone(),
|
|
)?;
|
|
columns.push(ProjectColumn {
|
|
expr,
|
|
alias: None,
|
|
compiled,
|
|
});
|
|
}
|
|
}
|
|
x => {
|
|
return Err(crate::LimboError::ParseError(format!(
|
|
"Unsupported {x:?} clause when compiling project operator",
|
|
)));
|
|
}
|
|
}
|
|
}
|
|
|
|
if columns.is_empty() {
|
|
return Err(crate::LimboError::ParseError(
|
|
"No columns found when compiling project operator".to_string(),
|
|
));
|
|
}
|
|
columns
|
|
} else {
|
|
return Err(crate::LimboError::ParseError(
|
|
"Expression is not a valid SELECT expression".to_string(),
|
|
));
|
|
};
|
|
|
|
// Generate output column names based on aliases or expressions
|
|
let output_column_names = columns
|
|
.iter()
|
|
.map(|c| {
|
|
c.alias.clone().unwrap_or_else(|| match &c.expr {
|
|
Expr::Id(name) => name.as_str().to_string(),
|
|
Expr::Qualified(table, column) => {
|
|
format!("{}.{}", table.as_str(), column.as_str())
|
|
}
|
|
Expr::DoublyQualified(db, table, column) => {
|
|
format!("{}.{}.{}", db.as_str(), table.as_str(), column.as_str())
|
|
}
|
|
_ => c.expr.to_string(),
|
|
})
|
|
})
|
|
.collect();
|
|
|
|
Ok(Self {
|
|
columns,
|
|
input_column_names,
|
|
output_column_names,
|
|
current_state: Delta::new(),
|
|
tracker: None,
|
|
internal_conn,
|
|
})
|
|
}
|
|
|
|
/// Get the columns for this projection
|
|
pub fn columns(&self) -> &[ProjectColumn] {
|
|
&self.columns
|
|
}
|
|
|
|
fn project_values(&self, values: &[Value]) -> Vec<Value> {
|
|
let mut output = Vec::new();
|
|
|
|
for col in &self.columns {
|
|
// Use the internal connection's pager for expression evaluation
|
|
let internal_pager = self.internal_conn.pager.borrow().clone();
|
|
|
|
// Execute the compiled expression (handles both columns and complex expressions)
|
|
let result = col
|
|
.compiled
|
|
.execute(values, internal_pager)
|
|
.expect("Failed to execute compiled expression for the Project operator");
|
|
output.push(result);
|
|
}
|
|
|
|
output
|
|
}
|
|
|
|
fn evaluate_expression(&self, expr: &turso_parser::ast::Expr, values: &[Value]) -> Value {
|
|
use turso_parser::ast::*;
|
|
match expr {
|
|
Expr::Id(name) => {
|
|
if let Some(idx) = self
|
|
.input_column_names
|
|
.iter()
|
|
.position(|c| c == name.as_str())
|
|
{
|
|
if let Some(v) = values.get(idx) {
|
|
return v.clone();
|
|
}
|
|
}
|
|
Value::Null
|
|
}
|
|
Expr::Literal(lit) => {
|
|
match lit {
|
|
Literal::Numeric(n) => {
|
|
if let Ok(i) = n.parse::<i64>() {
|
|
Value::Integer(i)
|
|
} else if let Ok(f) = n.parse::<f64>() {
|
|
Value::Float(f)
|
|
} else {
|
|
Value::Null
|
|
}
|
|
}
|
|
Literal::String(s) => {
|
|
let cleaned = s.trim_matches('\'').trim_matches('"');
|
|
Value::Text(Text::new(cleaned))
|
|
}
|
|
Literal::Null => Value::Null,
|
|
Literal::Blob(_)
|
|
| Literal::Keyword(_)
|
|
| Literal::CurrentDate
|
|
| Literal::CurrentTime
|
|
| Literal::CurrentTimestamp => Value::Null, // Not supported yet
|
|
}
|
|
}
|
|
Expr::Binary(left, op, right) => {
|
|
let left_val = self.evaluate_expression(left, values);
|
|
let right_val = self.evaluate_expression(right, values);
|
|
|
|
match op {
|
|
Operator::Add => match (&left_val, &right_val) {
|
|
(Value::Integer(a), Value::Integer(b)) => Value::Integer(a + b),
|
|
(Value::Float(a), Value::Float(b)) => Value::Float(a + b),
|
|
(Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 + b),
|
|
(Value::Float(a), Value::Integer(b)) => Value::Float(a + *b as f64),
|
|
_ => Value::Null,
|
|
},
|
|
Operator::Subtract => match (&left_val, &right_val) {
|
|
(Value::Integer(a), Value::Integer(b)) => Value::Integer(a - b),
|
|
(Value::Float(a), Value::Float(b)) => Value::Float(a - b),
|
|
(Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 - b),
|
|
(Value::Float(a), Value::Integer(b)) => Value::Float(a - *b as f64),
|
|
_ => Value::Null,
|
|
},
|
|
Operator::Multiply => match (&left_val, &right_val) {
|
|
(Value::Integer(a), Value::Integer(b)) => Value::Integer(a * b),
|
|
(Value::Float(a), Value::Float(b)) => Value::Float(a * b),
|
|
(Value::Integer(a), Value::Float(b)) => Value::Float(*a as f64 * b),
|
|
(Value::Float(a), Value::Integer(b)) => Value::Float(a * *b as f64),
|
|
_ => Value::Null,
|
|
},
|
|
Operator::Divide => match (&left_val, &right_val) {
|
|
(Value::Integer(a), Value::Integer(b)) => {
|
|
if *b != 0 {
|
|
Value::Integer(a / b)
|
|
} else {
|
|
Value::Null
|
|
}
|
|
}
|
|
(Value::Float(a), Value::Float(b)) => {
|
|
if *b != 0.0 {
|
|
Value::Float(a / b)
|
|
} else {
|
|
Value::Null
|
|
}
|
|
}
|
|
(Value::Integer(a), Value::Float(b)) => {
|
|
if *b != 0.0 {
|
|
Value::Float(*a as f64 / b)
|
|
} else {
|
|
Value::Null
|
|
}
|
|
}
|
|
(Value::Float(a), Value::Integer(b)) => {
|
|
if *b != 0 {
|
|
Value::Float(a / *b as f64)
|
|
} else {
|
|
Value::Null
|
|
}
|
|
}
|
|
_ => Value::Null,
|
|
},
|
|
_ => Value::Null, // Other operators not supported yet
|
|
}
|
|
}
|
|
Expr::FunctionCall { name, args, .. } => {
|
|
match name.as_str().to_lowercase().as_str() {
|
|
"hex" => {
|
|
if args.len() == 1 {
|
|
let arg_val = self.evaluate_expression(&args[0], values);
|
|
match arg_val {
|
|
Value::Integer(i) => Value::Text(Text::new(&format!("{i:X}"))),
|
|
_ => Value::Null,
|
|
}
|
|
} else {
|
|
Value::Null
|
|
}
|
|
}
|
|
_ => Value::Null, // Other functions not supported yet
|
|
}
|
|
}
|
|
Expr::Parenthesized(inner) => {
|
|
assert!(
|
|
inner.len() <= 1,
|
|
"Parenthesized expressions with multiple elements are not supported"
|
|
);
|
|
if !inner.is_empty() {
|
|
self.evaluate_expression(&inner[0], values)
|
|
} else {
|
|
Value::Null
|
|
}
|
|
}
|
|
_ => Value::Null, // Other expression types not supported yet
|
|
}
|
|
}
|
|
}
|
|
|
|
impl IncrementalOperator for ProjectOperator {
|
|
fn initialize(&mut self, data: Delta) {
|
|
for (row, weight) in &data.changes {
|
|
if let Some(tracker) = &self.tracker {
|
|
tracker.lock().unwrap().record_project();
|
|
}
|
|
|
|
let projected = self.project_values(&row.values);
|
|
let projected_row = HashableRow::new(row.rowid, projected);
|
|
self.current_state.changes.push((projected_row, *weight));
|
|
}
|
|
}
|
|
|
|
fn process_delta(&mut self, delta: Delta) -> Delta {
|
|
let mut output_delta = Delta::new();
|
|
|
|
for (row, weight) in &delta.changes {
|
|
if let Some(tracker) = &self.tracker {
|
|
tracker.lock().unwrap().record_project();
|
|
}
|
|
|
|
let projected = self.project_values(&row.values);
|
|
let projected_row = HashableRow::new(row.rowid, projected);
|
|
|
|
output_delta.changes.push((projected_row.clone(), *weight));
|
|
self.current_state.changes.push((projected_row, *weight));
|
|
}
|
|
|
|
output_delta
|
|
}
|
|
|
|
fn get_current_state(&self) -> Delta {
|
|
// Return a consolidated view of the current state
|
|
let mut consolidated = self.current_state.clone();
|
|
consolidated.consolidate();
|
|
consolidated
|
|
}
|
|
|
|
fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
|
|
self.tracker = Some(tracker);
|
|
}
|
|
}
|
|
|
|
/// Join operator - performs incremental joins using DBSP formula
|
|
/// ∂(A ⋈ B) = A ⋈ ∂B + ∂A ⋈ B + ∂A ⋈ ∂B
|
|
#[derive(Debug)]
|
|
pub struct JoinOperator {
|
|
join_type: JoinType,
|
|
pub left_on_column: String,
|
|
pub right_on_column: String,
|
|
left_column_names: Vec<String>,
|
|
right_column_names: Vec<String>,
|
|
// Current accumulated state for both sides
|
|
left_state: Delta,
|
|
right_state: Delta,
|
|
// Index for efficient lookups: column_value_as_string -> vec of row_keys
|
|
// We use String representation of values since Value doesn't implement Hash
|
|
left_index: HashMap<String, Vec<i64>>,
|
|
right_index: HashMap<String, Vec<i64>>,
|
|
// Result state
|
|
current_state: Delta,
|
|
tracker: Option<Arc<Mutex<ComputationTracker>>>,
|
|
// For generating unique keys for join results
|
|
next_result_key: i64,
|
|
}
|
|
|
|
impl JoinOperator {
|
|
pub fn new(
|
|
join_type: JoinType,
|
|
left_on_column: String,
|
|
right_on_column: String,
|
|
left_column_names: Vec<String>,
|
|
right_column_names: Vec<String>,
|
|
) -> Self {
|
|
Self {
|
|
join_type,
|
|
left_on_column,
|
|
right_on_column,
|
|
left_column_names,
|
|
right_column_names,
|
|
left_state: Delta::new(),
|
|
right_state: Delta::new(),
|
|
left_index: HashMap::new(),
|
|
right_index: HashMap::new(),
|
|
current_state: Delta::new(),
|
|
tracker: None,
|
|
next_result_key: 0,
|
|
}
|
|
}
|
|
|
|
/// Installs the computation tracker this operator reports to, replacing any previous one.
pub fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
    self.tracker.replace(tracker);
}
|
|
|
|
/// Build index for a side of the join
|
|
fn build_index(
|
|
state: &Delta,
|
|
column_names: &[String],
|
|
on_column: &str,
|
|
) -> HashMap<String, Vec<i64>> {
|
|
let mut index = HashMap::new();
|
|
|
|
// Find the column index
|
|
let col_idx = column_names.iter().position(|c| c == on_column);
|
|
if col_idx.is_none() {
|
|
return index;
|
|
}
|
|
let col_idx = col_idx.unwrap();
|
|
|
|
// Build the index
|
|
for (row, weight) in &state.changes {
|
|
// Include rows with positive weight in the index
|
|
if *weight > 0 {
|
|
if let Some(key_value) = row.values.get(col_idx) {
|
|
// Convert value to string for indexing
|
|
let key_str = format!("{key_value:?}");
|
|
index
|
|
.entry(key_str)
|
|
.or_insert_with(Vec::new)
|
|
.push(row.rowid);
|
|
}
|
|
}
|
|
}
|
|
|
|
index
|
|
}
|
|
|
|
/// Join two deltas
|
|
fn join_deltas(&self, left_delta: &Delta, right_delta: &Delta, next_key: &mut i64) -> Delta {
|
|
let mut result = Delta::new();
|
|
|
|
// Find column indices
|
|
let left_col_idx = self
|
|
.left_column_names
|
|
.iter()
|
|
.position(|c| c == &self.left_on_column)
|
|
.unwrap_or(0);
|
|
let right_col_idx = self
|
|
.right_column_names
|
|
.iter()
|
|
.position(|c| c == &self.right_on_column)
|
|
.unwrap_or(0);
|
|
|
|
// For each row in left_delta
|
|
for (left_row, left_weight) in &left_delta.changes {
|
|
// Process both inserts and deletes
|
|
|
|
let left_join_value = left_row.values.get(left_col_idx);
|
|
if left_join_value.is_none() {
|
|
continue;
|
|
}
|
|
let left_join_value = left_join_value.unwrap();
|
|
|
|
// Look up matching rows in right_delta
|
|
for (right_row, right_weight) in &right_delta.changes {
|
|
// Process both inserts and deletes
|
|
|
|
let right_join_value = right_row.values.get(right_col_idx);
|
|
if right_join_value.is_none() {
|
|
continue;
|
|
}
|
|
let right_join_value = right_join_value.unwrap();
|
|
|
|
// Check if values match
|
|
if left_join_value == right_join_value {
|
|
// Record the join lookup
|
|
if let Some(tracker) = &self.tracker {
|
|
tracker.lock().unwrap().record_join_lookup();
|
|
}
|
|
|
|
// Create joined row
|
|
let mut joined_values = left_row.values.clone();
|
|
joined_values.extend(right_row.values.clone());
|
|
|
|
// Generate a unique key for the result
|
|
let result_key = *next_key;
|
|
*next_key += 1;
|
|
|
|
let joined_row = HashableRow::new(result_key, joined_values);
|
|
result
|
|
.changes
|
|
.push((joined_row, left_weight * right_weight));
|
|
}
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
/// Join a delta with the full state using the index
|
|
fn join_delta_with_state(
|
|
&self,
|
|
delta: &Delta,
|
|
state: &Delta,
|
|
delta_on_left: bool,
|
|
next_key: &mut i64,
|
|
) -> Delta {
|
|
let mut result = Delta::new();
|
|
|
|
let (delta_col_idx, state_col_names) = if delta_on_left {
|
|
(
|
|
self.left_column_names
|
|
.iter()
|
|
.position(|c| c == &self.left_on_column)
|
|
.unwrap_or(0),
|
|
&self.right_column_names,
|
|
)
|
|
} else {
|
|
(
|
|
self.right_column_names
|
|
.iter()
|
|
.position(|c| c == &self.right_on_column)
|
|
.unwrap_or(0),
|
|
&self.left_column_names,
|
|
)
|
|
};
|
|
|
|
// Use index for efficient lookup
|
|
let state_index = Self::build_index(
|
|
state,
|
|
state_col_names,
|
|
if delta_on_left {
|
|
&self.right_on_column
|
|
} else {
|
|
&self.left_on_column
|
|
},
|
|
);
|
|
|
|
for (delta_row, delta_weight) in &delta.changes {
|
|
// Process both inserts and deletes
|
|
|
|
let delta_join_value = delta_row.values.get(delta_col_idx);
|
|
if delta_join_value.is_none() {
|
|
continue;
|
|
}
|
|
let delta_join_value = delta_join_value.unwrap();
|
|
|
|
// Use index to find matching rows
|
|
let delta_key_str = format!("{delta_join_value:?}");
|
|
if let Some(matching_keys) = state_index.get(&delta_key_str) {
|
|
for state_key in matching_keys {
|
|
// Look up in the state - find the row with this rowid
|
|
let state_row_opt = state
|
|
.changes
|
|
.iter()
|
|
.find(|(row, weight)| row.rowid == *state_key && *weight > 0);
|
|
|
|
if let Some((state_row, state_weight)) = state_row_opt {
|
|
// Record the join lookup
|
|
if let Some(tracker) = &self.tracker {
|
|
tracker.lock().unwrap().record_join_lookup();
|
|
}
|
|
|
|
// Create joined row
|
|
let joined_values = if delta_on_left {
|
|
let mut v = delta_row.values.clone();
|
|
v.extend(state_row.values.clone());
|
|
v
|
|
} else {
|
|
let mut v = state_row.values.clone();
|
|
v.extend(delta_row.values.clone());
|
|
v
|
|
};
|
|
|
|
let result_key = *next_key;
|
|
*next_key += 1;
|
|
|
|
let joined_row = HashableRow::new(result_key, joined_values);
|
|
result
|
|
.changes
|
|
.push((joined_row, delta_weight * state_weight));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
/// Initialize both sides of the join
|
|
pub fn initialize_both(&mut self, left_data: Delta, right_data: Delta) {
|
|
self.left_state = left_data.clone();
|
|
self.right_state = right_data.clone();
|
|
|
|
// Build indices
|
|
self.left_index = Self::build_index(
|
|
&self.left_state,
|
|
&self.left_column_names,
|
|
&self.left_on_column,
|
|
);
|
|
self.right_index = Self::build_index(
|
|
&self.right_state,
|
|
&self.right_column_names,
|
|
&self.right_on_column,
|
|
);
|
|
|
|
// Perform initial join
|
|
let mut next_key = self.next_result_key;
|
|
self.current_state = self.join_deltas(&self.left_state, &self.right_state, &mut next_key);
|
|
self.next_result_key = next_key;
|
|
}
|
|
|
|
/// Process deltas for both sides using DBSP formula
|
|
/// ∂(A ⋈ B) = A ⋈ ∂B + ∂A ⋈ B + ∂A ⋈ ∂B
|
|
pub fn process_both_deltas(&mut self, left_delta: Delta, right_delta: Delta) -> Delta {
|
|
let mut result = Delta::new();
|
|
let mut next_key = self.next_result_key;
|
|
|
|
// A ⋈ ∂B (existing left with new right)
|
|
let a_join_db =
|
|
self.join_delta_with_state(&right_delta, &self.left_state, false, &mut next_key);
|
|
result.merge(&a_join_db);
|
|
|
|
// ∂A ⋈ B (new left with existing right)
|
|
let da_join_b =
|
|
self.join_delta_with_state(&left_delta, &self.right_state, true, &mut next_key);
|
|
result.merge(&da_join_b);
|
|
|
|
// ∂A ⋈ ∂B (new left with new right)
|
|
let da_join_db = self.join_deltas(&left_delta, &right_delta, &mut next_key);
|
|
result.merge(&da_join_db);
|
|
|
|
// Update the next key counter
|
|
self.next_result_key = next_key;
|
|
|
|
// Update state
|
|
self.left_state.merge(&left_delta);
|
|
self.right_state.merge(&right_delta);
|
|
self.current_state.merge(&result);
|
|
|
|
// Rebuild indices if needed
|
|
self.left_index = Self::build_index(
|
|
&self.left_state,
|
|
&self.left_column_names,
|
|
&self.left_on_column,
|
|
);
|
|
self.right_index = Self::build_index(
|
|
&self.right_state,
|
|
&self.right_column_names,
|
|
&self.right_on_column,
|
|
);
|
|
|
|
result
|
|
}
|
|
|
|
/// Borrows the accumulated join result maintained by this operator.
pub fn get_current_state(&self) -> &Delta {
    let accumulated = &self.current_state;
    accumulated
}
|
|
|
|
/// Process a delta from the left table only
|
|
pub fn process_left_delta(&mut self, left_delta: Delta) -> Delta {
|
|
let empty_delta = Delta::new();
|
|
self.process_both_deltas(left_delta, empty_delta)
|
|
}
|
|
|
|
/// Process a delta from the right table only
|
|
pub fn process_right_delta(&mut self, right_delta: Delta) -> Delta {
|
|
let empty_delta = Delta::new();
|
|
self.process_both_deltas(empty_delta, right_delta)
|
|
}
|
|
}
|
|
|
|
/// Aggregate operator - performs incremental aggregation with GROUP BY
|
|
/// Maintains running totals/counts that are updated incrementally
|
|
#[derive(Debug, Clone)]
|
|
pub struct AggregateOperator {
|
|
// GROUP BY columns
|
|
group_by: Vec<String>,
|
|
// Aggregate functions to compute
|
|
aggregates: Vec<AggregateFunction>,
|
|
// Column names from input
|
|
pub input_column_names: Vec<String>,
|
|
// Aggregation state: group_key_str -> aggregate values
|
|
// For each group, we store the aggregate results
|
|
// We use String representation of group keys since Value doesn't implement Hash
|
|
group_states: HashMap<String, AggregateState>,
|
|
// Map to keep track of actual group key values for output
|
|
group_key_values: HashMap<String, Vec<Value>>,
|
|
// Current output state as a Delta
|
|
current_state: Delta,
|
|
tracker: Option<Arc<Mutex<ComputationTracker>>>,
|
|
}
|
|
|
|
/// State for a single group's aggregates
|
|
#[derive(Debug, Clone)]
|
|
struct AggregateState {
|
|
// For COUNT: just the count
|
|
count: i64,
|
|
// For SUM: column_name -> sum value
|
|
sums: HashMap<String, f64>,
|
|
// For AVG: column_name -> (sum, count) for computing average
|
|
avgs: HashMap<String, (f64, i64)>,
|
|
// For MIN: column_name -> min value
|
|
mins: HashMap<String, Value>,
|
|
// For MAX: column_name -> max value
|
|
maxs: HashMap<String, Value>,
|
|
}
|
|
|
|
impl AggregateState {
|
|
fn new() -> Self {
|
|
Self {
|
|
count: 0,
|
|
sums: HashMap::new(),
|
|
avgs: HashMap::new(),
|
|
mins: HashMap::new(),
|
|
maxs: HashMap::new(),
|
|
}
|
|
}
|
|
|
|
/// Apply a delta to this aggregate state
|
|
fn apply_delta(
|
|
&mut self,
|
|
values: &[Value],
|
|
weight: isize,
|
|
aggregates: &[AggregateFunction],
|
|
column_names: &[String],
|
|
) {
|
|
// Update COUNT
|
|
self.count += weight as i64;
|
|
|
|
// Update other aggregates
|
|
for agg in aggregates {
|
|
match agg {
|
|
AggregateFunction::Count => {
|
|
// Already handled above
|
|
}
|
|
AggregateFunction::Sum(col_name) => {
|
|
if let Some(idx) = column_names.iter().position(|c| c == col_name) {
|
|
if let Some(val) = values.get(idx) {
|
|
let num_val = match val {
|
|
Value::Integer(i) => *i as f64,
|
|
Value::Float(f) => *f,
|
|
_ => 0.0,
|
|
};
|
|
*self.sums.entry(col_name.clone()).or_insert(0.0) +=
|
|
num_val * weight as f64;
|
|
}
|
|
}
|
|
}
|
|
AggregateFunction::Avg(col_name) => {
|
|
if let Some(idx) = column_names.iter().position(|c| c == col_name) {
|
|
if let Some(val) = values.get(idx) {
|
|
let num_val = match val {
|
|
Value::Integer(i) => *i as f64,
|
|
Value::Float(f) => *f,
|
|
_ => 0.0,
|
|
};
|
|
let (sum, count) =
|
|
self.avgs.entry(col_name.clone()).or_insert((0.0, 0));
|
|
*sum += num_val * weight as f64;
|
|
*count += weight as i64;
|
|
}
|
|
}
|
|
}
|
|
AggregateFunction::Min(col_name) => {
|
|
// MIN/MAX are more complex for incremental updates
|
|
// For now, we'll need to recompute from the full state
|
|
// This is a limitation we can improve later
|
|
if weight > 0 {
|
|
// Only update on insert
|
|
if let Some(idx) = column_names.iter().position(|c| c == col_name) {
|
|
if let Some(val) = values.get(idx) {
|
|
self.mins
|
|
.entry(col_name.clone())
|
|
.and_modify(|existing| {
|
|
if val < existing {
|
|
*existing = val.clone();
|
|
}
|
|
})
|
|
.or_insert_with(|| val.clone());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
AggregateFunction::Max(col_name) => {
|
|
if weight > 0 {
|
|
// Only update on insert
|
|
if let Some(idx) = column_names.iter().position(|c| c == col_name) {
|
|
if let Some(val) = values.get(idx) {
|
|
self.maxs
|
|
.entry(col_name.clone())
|
|
.and_modify(|existing| {
|
|
if val > existing {
|
|
*existing = val.clone();
|
|
}
|
|
})
|
|
.or_insert_with(|| val.clone());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Convert aggregate state to output values
|
|
fn to_values(&self, aggregates: &[AggregateFunction]) -> Vec<Value> {
|
|
let mut result = Vec::new();
|
|
|
|
for agg in aggregates {
|
|
match agg {
|
|
AggregateFunction::Count => {
|
|
result.push(Value::Integer(self.count));
|
|
}
|
|
AggregateFunction::Sum(col_name) => {
|
|
let sum = self.sums.get(col_name).copied().unwrap_or(0.0);
|
|
// Return as integer if it's a whole number, otherwise as float
|
|
if sum.fract() == 0.0 {
|
|
result.push(Value::Integer(sum as i64));
|
|
} else {
|
|
result.push(Value::Float(sum));
|
|
}
|
|
}
|
|
AggregateFunction::Avg(col_name) => {
|
|
if let Some((sum, count)) = self.avgs.get(col_name) {
|
|
if *count > 0 {
|
|
result.push(Value::Float(sum / *count as f64));
|
|
} else {
|
|
result.push(Value::Null);
|
|
}
|
|
} else {
|
|
result.push(Value::Null);
|
|
}
|
|
}
|
|
AggregateFunction::Min(col_name) => {
|
|
result.push(self.mins.get(col_name).cloned().unwrap_or(Value::Null));
|
|
}
|
|
AggregateFunction::Max(col_name) => {
|
|
result.push(self.maxs.get(col_name).cloned().unwrap_or(Value::Null));
|
|
}
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|
|
}
|
|
|
|
impl AggregateOperator {
|
|
pub fn new(
|
|
group_by: Vec<String>,
|
|
aggregates: Vec<AggregateFunction>,
|
|
input_column_names: Vec<String>,
|
|
) -> Self {
|
|
Self {
|
|
group_by,
|
|
aggregates,
|
|
input_column_names,
|
|
group_states: HashMap::new(),
|
|
group_key_values: HashMap::new(),
|
|
current_state: Delta::new(),
|
|
tracker: None,
|
|
}
|
|
}
|
|
|
|
pub fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
|
|
self.tracker = Some(tracker);
|
|
}
|
|
|
|
/// Extract group key values from a row
|
|
fn extract_group_key(&self, values: &[Value]) -> Vec<Value> {
|
|
let mut key = Vec::new();
|
|
|
|
for group_col in &self.group_by {
|
|
if let Some(idx) = self.input_column_names.iter().position(|c| c == group_col) {
|
|
if let Some(val) = values.get(idx) {
|
|
key.push(val.clone());
|
|
} else {
|
|
key.push(Value::Null);
|
|
}
|
|
} else {
|
|
key.push(Value::Null);
|
|
}
|
|
}
|
|
|
|
key
|
|
}
|
|
|
|
/// Convert group key to string for indexing (since Value doesn't implement Hash)
|
|
fn group_key_to_string(key: &[Value]) -> String {
|
|
key.iter()
|
|
.map(|v| format!("{v:?}"))
|
|
.collect::<Vec<_>>()
|
|
.join(",")
|
|
}
|
|
|
|
/// Process a delta and update aggregate state incrementally
|
|
pub fn process_delta(&mut self, delta: Delta) -> Delta {
|
|
let mut output_delta = Delta::new();
|
|
|
|
// Track which groups were modified and their old values
|
|
let mut modified_groups = HashSet::new();
|
|
let mut old_values: HashMap<String, Vec<Value>> = HashMap::new();
|
|
|
|
// Process each change in the delta
|
|
for (row, weight) in &delta.changes {
|
|
if let Some(tracker) = &self.tracker {
|
|
tracker.lock().unwrap().record_aggregation();
|
|
}
|
|
|
|
// Extract group key
|
|
let group_key = self.extract_group_key(&row.values);
|
|
let group_key_str = Self::group_key_to_string(&group_key);
|
|
|
|
// Store old aggregate values BEFORE applying the delta
|
|
// (only for the first time we see this group in this batch)
|
|
if !modified_groups.contains(&group_key_str) {
|
|
if let Some(state) = self.group_states.get(&group_key_str) {
|
|
let mut old_row = group_key.clone();
|
|
old_row.extend(state.to_values(&self.aggregates));
|
|
old_values.insert(group_key_str.clone(), old_row);
|
|
}
|
|
}
|
|
|
|
modified_groups.insert(group_key_str.clone());
|
|
|
|
// Store the actual group key values
|
|
self.group_key_values
|
|
.insert(group_key_str.clone(), group_key.clone());
|
|
|
|
// Get or create aggregate state for this group
|
|
let state = self
|
|
.group_states
|
|
.entry(group_key_str.clone())
|
|
.or_insert_with(AggregateState::new);
|
|
|
|
// Apply the delta to the aggregate state
|
|
state.apply_delta(
|
|
&row.values,
|
|
*weight,
|
|
&self.aggregates,
|
|
&self.input_column_names,
|
|
);
|
|
}
|
|
|
|
// Generate output delta for modified groups
|
|
for group_key_str in modified_groups {
|
|
// Get the actual group key values
|
|
let group_key = self
|
|
.group_key_values
|
|
.get(&group_key_str)
|
|
.cloned()
|
|
.unwrap_or_default();
|
|
|
|
// Generate a unique key for this group
|
|
// We use a hash of the group key to ensure consistency
|
|
let result_key = group_key_str
|
|
.bytes()
|
|
.fold(0i64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as i64));
|
|
|
|
// Emit retraction for old value if it existed
|
|
if let Some(old_row_values) = old_values.get(&group_key_str) {
|
|
let old_row = HashableRow::new(result_key, old_row_values.clone());
|
|
output_delta.changes.push((old_row.clone(), -1));
|
|
// Also remove from current state
|
|
self.current_state.changes.push((old_row, -1));
|
|
}
|
|
|
|
if let Some(state) = self.group_states.get(&group_key_str) {
|
|
// Build output row: group_by columns + aggregate values
|
|
let mut output_values = group_key.clone();
|
|
output_values.extend(state.to_values(&self.aggregates));
|
|
|
|
// Check if group should be removed (count is 0)
|
|
if state.count > 0 {
|
|
// Add to output delta with positive weight
|
|
let output_row = HashableRow::new(result_key, output_values.clone());
|
|
output_delta.changes.push((output_row.clone(), 1));
|
|
|
|
// Update current state
|
|
self.current_state.changes.push((output_row, 1));
|
|
} else {
|
|
// Group has count=0, remove from state
|
|
// (we already emitted the retraction above if needed)
|
|
self.group_states.remove(&group_key_str);
|
|
self.group_key_values.remove(&group_key_str);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Consolidate current state to handle removals
|
|
self.current_state.consolidate();
|
|
|
|
output_delta
|
|
}
|
|
|
|
pub fn get_current_state(&self) -> &Delta {
|
|
&self.current_state
|
|
}
|
|
}
|
|
|
|
impl IncrementalOperator for AggregateOperator {
|
|
fn initialize(&mut self, data: Delta) {
|
|
// Process all initial data
|
|
self.process_delta(data);
|
|
}
|
|
|
|
fn process_delta(&mut self, delta: Delta) -> Delta {
|
|
self.process_delta(delta)
|
|
}
|
|
|
|
fn get_current_state(&self) -> Delta {
|
|
// Return a consolidated view of the current state
|
|
let mut consolidated = self.current_state.clone();
|
|
consolidated.consolidate();
|
|
consolidated
|
|
}
|
|
|
|
fn set_tracker(&mut self, tracker: Arc<Mutex<ComputationTracker>>) {
|
|
self.tracker = Some(tracker);
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::types::Text;
|
|
use crate::Value;
|
|
use std::sync::{Arc, Mutex};
|
|
|
|
/// Assert that we're doing incremental work, not full recomputation
|
|
fn assert_incremental(tracker: &ComputationTracker, expected_ops: usize, data_size: usize) {
|
|
assert!(
|
|
tracker.total_computations() <= expected_ops,
|
|
"Expected <= {} operations for incremental update, got {}",
|
|
expected_ops,
|
|
tracker.total_computations()
|
|
);
|
|
assert!(
|
|
tracker.total_computations() < data_size,
|
|
"Computation count {} suggests full recomputation (data size: {})",
|
|
tracker.total_computations(),
|
|
data_size
|
|
);
|
|
assert_eq!(
|
|
tracker.full_scans, 0,
|
|
"Incremental computation should not perform full scans"
|
|
);
|
|
}
|
|
|
|
// Aggregate tests
#[test]
fn test_aggregate_incremental_update_emits_retraction() {
    // When an aggregate value changes, the operator must emit a retraction (-1)
    // of the old value followed by an insertion (+1) of the new value.

    // SUM(age) with no GROUP BY.
    let columns = vec!["id".to_string(), "name".to_string(), "age".to_string()];
    let mut agg = AggregateOperator::new(
        vec![],
        vec![AggregateFunction::Sum("age".to_string())],
        columns,
    );

    // Three initial users.
    let mut initial_delta = Delta::new();
    for (id, name, age) in [(1, "Alice", 25), (2, "Bob", 30), (3, "Charlie", 35)] {
        initial_delta.insert(
            id,
            vec![
                Value::Integer(id),
                Value::Text(name.to_string().into()),
                Value::Integer(age),
            ],
        );
    }
    agg.initialize(initial_delta);

    // Initial state: SUM(age) = 25 + 30 + 35 = 90.
    let state = agg.get_current_state();
    assert_eq!(state.changes.len(), 1, "Should have one aggregate row");
    let (row, weight) = &state.changes[0];
    assert_eq!(*weight, 1, "Aggregate row should have weight 1");
    assert_eq!(row.values[0], Value::Float(90.0), "SUM should be 90");

    // Incremental update: one more user.
    let mut update_delta = Delta::new();
    update_delta.insert(
        4,
        vec![
            Value::Integer(4),
            Value::Text("David".to_string().into()),
            Value::Integer(40),
        ],
    );
    let output_delta = agg.process_delta(update_delta);

    // The output must hold exactly the retraction of 90 and the insertion of 130.
    assert_eq!(
        output_delta.changes.len(),
        2,
        "Expected 2 changes (retraction + insertion), got {}: {:?}",
        output_delta.changes.len(),
        output_delta.changes
    );

    // Retraction first ...
    let (retraction_row, retraction_weight) = &output_delta.changes[0];
    assert_eq!(
        *retraction_weight, -1,
        "First change should be a retraction"
    );
    assert_eq!(
        retraction_row.values[0],
        Value::Float(90.0),
        "Retracted value should be the old sum (90)"
    );

    // ... insertion second.
    let (insertion_row, insertion_weight) = &output_delta.changes[1];
    assert_eq!(*insertion_weight, 1, "Second change should be an insertion");
    assert_eq!(
        insertion_row.values[0],
        Value::Float(130.0),
        "Inserted value should be the new sum (130)"
    );

    // Same aggregate group, hence the same row ID on both changes.
    assert_eq!(
        retraction_row.rowid, insertion_row.rowid,
        "Retraction and insertion should have the same row ID"
    );
}
|
|
|
|
#[test]
fn test_aggregate_with_group_by_emits_retractions() {
    // For grouped data, a change must produce a retraction and an insertion
    // for the affected group only.

    // SUM(score) GROUP BY team.
    let columns = vec![
        "id".to_string(),
        "team".to_string(),
        "player".to_string(),
        "score".to_string(),
    ];
    let mut agg = AggregateOperator::new(
        vec!["team".to_string()],
        vec![AggregateFunction::Sum("score".to_string())],
        columns,
    );

    // Players spread over two teams.
    let mut initial_delta = Delta::new();
    for (id, team, player, score) in [
        (1, "red", "Alice", 10),
        (2, "blue", "Bob", 15),
        (3, "red", "Charlie", 20),
    ] {
        initial_delta.insert(
            id,
            vec![
                Value::Integer(id),
                Value::Text(team.to_string().into()),
                Value::Text(player.to_string().into()),
                Value::Integer(score),
            ],
        );
    }
    agg.initialize(initial_delta);

    // Initial state: red = 30, blue = 15.
    let state = agg.get_current_state();
    assert_eq!(state.changes.len(), 2, "Should have two groups");

    let mut red_sum = None;
    let mut blue_sum = None;
    for (row, weight) in &state.changes {
        assert_eq!(*weight, 1);
        if let Value::Text(team) = &row.values[0] {
            match team.as_str() {
                "red" => red_sum = Some(&row.values[1]),
                "blue" => blue_sum = Some(&row.values[1]),
                _ => {}
            }
        }
    }
    assert_eq!(
        red_sum,
        Some(&Value::Float(30.0)),
        "Red team sum should be 30"
    );
    assert_eq!(
        blue_sum,
        Some(&Value::Float(15.0)),
        "Blue team sum should be 15"
    );

    // Incremental update: a new red player.
    let mut update_delta = Delta::new();
    update_delta.insert(
        4,
        vec![
            Value::Integer(4),
            Value::Text("red".to_string().into()),
            Value::Text("David".to_string().into()),
            Value::Integer(25),
        ],
    );
    let output_delta = agg.process_delta(update_delta);

    // Only the red team changes: one retraction plus one insertion.
    assert_eq!(
        output_delta.changes.len(),
        2,
        "Expected 2 changes for red team only, got {}: {:?}",
        output_delta.changes.len(),
        output_delta.changes
    );

    let mut found_retraction = false;
    let mut found_insertion = false;

    for (row, weight) in &output_delta.changes {
        if let Value::Text(team) = &row.values[0] {
            assert_eq!(team.as_str(), "red", "Only red team should have changes");

            match *weight {
                -1 => {
                    // Retraction of the old value.
                    assert_eq!(
                        row.values[1],
                        Value::Float(30.0),
                        "Should retract old sum of 30"
                    );
                    found_retraction = true;
                }
                1 => {
                    // Insertion of the new value.
                    assert_eq!(
                        row.values[1],
                        Value::Float(55.0),
                        "Should insert new sum of 55"
                    );
                    found_insertion = true;
                }
                _ => {}
            }
        }
    }

    assert!(found_retraction, "Should have found retraction");
    assert!(found_insertion, "Should have found insertion");
}
|
|
|
|
// Join tests
#[test]
fn test_join_uses_delta_formula() {
    let tracker = Arc::new(Mutex::new(ComputationTracker::new()));

    // emails(user_id, email) joined with logins(login_id, user_id, timestamp) on user_id.
    let email_columns = vec!["user_id".to_string(), "email".to_string()];
    let login_columns = vec![
        "login_id".to_string(),
        "user_id".to_string(),
        "timestamp".to_string(),
    ];
    let mut join = JoinOperator::new(
        JoinType::Inner,
        "user_id".to_string(),
        "user_id".to_string(),
        email_columns,
        login_columns,
    );
    join.set_tracker(tracker.clone());

    // Initial emails table.
    let mut emails = Delta::new();
    for (user_id, address) in [(1, "alice@example.com"), (2, "bob@example.com")] {
        emails.insert(
            user_id,
            vec![Value::Integer(user_id), Value::Text(Text::new(address))],
        );
    }

    // Initial logins table: two logins, both for user 1.
    let mut logins = Delta::new();
    for (login_id, user_id, timestamp) in [(1, 1, 1000), (2, 1, 2000)] {
        logins.insert(
            login_id,
            vec![
                Value::Integer(login_id),
                Value::Integer(user_id),
                Value::Integer(timestamp),
            ],
        );
    }

    join.initialize_both(emails, logins);

    // Only the delta processing below should be counted.
    tracker.lock().unwrap().join_lookups = 0;

    // One new login for bob (user_id=2).
    let mut delta_logins = Delta::new();
    delta_logins.insert(
        3,
        vec![Value::Integer(3), Value::Integer(2), Value::Integer(3000)],
    );

    // Left side unchanged, right side receives the new login.
    let output = join.process_both_deltas(Delta::new(), delta_logins);

    // Exactly one join result: bob's new login.
    assert_eq!(output.len(), 1);

    // One lookup: finding bob's email for the new login.
    let lookups = tracker.lock().unwrap().join_lookups;
    assert_eq!(lookups, 1, "Should use index lookup, not scan all emails");

    // Only the delta was processed: 1 operation for 3 total rows.
    let t = tracker.lock().unwrap();
    assert_incremental(&t, 1, 3);
}
|
|
|
|
#[test]
fn test_join_maintains_index() {
    // left(id, name) joined with right(ref_id, value) on id = ref_id.
    let mut join = JoinOperator::new(
        JoinType::Inner,
        "id".to_string(),
        "ref_id".to_string(),
        vec!["id".to_string(), "name".to_string()],
        vec!["ref_id".to_string(), "value".to_string()],
    );

    // Initial data.
    let mut left = Delta::new();
    for (id, name) in [(1, "A"), (2, "B")] {
        left.insert(id, vec![Value::Integer(id), Value::Text(Text::new(name))]);
    }

    let mut right = Delta::new();
    right.insert(1, vec![Value::Integer(1), Value::Integer(100)]);

    // Initialization builds the indices and the initial join.
    join.initialize_both(left, right);

    // One match so far: id=1.
    let state = join.get_current_state();
    assert_eq!(state.changes.len(), 1);

    // New left row ...
    let mut delta_left = Delta::new();
    delta_left.insert(3, vec![Value::Integer(3), Value::Text(Text::new("C"))]);

    // ... and a right row that matches it.
    let mut delta_right = Delta::new();
    delta_right.insert(2, vec![Value::Integer(3), Value::Integer(300)]);

    let output = join.process_both_deltas(delta_left, delta_right);

    // One new join result.
    assert_eq!(output.len(), 1);

    // The joined row carries all four columns: id, name, ref_id, value.
    assert!(!output.changes.is_empty());
    let (result, _weight) = &output.changes[0];
    assert_eq!(result.values.len(), 4);
}
|
|
|
|
#[test]
fn test_join_formula_correctness() {
    // Exercises the DBSP formula: ∂(A ⋈ B) = A ⋈ ∂B + ∂A ⋈ B + ∂A ⋈ ∂B
    let tracker = Arc::new(Mutex::new(ComputationTracker::new()));

    let mut join = JoinOperator::new(
        JoinType::Inner,
        "x".to_string(),
        "x".to_string(),
        vec!["x".to_string(), "a".to_string()],
        vec!["x".to_string(), "b".to_string()],
    );
    join.set_tracker(tracker.clone());

    // Initial state A.
    let mut a = Delta::new();
    for (x, label) in [(1, "a1"), (2, "a2")] {
        a.insert(x, vec![Value::Integer(x), Value::Text(Text::new(label))]);
    }

    // Initial state B.
    let mut b = Delta::new();
    for (x, label) in [(1, "b1"), (2, "b2")] {
        b.insert(x, vec![Value::Integer(x), Value::Text(Text::new(label))]);
    }

    join.initialize_both(a, b);

    // Count only the work done by the delta step.
    tracker.lock().unwrap().join_lookups = 0;

    // ∂A: add x=3.
    let mut delta_a = Delta::new();
    delta_a.insert(3, vec![Value::Integer(3), Value::Text(Text::new("a3"))]);

    // ∂B: add x=3 and another row for x=1.
    let mut delta_b = Delta::new();
    delta_b.insert(3, vec![Value::Integer(3), Value::Text(Text::new("b3"))]);
    delta_b.insert(4, vec![Value::Integer(1), Value::Text(Text::new("b1_new"))]);

    let output = join.process_both_deltas(delta_a, delta_b);

    // Expected results:
    //   A ⋈ ∂B:  (1,a1) ⋈ (1,b1_new) = 1 result
    //   ∂A ⋈ B:  (3,a3) ⋈ nothing    = 0 results
    //   ∂A ⋈ ∂B: (3,a3) ⋈ (3,b3)     = 1 result
    // Total: 2 results
    assert_eq!(output.len(), 2);

    // The work done must stay proportional to the deltas.
    let lookups = tracker.lock().unwrap().join_lookups;
    assert!(lookups <= 4, "Should use efficient index lookups");
}
|
|
|
|
// Aggregation tests
#[test]
fn test_count_increments_not_recounts() {
    let tracker = Arc::new(Mutex::new(ComputationTracker::new()));

    // COUNT(*) GROUP BY category.
    let mut agg = AggregateOperator::new(
        vec!["category".to_string()],
        vec![AggregateFunction::Count],
        vec![
            "item_id".to_string(),
            "category".to_string(),
            "price".to_string(),
        ],
    );
    agg.set_tracker(tracker.clone());

    // Initial: 100 items in 10 categories (10 items each).
    let mut initial = Delta::new();
    for i in 0..100 {
        let category = format!("cat_{}", i / 10);
        initial.insert(
            i,
            vec![
                Value::Integer(i),
                Value::Text(Text::new(&category)),
                Value::Integer(i * 10),
            ],
        );
    }
    agg.initialize(initial);

    // Reset tracker for delta processing.
    tracker.lock().unwrap().aggregation_updates = 0;

    // Add one item to category 'cat_0'.
    let mut delta = Delta::new();
    delta.insert(
        100,
        vec![
            Value::Integer(100),
            Value::Text(Text::new("cat_0")),
            Value::Integer(1000),
        ],
    );

    let output = agg.process_delta(delta);

    // Only one aggregation update (cat_0) was performed; other groups were not recounted.
    assert_eq!(tracker.lock().unwrap().aggregation_updates, 1);

    // The operator emits the retraction of the old row first and the
    // insertion of the new row second, so the output holds two changes.
    assert_eq!(output.changes.len(), 2);

    let (retracted_row, retracted_weight) = &output.changes[0];
    assert_eq!(*retracted_weight, -1);
    assert_eq!(retracted_row.values[0], Value::Text(Text::new("cat_0")));
    assert_eq!(retracted_row.values[1], Value::Integer(10));

    // cat_0 now has count 11.
    let (inserted_row, inserted_weight) = &output.changes[1];
    assert_eq!(*inserted_weight, 1);
    assert_eq!(inserted_row.values[0], Value::Text(Text::new("cat_0")));
    assert_eq!(inserted_row.values[1], Value::Integer(11));

    // Verify incremental behavior.
    let t = tracker.lock().unwrap();
    assert_incremental(&t, 1, 101);
}
|
|
|
|
#[test]
fn test_sum_updates_incrementally() {
    let tracker = Arc::new(Mutex::new(ComputationTracker::new()));

    // SUM(amount) GROUP BY product.
    let mut agg = AggregateOperator::new(
        vec!["product".to_string()],
        vec![AggregateFunction::Sum("amount".to_string())],
        vec![
            "sale_id".to_string(),
            "product".to_string(),
            "amount".to_string(),
        ],
    );
    agg.set_tracker(tracker.clone());

    // Initial sales.
    let mut initial = Delta::new();
    for (sale_id, product, amount) in [(1, "Widget", 100), (2, "Gadget", 200), (3, "Widget", 150)] {
        initial.insert(
            sale_id,
            vec![
                Value::Integer(sale_id),
                Value::Text(Text::new(product)),
                Value::Integer(amount),
            ],
        );
    }
    agg.initialize(initial);

    // Check initial state: Widget=250, Gadget=200.
    let state = agg.get_current_state();
    let widget_sum = state
        .changes
        .iter()
        .find(|(c, _)| c.values[0] == Value::Text(Text::new("Widget")))
        .map(|(c, _)| c)
        .unwrap();
    assert_eq!(widget_sum.values[1], Value::Integer(250));

    // Reset tracker.
    tracker.lock().unwrap().aggregation_updates = 0;

    // Add sale of 50 for Widget.
    let mut delta = Delta::new();
    delta.insert(
        4,
        vec![
            Value::Integer(4),
            Value::Text(Text::new("Widget")),
            Value::Integer(50),
        ],
    );

    let output = agg.process_delta(delta);

    // Only one aggregation update (Widget) was performed.
    assert_eq!(tracker.lock().unwrap().aggregation_updates, 1);

    // The operator emits the retraction of the old row first and the
    // insertion of the new row second, so the output holds two changes.
    assert_eq!(output.changes.len(), 2);

    let (retracted, retracted_weight) = &output.changes[0];
    assert_eq!(*retracted_weight, -1);
    assert_eq!(retracted.values[0], Value::Text(Text::new("Widget")));
    assert_eq!(retracted.values[1], Value::Integer(250));

    // Widget should now be 300 (250 + 50).
    let (inserted, inserted_weight) = &output.changes[1];
    assert_eq!(*inserted_weight, 1);
    assert_eq!(inserted.values[0], Value::Text(Text::new("Widget")));
    assert_eq!(inserted.values[1], Value::Integer(300));
}
|
|
|
|
/// COUNT(*) and SUM(amount) GROUP BY user_id maintained side by side: a new order for one
/// user must yield a single output row with both aggregates advanced.
#[test]
fn test_count_and_sum_together() {
    // Mirrors the DBSP_ROADMAP example: COUNT(*) and SUM(amount) GROUP BY user_id.
    let group_by = vec!["user_id".to_string()];
    let aggregates = vec![
        AggregateFunction::Count,
        AggregateFunction::Sum("amount".to_string()),
    ];
    let input_columns = vec![
        "order_id".to_string(),
        "user_id".to_string(),
        "amount".to_string(),
    ];
    let mut agg = AggregateOperator::new(group_by, aggregates, input_columns);

    // Seed orders as (order_id, user_id, amount).
    let mut seed = Delta::new();
    for (order_id, user_id, amount) in [(1, 1, 100), (2, 1, 200), (3, 2, 150)] {
        seed.insert(
            order_id,
            vec![
                Value::Integer(order_id),
                Value::Integer(user_id),
                Value::Integer(amount),
            ],
        );
    }
    agg.initialize(seed);

    // Expected groups after initialization:
    //   user 1 -> count = 2, sum = 300
    //   user 2 -> count = 1, sum = 150
    let snapshot = agg.get_current_state();
    assert_eq!(snapshot.changes.len(), 2);

    let first_user_key = Value::Integer(1);
    let (first_user, _) = snapshot
        .changes
        .iter()
        .find(|(row, _)| row.values[0] == first_user_key)
        .unwrap();
    assert_eq!(first_user.values[1], Value::Integer(2)); // COUNT(*)
    assert_eq!(first_user.values[2], Value::Integer(300)); // SUM(amount)

    let second_user_key = Value::Integer(2);
    let (second_user, _) = snapshot
        .changes
        .iter()
        .find(|(row, _)| row.values[0] == second_user_key)
        .unwrap();
    assert_eq!(second_user.values[1], Value::Integer(1)); // COUNT(*)
    assert_eq!(second_user.values[2], Value::Integer(150)); // SUM(amount)

    // A fourth order, belonging to user 1.
    let mut new_order = Delta::new();
    new_order.insert(
        4,
        vec![Value::Integer(4), Value::Integer(1), Value::Integer(50)],
    );
    let output = agg.process_delta(new_order);

    // Only user 1's group is reported.
    assert_eq!(output.len(), 1);
    assert!(!output.changes.is_empty());
    let (updated_row, _) = &output.changes[0];
    assert_eq!(updated_row.values[0], Value::Integer(1)); // user_id
    assert_eq!(updated_row.values[1], Value::Integer(3)); // COUNT(*): 2 + 1
    assert_eq!(updated_row.values[2], Value::Integer(350)); // SUM(amount): 300 + 50
}
|
|
|
|
/// AVG(value) GROUP BY category: checks the averages after initialization and after one
/// additional row is fed into an existing group.
#[test]
fn test_avg_maintains_sum_and_count() {
    // Operator under test: AVG(value) grouped by category.
    let group_by = vec!["category".to_string()];
    let aggregates = vec![AggregateFunction::Avg("value".to_string())];
    let input_columns = vec![
        "id".to_string(),
        "category".to_string(),
        "value".to_string(),
    ];
    let mut agg = AggregateOperator::new(group_by, aggregates, input_columns);

    // Seed rows as (id, category, value).
    let mut seed = Delta::new();
    for (id, category, value) in [(1, "A", 10), (2, "A", 20), (3, "B", 30)] {
        seed.insert(
            id,
            vec![
                Value::Integer(id),
                Value::Text(Text::new(category)),
                Value::Integer(value),
            ],
        );
    }
    agg.initialize(seed);

    // Expected averages after initialization:
    //   A -> (10 + 20) / 2 = 15
    //   B -> 30 / 1 = 30
    let snapshot = agg.get_current_state();

    let key_a = Value::Text(Text::new("A"));
    let (row_a, _) = snapshot
        .changes
        .iter()
        .find(|(row, _)| row.values[0] == key_a)
        .unwrap();
    assert_eq!(row_a.values[1], Value::Float(15.0));

    let key_b = Value::Text(Text::new("B"));
    let (row_b, _) = snapshot
        .changes
        .iter()
        .find(|(row, _)| row.values[0] == key_b)
        .unwrap();
    assert_eq!(row_b.values[1], Value::Float(30.0));

    // Feed one more value (30) into category A.
    let mut extra = Delta::new();
    extra.insert(
        4,
        vec![
            Value::Integer(4),
            Value::Text(Text::new("A")),
            Value::Integer(30),
        ],
    );
    let output = agg.process_delta(extra);

    // Category A now averages (10 + 20 + 30) / 3 = 20.
    assert!(!output.changes.is_empty());
    let (updated_row, _) = &output.changes[0];
    assert_eq!(updated_row.values[0], Value::Text(Text::new("A")));
    assert_eq!(updated_row.values[1], Value::Float(20.0));
}
|
|
|
|
/// Deleting a row through `Delta::delete` must lower both COUNT(*) and SUM(value) for
/// the affected group.
#[test]
fn test_delete_updates_aggregates() {
    // Operator under test: COUNT(*) and SUM(value) grouped by category.
    let group_by = vec!["category".to_string()];
    let aggregates = vec![
        AggregateFunction::Count,
        AggregateFunction::Sum("value".to_string()),
    ];
    let input_columns = vec![
        "id".to_string(),
        "category".to_string(),
        "value".to_string(),
    ];
    let mut agg = AggregateOperator::new(group_by, aggregates, input_columns);

    // Two rows, both in category A.
    let mut seed = Delta::new();
    for (id, value) in [(1, 100), (2, 200)] {
        seed.insert(
            id,
            vec![
                Value::Integer(id),
                Value::Text(Text::new("A")),
                Value::Integer(value),
            ],
        );
    }
    agg.initialize(seed);

    // Before the delete: count = 2, sum = 300.
    let snapshot = agg.get_current_state();
    assert!(!snapshot.changes.is_empty());
    let (initial_row, _) = &snapshot.changes[0];
    assert_eq!(initial_row.values[1], Value::Integer(2)); // COUNT(*)
    assert_eq!(initial_row.values[2], Value::Integer(300)); // SUM(value)

    // Retract the first row.
    let mut removal = Delta::new();
    removal.delete(
        1,
        vec![
            Value::Integer(1),
            Value::Text(Text::new("A")),
            Value::Integer(100),
        ],
    );

    let output = agg.process_delta(removal);

    // After the delete: count = 1, sum = 200.
    assert!(!output.changes.is_empty());
    let (updated_row, _) = &output.changes[0];
    assert_eq!(updated_row.values[0], Value::Text(Text::new("A")));
    assert_eq!(updated_row.values[1], Value::Integer(1)); // COUNT(*): 2 - 1
    assert_eq!(updated_row.values[2], Value::Integer(200)); // SUM(value): 300 - 100
}
|
|
|
|
/// An UPDATE that changes a row's rowid (e.g. `UPDATE t SET a=1 WHERE a=3` on an
/// INTEGER PRIMARY KEY) reaches the operator as delete(old) + insert(new). The filter's
/// state must be consolidated so that only the new row is left.
#[test]
fn test_filter_operator_rowid_update() {
    // Filter under test: b > 2 over columns (a, b).
    let predicate = FilterPredicate::GreaterThan {
        column: "b".to_string(),
        value: Value::Integer(2),
    };
    let columns = vec!["a".to_string(), "b".to_string()];
    let mut filter = FilterOperator::new(predicate, columns);

    // Start with a single row: rowid = 3, values = [3, 3].
    let mut seed = Delta::new();
    seed.insert(3, vec![Value::Integer(3), Value::Integer(3)]);
    filter.initialize(seed);

    // The seeded row is the only entry in the initial state.
    let snapshot = filter.get_current_state();
    assert_eq!(snapshot.changes.len(), 1);
    let (seeded_row, _) = &snapshot.changes[0];
    assert_eq!(seeded_row.rowid, 3);
    assert_eq!(
        seeded_row.values,
        vec![Value::Integer(3), Value::Integer(3)]
    );

    // Move the row from rowid 3 to rowid 1, expressed as delete(3) followed by insert(1).
    let mut rowid_change = Delta::new();
    rowid_change.delete(3, vec![Value::Integer(3), Value::Integer(3)]);
    rowid_change.insert(1, vec![Value::Integer(1), Value::Integer(3)]);

    let output = filter.process_delta(rowid_change);

    // Both halves of the update satisfy b > 2, so both appear in the output delta.
    assert_eq!(output.changes.len(), 2);
    let (_, delete_weight) = &output.changes[0];
    let (_, insert_weight) = &output.changes[1];
    assert_eq!(*delete_weight, -1); // retraction of the old rowid
    assert_eq!(*insert_weight, 1); // insertion under the new rowid

    // The operator's own state must hold just the row under its new rowid,
    // with a positive weight.
    let consolidated = filter.get_current_state();
    assert_eq!(
        consolidated.changes.len(),
        1,
        "State should be consolidated to have only one row"
    );
    let (surviving_row, surviving_weight) = &consolidated.changes[0];
    assert_eq!(surviving_row.rowid, 1);
    assert_eq!(
        surviving_row.values,
        vec![Value::Integer(1), Value::Integer(3)]
    );
    assert_eq!(*surviving_weight, 1); // positive weight
}
|
|
}
|