Merge 'Fix materialized views where clause issues' from Glauber Costa

This PR fixes issues on WHERE clauses for Materialized Views.
Ultimately, the problems stem from how we had a positive list of what is
a "complex expression" (needs projection), defaulting everything else to
simple (doesn't need projection). I guess it is fair to call me naive.
It should obviously be the other way. Most expressions need computation
=)

Closes #3396
This commit is contained in:
Pekka Enberg
2025-09-27 14:27:06 +03:00
committed by GitHub
3 changed files with 390 additions and 22 deletions

View File

@@ -1638,6 +1638,106 @@ impl DbspCompiler {
},
})
}
LogicalExpr::Between {
expr,
low,
high,
negated,
} => {
// BETWEEN x AND y is rewritten as (expr >= x AND expr <= y)
// NOT BETWEEN x AND y is rewritten as (expr < x OR expr > y)
let expr_ast = Self::logical_to_ast_expr_with_schema(expr, schema)?;
let low_ast = Self::logical_to_ast_expr_with_schema(low, schema)?;
let high_ast = Self::logical_to_ast_expr_with_schema(high, schema)?;
if *negated {
// NOT BETWEEN: (expr < low OR expr > high)
Ok(ast::Expr::Binary(
Box::new(ast::Expr::Binary(
Box::new(expr_ast.clone()),
ast::Operator::Less,
Box::new(low_ast),
)),
ast::Operator::Or,
Box::new(ast::Expr::Binary(
Box::new(expr_ast),
ast::Operator::Greater,
Box::new(high_ast),
)),
))
} else {
// BETWEEN: (expr >= low AND expr <= high)
Ok(ast::Expr::Binary(
Box::new(ast::Expr::Binary(
Box::new(expr_ast.clone()),
ast::Operator::GreaterEquals,
Box::new(low_ast),
)),
ast::Operator::And,
Box::new(ast::Expr::Binary(
Box::new(expr_ast),
ast::Operator::LessEquals,
Box::new(high_ast),
)),
))
}
}
LogicalExpr::InList {
expr,
list,
negated,
} => {
let lhs = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?);
let values: Result<Vec<_>> = list
.iter()
.map(|item| {
let ast_expr = Self::logical_to_ast_expr_with_schema(item, schema)?;
Ok(Box::new(ast_expr))
})
.collect();
Ok(ast::Expr::InList {
lhs,
not: *negated,
rhs: values?,
})
}
LogicalExpr::Like {
expr,
pattern,
escape,
negated,
} => {
let lhs = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?);
let rhs = Box::new(Self::logical_to_ast_expr_with_schema(pattern, schema)?);
let escape_expr = escape
.map(|c| Box::new(ast::Expr::Literal(ast::Literal::String(c.to_string()))));
Ok(ast::Expr::Like {
lhs,
not: *negated,
op: ast::LikeOperator::Like,
rhs,
escape: escape_expr,
})
}
LogicalExpr::IsNull { expr, negated } => {
let inner_expr = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?);
if *negated {
// IS NOT NULL needs to be represented differently
Ok(ast::Expr::Unary(
ast::UnaryOperator::Not,
Box::new(ast::Expr::IsNull(inner_expr)),
))
} else {
Ok(ast::Expr::IsNull(inner_expr))
}
}
LogicalExpr::Cast { expr, type_name } => {
let inner_expr = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?);
Ok(ast::Expr::Cast {
expr: inner_expr,
type_name: type_name.clone(),
})
}
_ => Err(LimboError::ParseError(format!(
"Cannot convert LogicalExpr to AST Expr: {expr:?}"
))),
@@ -1648,21 +1748,55 @@ impl DbspCompiler {
fn predicate_needs_projection(expr: &LogicalExpr) -> bool {
match expr {
LogicalExpr::BinaryExpr { left, op, right } => {
// Only these specific simple patterns DON'T need projection
match (left.as_ref(), right.as_ref()) {
// Simple column to literal - OK
(LogicalExpr::Column(_), LogicalExpr::Literal(_)) => false,
// Simple column to column - OK
(LogicalExpr::Column(_), LogicalExpr::Column(_)) => false,
// Simple column to literal comparisons
(LogicalExpr::Column(_), LogicalExpr::Literal(_))
if matches!(
op,
BinaryOperator::Equals
| BinaryOperator::NotEquals
| BinaryOperator::Greater
| BinaryOperator::GreaterEquals
| BinaryOperator::Less
| BinaryOperator::LessEquals
) =>
{
false
}
// Simple column to column comparisons
(LogicalExpr::Column(_), LogicalExpr::Column(_))
if matches!(
op,
BinaryOperator::Equals
| BinaryOperator::NotEquals
| BinaryOperator::Greater
| BinaryOperator::GreaterEquals
| BinaryOperator::Less
| BinaryOperator::LessEquals
) =>
{
false
}
// AND/OR of simple expressions - check recursively
_ if matches!(op, BinaryOperator::And | BinaryOperator::Or) => {
Self::predicate_needs_projection(left)
|| Self::predicate_needs_projection(right)
}
// Any other pattern needs projection
// Everything else needs projection
_ => true,
}
}
_ => false,
// These simple cases don't need projection
LogicalExpr::Column(_) | LogicalExpr::Literal(_) => false,
// Default: assume we need projection for safety
// This includes: Between, InList, Like, IsNull, Cast, ScalarFunction, Case,
// InSubquery, Exists, ScalarSubquery, and any future expression types
_ => true,
}
}
@@ -1684,7 +1818,7 @@ impl DbspCompiler {
return Ok(expr.clone());
}
// For expressions like (age * 2) > 30, we want to extract (age * 2)
// For comparison expressions, check if we need to extract a subexpression
if matches!(
op,
BinaryOperator::Greater
@@ -1694,17 +1828,30 @@ impl DbspCompiler {
| BinaryOperator::Equals
| BinaryOperator::NotEquals
) {
// Return the left side if it's not a simple column
if !matches!(left.as_ref(), LogicalExpr::Column(_)) {
Ok((**left).clone())
} else {
// Must be the whole expression then
Ok(expr.clone())
// If the left side is complex (not a column), extract it
if !matches!(
left.as_ref(),
LogicalExpr::Column(_) | LogicalExpr::Literal(_)
) {
return Ok((**left).clone());
}
// If the right side is complex (not a literal), extract it
if !matches!(
right.as_ref(),
LogicalExpr::Column(_) | LogicalExpr::Literal(_)
) {
return Ok((**right).clone());
}
// Both sides are simple but the expression as a whole might need projection
// (e.g., for arithmetic operations)
Ok(expr.clone())
} else {
// For other binary operators (arithmetic, etc.), return the whole expression
Ok(expr.clone())
}
}
// For non-binary expressions (BETWEEN, IN, LIKE, functions, etc.),
// we need to compute the whole expression as a boolean
_ => Ok(expr.clone()),
}
}
@@ -1729,20 +1876,61 @@ impl DbspCompiler {
// Check if this is a complex comparison that needs replacement
if Self::predicate_needs_projection(expr) {
// Replace the complex expression (left side) with the temp column
return Ok(LogicalExpr::BinaryExpr {
left: Box::new(LogicalExpr::Column(Column {
name: temp_column_name.to_string(),
table: None,
})),
op: *op,
right: right.clone(),
});
// Determine which side is complex and needs replacement
let left_is_simple = matches!(
left.as_ref(),
LogicalExpr::Column(_) | LogicalExpr::Literal(_)
);
let right_is_simple = matches!(
right.as_ref(),
LogicalExpr::Column(_) | LogicalExpr::Literal(_)
);
if !left_is_simple {
// Left side is complex - replace it with temp column
return Ok(LogicalExpr::BinaryExpr {
left: Box::new(LogicalExpr::Column(Column {
name: temp_column_name.to_string(),
table: None,
})),
op: *op,
right: right.clone(),
});
} else if !right_is_simple {
// Right side is complex - replace it with temp column
return Ok(LogicalExpr::BinaryExpr {
left: left.clone(),
op: *op,
right: Box::new(LogicalExpr::Column(Column {
name: temp_column_name.to_string(),
table: None,
})),
});
} else {
// Both sides are simple, but the expression as a whole needs projection
// This shouldn't happen normally, but keep the expression as-is
return Ok(expr.clone());
}
}
// Simple comparison - keep as is
Ok(expr.clone())
}
// For non-binary expressions that need projection (BETWEEN, IN, etc.),
// replace the whole expression with a column reference to the temp column
// The temp column will hold the boolean result of evaluating the expression
_ if Self::predicate_needs_projection(expr) => {
// The complex expression result is in the temp column
// We need to check if it's true (non-zero)
Ok(LogicalExpr::BinaryExpr {
left: Box::new(LogicalExpr::Column(Column {
name: temp_column_name.to_string(),
table: None,
})),
op: BinaryOperator::Equals,
right: Box::new(LogicalExpr::Literal(Value::Integer(1))), // true = 1 in SQL
})
}
_ => Ok(expr.clone()),
}
}

View File

@@ -346,6 +346,11 @@ pub enum LogicalExpr {
escape: Option<char>,
negated: bool,
},
/// CAST expression
Cast {
expr: Box<LogicalExpr>,
type_name: Option<ast::Type>,
},
}
/// Column reference
@@ -1774,6 +1779,14 @@ impl<'a> LogicalPlanBuilder<'a> {
self.build_expr(&exprs[0], _schema)
}
ast::Expr::Cast { expr, type_name } => {
let inner = self.build_expr(expr, _schema)?;
Ok(LogicalExpr::Cast {
expr: Box::new(inner),
type_name: type_name.clone(),
})
}
_ => Err(LimboError::ParseError(format!(
"Unsupported expression type in logical plan: {expr:?}"
))),

View File

@@ -1428,3 +1428,170 @@ do_execsql_test_on_specific_db {:memory:} matview-union-all-text-filter {
2
3
4}
# Test BETWEEN in WHERE clause
do_execsql_test_on_specific_db {:memory:} matview-between-filter {
CREATE TABLE products(id INTEGER PRIMARY KEY, name TEXT, price INTEGER);
INSERT INTO products VALUES
(1, 'Cheap', 10),
(2, 'Mid1', 50),
(3, 'Mid2', 75),
(4, 'Expensive', 150);
CREATE MATERIALIZED VIEW mid_range AS
SELECT id, name, price FROM products WHERE price BETWEEN 40 AND 100;
SELECT * FROM mid_range ORDER BY id;
} {2|Mid1|50
3|Mid2|75}
# Test IN list in WHERE clause
do_execsql_test_on_specific_db {:memory:} matview-in-filter {
CREATE TABLE orders(id INTEGER PRIMARY KEY, customer TEXT, status TEXT);
INSERT INTO orders VALUES
(1, 'Alice', 'shipped'),
(2, 'Bob', 'pending'),
(3, 'Charlie', 'delivered'),
(4, 'David', 'cancelled'),
(5, 'Eve', 'shipped');
CREATE MATERIALIZED VIEW active_orders AS
SELECT id, customer FROM orders WHERE status IN ('pending', 'shipped');
SELECT * FROM active_orders ORDER BY id;
} {1|Alice
2|Bob
5|Eve}
# Test CAST with TEXT in WHERE clause
do_execsql_test_on_specific_db {:memory:} matview-cast-text {
CREATE TABLE records(id INTEGER PRIMARY KEY, code TEXT);
INSERT INTO records VALUES
(1, 'A100'),
(2, 'B200'),
(3, 'A300');
CREATE MATERIALIZED VIEW filtered AS
SELECT id FROM records WHERE code < CAST('B' AS TEXT);
SELECT * FROM filtered ORDER BY id;
} {1
3}
# Test BETWEEN and IN together
do_execsql_test_on_specific_db {:memory:} matview-between-and-in {
CREATE TABLE inventory(id INTEGER PRIMARY KEY, product TEXT, quantity INTEGER, location TEXT);
INSERT INTO inventory VALUES
(1, 'Widget', 50, 'WH1'),
(2, 'Gadget', 30, 'WH2'),
(3, 'Tool', 80, 'WH1'),
(4, 'Part', 15, 'WH3'),
(5, 'Device', 45, 'WH2');
CREATE MATERIALIZED VIEW wh1_wh2_medium_stock AS
SELECT id, product, quantity
FROM inventory
WHERE quantity BETWEEN 25 AND 60
AND location IN ('WH1', 'WH2');
SELECT * FROM wh1_wh2_medium_stock ORDER BY id;
} {1|Widget|50
2|Gadget|30
5|Device|45}
# Test complex OR conditions with IN
do_execsql_test_on_specific_db {:memory:} matview-complex-or-with-in {
CREATE TABLE shipments(id INTEGER PRIMARY KEY, size INTEGER, mode TEXT, priority TEXT);
INSERT INTO shipments VALUES
(1, 5, 'AIR', 'high'),
(2, 15, 'TRUCK', 'normal'),
(3, 8, 'AIR', 'normal'),
(4, 20, 'SHIP', 'low'),
(5, 12, 'AIR_REG', 'high');
CREATE MATERIALIZED VIEW express_shipments AS
SELECT id, size, mode
FROM shipments
WHERE (size BETWEEN 5 AND 10 AND mode IN ('AIR', 'AIR_REG'))
OR priority = 'high';
SELECT * FROM express_shipments ORDER BY id;
} {1|5|AIR
3|8|AIR
5|12|AIR_REG}
# Test join with BETWEEN in WHERE
do_execsql_test_on_specific_db {:memory:} matview-join-with-between {
CREATE TABLE parts(id INTEGER PRIMARY KEY, size INTEGER);
CREATE TABLE suppliers(id INTEGER PRIMARY KEY, part_id INTEGER, price INTEGER);
INSERT INTO parts VALUES (1, 5), (2, 10), (3, 20);
INSERT INTO suppliers VALUES (1, 1, 100), (2, 2, 150), (3, 3, 200);
CREATE MATERIALIZED VIEW medium_parts AS
SELECT p.id, p.size, s.price
FROM parts p
JOIN suppliers s ON p.id = s.part_id
WHERE p.size BETWEEN 8 AND 15;
SELECT * FROM medium_parts ORDER BY id;
} {2|10|150}
# Test join with IN in WHERE
do_execsql_test_on_specific_db {:memory:} matview-join-with-in {
CREATE TABLE customers(id INTEGER PRIMARY KEY, region TEXT);
CREATE TABLE orders(id INTEGER PRIMARY KEY, customer_id INTEGER, amount INTEGER);
INSERT INTO customers VALUES (1, 'USA'), (2, 'Canada'), (3, 'UK'), (4, 'Mexico');
INSERT INTO orders VALUES (1, 1, 100), (2, 2, 200), (3, 3, 150), (4, 4, 300);
CREATE MATERIALIZED VIEW north_america_orders AS
SELECT c.region, o.amount
FROM customers c
JOIN orders o ON c.id = o.customer_id
WHERE c.region IN ('USA', 'Canada', 'Mexico');
SELECT * FROM north_america_orders ORDER BY region, amount;
} {Canada|200
Mexico|300
USA|100}
# Test incremental maintenance with BETWEEN
do_execsql_test_on_specific_db {:memory:} matview-between-incremental {
CREATE TABLE items(id INTEGER PRIMARY KEY, value INTEGER);
INSERT INTO items VALUES (1, 5), (2, 15);
CREATE MATERIALIZED VIEW mid_values AS
SELECT id, value FROM items WHERE value BETWEEN 10 AND 20;
SELECT COUNT(*) FROM mid_values;
INSERT INTO items VALUES (3, 12), (4, 25);
SELECT * FROM mid_values ORDER BY id;
UPDATE items SET value = 30 WHERE id = 2;
SELECT * FROM mid_values ORDER BY id;
} {1
2|15
3|12
3|12}
# Test incremental maintenance with IN
do_execsql_test_on_specific_db {:memory:} matview-in-incremental {
CREATE TABLE logs(id INTEGER PRIMARY KEY, level TEXT, message TEXT);
INSERT INTO logs VALUES (1, 'INFO', 'start'), (2, 'DEBUG', 'test');
CREATE MATERIALIZED VIEW important_logs AS
SELECT id, level, message FROM logs WHERE level IN ('ERROR', 'WARN', 'INFO');
SELECT COUNT(*) FROM important_logs;
INSERT INTO logs VALUES (3, 'ERROR', 'fail'), (4, 'TRACE', 'detail');
SELECT * FROM important_logs ORDER BY id;
DELETE FROM logs WHERE id = 1;
SELECT * FROM important_logs ORDER BY id;
} {1
1|INFO|start
3|ERROR|fail
3|ERROR|fail}