Make sure complex expressions in filters go through Project

We had code for this, but the code had a fatal flaw: it tried to detect
a complex operation (an operation that needs projection), and return
false (no need for projection), for the others.

This is the exact opposite of what we should do: we should identify the
*simple* operations, and then return true (needs projection) for the
rest.

CAST is a special beast, since it is not a function, but rather, a
special opcode. Everything else above is the true just the same. But for
CAST, we have to do the extra work to capture it in the logical plan and
pass it down.

Fixes #3372
Fixes #3370
Fixes #3369
This commit is contained in:
Glauber Costa
2025-09-26 21:57:38 -03:00
parent 9cd869f660
commit 3ee97ddf36
3 changed files with 390 additions and 22 deletions

View File

@@ -1638,6 +1638,106 @@ impl DbspCompiler {
}, },
}) })
} }
LogicalExpr::Between {
expr,
low,
high,
negated,
} => {
// BETWEEN x AND y is rewritten as (expr >= x AND expr <= y)
// NOT BETWEEN x AND y is rewritten as (expr < x OR expr > y)
let expr_ast = Self::logical_to_ast_expr_with_schema(expr, schema)?;
let low_ast = Self::logical_to_ast_expr_with_schema(low, schema)?;
let high_ast = Self::logical_to_ast_expr_with_schema(high, schema)?;
if *negated {
// NOT BETWEEN: (expr < low OR expr > high)
Ok(ast::Expr::Binary(
Box::new(ast::Expr::Binary(
Box::new(expr_ast.clone()),
ast::Operator::Less,
Box::new(low_ast),
)),
ast::Operator::Or,
Box::new(ast::Expr::Binary(
Box::new(expr_ast),
ast::Operator::Greater,
Box::new(high_ast),
)),
))
} else {
// BETWEEN: (expr >= low AND expr <= high)
Ok(ast::Expr::Binary(
Box::new(ast::Expr::Binary(
Box::new(expr_ast.clone()),
ast::Operator::GreaterEquals,
Box::new(low_ast),
)),
ast::Operator::And,
Box::new(ast::Expr::Binary(
Box::new(expr_ast),
ast::Operator::LessEquals,
Box::new(high_ast),
)),
))
}
}
LogicalExpr::InList {
expr,
list,
negated,
} => {
let lhs = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?);
let values: Result<Vec<_>> = list
.iter()
.map(|item| {
let ast_expr = Self::logical_to_ast_expr_with_schema(item, schema)?;
Ok(Box::new(ast_expr))
})
.collect();
Ok(ast::Expr::InList {
lhs,
not: *negated,
rhs: values?,
})
}
LogicalExpr::Like {
expr,
pattern,
escape,
negated,
} => {
let lhs = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?);
let rhs = Box::new(Self::logical_to_ast_expr_with_schema(pattern, schema)?);
let escape_expr = escape
.map(|c| Box::new(ast::Expr::Literal(ast::Literal::String(c.to_string()))));
Ok(ast::Expr::Like {
lhs,
not: *negated,
op: ast::LikeOperator::Like,
rhs,
escape: escape_expr,
})
}
LogicalExpr::IsNull { expr, negated } => {
let inner_expr = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?);
if *negated {
// IS NOT NULL needs to be represented differently
Ok(ast::Expr::Unary(
ast::UnaryOperator::Not,
Box::new(ast::Expr::IsNull(inner_expr)),
))
} else {
Ok(ast::Expr::IsNull(inner_expr))
}
}
LogicalExpr::Cast { expr, type_name } => {
let inner_expr = Box::new(Self::logical_to_ast_expr_with_schema(expr, schema)?);
Ok(ast::Expr::Cast {
expr: inner_expr,
type_name: type_name.clone(),
})
}
_ => Err(LimboError::ParseError(format!( _ => Err(LimboError::ParseError(format!(
"Cannot convert LogicalExpr to AST Expr: {expr:?}" "Cannot convert LogicalExpr to AST Expr: {expr:?}"
))), ))),
@@ -1648,21 +1748,55 @@ impl DbspCompiler {
fn predicate_needs_projection(expr: &LogicalExpr) -> bool { fn predicate_needs_projection(expr: &LogicalExpr) -> bool {
match expr { match expr {
LogicalExpr::BinaryExpr { left, op, right } => { LogicalExpr::BinaryExpr { left, op, right } => {
// Only these specific simple patterns DON'T need projection
match (left.as_ref(), right.as_ref()) { match (left.as_ref(), right.as_ref()) {
// Simple column to literal - OK // Simple column to literal comparisons
(LogicalExpr::Column(_), LogicalExpr::Literal(_)) => false, (LogicalExpr::Column(_), LogicalExpr::Literal(_))
// Simple column to column - OK if matches!(
(LogicalExpr::Column(_), LogicalExpr::Column(_)) => false, op,
BinaryOperator::Equals
| BinaryOperator::NotEquals
| BinaryOperator::Greater
| BinaryOperator::GreaterEquals
| BinaryOperator::Less
| BinaryOperator::LessEquals
) =>
{
false
}
// Simple column to column comparisons
(LogicalExpr::Column(_), LogicalExpr::Column(_))
if matches!(
op,
BinaryOperator::Equals
| BinaryOperator::NotEquals
| BinaryOperator::Greater
| BinaryOperator::GreaterEquals
| BinaryOperator::Less
| BinaryOperator::LessEquals
) =>
{
false
}
// AND/OR of simple expressions - check recursively // AND/OR of simple expressions - check recursively
_ if matches!(op, BinaryOperator::And | BinaryOperator::Or) => { _ if matches!(op, BinaryOperator::And | BinaryOperator::Or) => {
Self::predicate_needs_projection(left) Self::predicate_needs_projection(left)
|| Self::predicate_needs_projection(right) || Self::predicate_needs_projection(right)
} }
// Any other pattern needs projection
// Everything else needs projection
_ => true, _ => true,
} }
} }
_ => false, // These simple cases don't need projection
LogicalExpr::Column(_) | LogicalExpr::Literal(_) => false,
// Default: assume we need projection for safety
// This includes: Between, InList, Like, IsNull, Cast, ScalarFunction, Case,
// InSubquery, Exists, ScalarSubquery, and any future expression types
_ => true,
} }
} }
@@ -1684,7 +1818,7 @@ impl DbspCompiler {
return Ok(expr.clone()); return Ok(expr.clone());
} }
// For expressions like (age * 2) > 30, we want to extract (age * 2) // For comparison expressions, check if we need to extract a subexpression
if matches!( if matches!(
op, op,
BinaryOperator::Greater BinaryOperator::Greater
@@ -1694,17 +1828,30 @@ impl DbspCompiler {
| BinaryOperator::Equals | BinaryOperator::Equals
| BinaryOperator::NotEquals | BinaryOperator::NotEquals
) { ) {
// Return the left side if it's not a simple column // If the left side is complex (not a column), extract it
if !matches!(left.as_ref(), LogicalExpr::Column(_)) { if !matches!(
Ok((**left).clone()) left.as_ref(),
} else { LogicalExpr::Column(_) | LogicalExpr::Literal(_)
// Must be the whole expression then ) {
Ok(expr.clone()) return Ok((**left).clone());
} }
// If the right side is complex (not a literal), extract it
if !matches!(
right.as_ref(),
LogicalExpr::Column(_) | LogicalExpr::Literal(_)
) {
return Ok((**right).clone());
}
// Both sides are simple but the expression as a whole might need projection
// (e.g., for arithmetic operations)
Ok(expr.clone())
} else { } else {
// For other binary operators (arithmetic, etc.), return the whole expression
Ok(expr.clone()) Ok(expr.clone())
} }
} }
// For non-binary expressions (BETWEEN, IN, LIKE, functions, etc.),
// we need to compute the whole expression as a boolean
_ => Ok(expr.clone()), _ => Ok(expr.clone()),
} }
} }
@@ -1729,7 +1876,18 @@ impl DbspCompiler {
// Check if this is a complex comparison that needs replacement // Check if this is a complex comparison that needs replacement
if Self::predicate_needs_projection(expr) { if Self::predicate_needs_projection(expr) {
// Replace the complex expression (left side) with the temp column // Determine which side is complex and needs replacement
let left_is_simple = matches!(
left.as_ref(),
LogicalExpr::Column(_) | LogicalExpr::Literal(_)
);
let right_is_simple = matches!(
right.as_ref(),
LogicalExpr::Column(_) | LogicalExpr::Literal(_)
);
if !left_is_simple {
// Left side is complex - replace it with temp column
return Ok(LogicalExpr::BinaryExpr { return Ok(LogicalExpr::BinaryExpr {
left: Box::new(LogicalExpr::Column(Column { left: Box::new(LogicalExpr::Column(Column {
name: temp_column_name.to_string(), name: temp_column_name.to_string(),
@@ -1738,11 +1896,41 @@ impl DbspCompiler {
op: *op, op: *op,
right: right.clone(), right: right.clone(),
}); });
} else if !right_is_simple {
// Right side is complex - replace it with temp column
return Ok(LogicalExpr::BinaryExpr {
left: left.clone(),
op: *op,
right: Box::new(LogicalExpr::Column(Column {
name: temp_column_name.to_string(),
table: None,
})),
});
} else {
// Both sides are simple, but the expression as a whole needs projection
// This shouldn't happen normally, but keep the expression as-is
return Ok(expr.clone());
}
} }
// Simple comparison - keep as is // Simple comparison - keep as is
Ok(expr.clone()) Ok(expr.clone())
} }
// For non-binary expressions that need projection (BETWEEN, IN, etc.),
// replace the whole expression with a column reference to the temp column
// The temp column will hold the boolean result of evaluating the expression
_ if Self::predicate_needs_projection(expr) => {
// The complex expression result is in the temp column
// We need to check if it's true (non-zero)
Ok(LogicalExpr::BinaryExpr {
left: Box::new(LogicalExpr::Column(Column {
name: temp_column_name.to_string(),
table: None,
})),
op: BinaryOperator::Equals,
right: Box::new(LogicalExpr::Literal(Value::Integer(1))), // true = 1 in SQL
})
}
_ => Ok(expr.clone()), _ => Ok(expr.clone()),
} }
} }

View File

@@ -346,6 +346,11 @@ pub enum LogicalExpr {
escape: Option<char>, escape: Option<char>,
negated: bool, negated: bool,
}, },
/// CAST expression
Cast {
expr: Box<LogicalExpr>,
type_name: Option<ast::Type>,
},
} }
/// Column reference /// Column reference
@@ -1774,6 +1779,14 @@ impl<'a> LogicalPlanBuilder<'a> {
self.build_expr(&exprs[0], _schema) self.build_expr(&exprs[0], _schema)
} }
ast::Expr::Cast { expr, type_name } => {
let inner = self.build_expr(expr, _schema)?;
Ok(LogicalExpr::Cast {
expr: Box::new(inner),
type_name: type_name.clone(),
})
}
_ => Err(LimboError::ParseError(format!( _ => Err(LimboError::ParseError(format!(
"Unsupported expression type in logical plan: {expr:?}" "Unsupported expression type in logical plan: {expr:?}"
))), ))),

View File

@@ -1428,3 +1428,170 @@ do_execsql_test_on_specific_db {:memory:} matview-union-all-text-filter {
2 2
3 3
4} 4}
# Test BETWEEN in WHERE clause
do_execsql_test_on_specific_db {:memory:} matview-between-filter {
CREATE TABLE products(id INTEGER PRIMARY KEY, name TEXT, price INTEGER);
INSERT INTO products VALUES
(1, 'Cheap', 10),
(2, 'Mid1', 50),
(3, 'Mid2', 75),
(4, 'Expensive', 150);
CREATE MATERIALIZED VIEW mid_range AS
SELECT id, name, price FROM products WHERE price BETWEEN 40 AND 100;
SELECT * FROM mid_range ORDER BY id;
} {2|Mid1|50
3|Mid2|75}
# Test IN list in WHERE clause
do_execsql_test_on_specific_db {:memory:} matview-in-filter {
CREATE TABLE orders(id INTEGER PRIMARY KEY, customer TEXT, status TEXT);
INSERT INTO orders VALUES
(1, 'Alice', 'shipped'),
(2, 'Bob', 'pending'),
(3, 'Charlie', 'delivered'),
(4, 'David', 'cancelled'),
(5, 'Eve', 'shipped');
CREATE MATERIALIZED VIEW active_orders AS
SELECT id, customer FROM orders WHERE status IN ('pending', 'shipped');
SELECT * FROM active_orders ORDER BY id;
} {1|Alice
2|Bob
5|Eve}
# Test CAST with TEXT in WHERE clause
do_execsql_test_on_specific_db {:memory:} matview-cast-text {
CREATE TABLE records(id INTEGER PRIMARY KEY, code TEXT);
INSERT INTO records VALUES
(1, 'A100'),
(2, 'B200'),
(3, 'A300');
CREATE MATERIALIZED VIEW filtered AS
SELECT id FROM records WHERE code < CAST('B' AS TEXT);
SELECT * FROM filtered ORDER BY id;
} {1
3}
# Test BETWEEN and IN together
do_execsql_test_on_specific_db {:memory:} matview-between-and-in {
CREATE TABLE inventory(id INTEGER PRIMARY KEY, product TEXT, quantity INTEGER, location TEXT);
INSERT INTO inventory VALUES
(1, 'Widget', 50, 'WH1'),
(2, 'Gadget', 30, 'WH2'),
(3, 'Tool', 80, 'WH1'),
(4, 'Part', 15, 'WH3'),
(5, 'Device', 45, 'WH2');
CREATE MATERIALIZED VIEW wh1_wh2_medium_stock AS
SELECT id, product, quantity
FROM inventory
WHERE quantity BETWEEN 25 AND 60
AND location IN ('WH1', 'WH2');
SELECT * FROM wh1_wh2_medium_stock ORDER BY id;
} {1|Widget|50
2|Gadget|30
5|Device|45}
# Test complex OR conditions with IN
do_execsql_test_on_specific_db {:memory:} matview-complex-or-with-in {
CREATE TABLE shipments(id INTEGER PRIMARY KEY, size INTEGER, mode TEXT, priority TEXT);
INSERT INTO shipments VALUES
(1, 5, 'AIR', 'high'),
(2, 15, 'TRUCK', 'normal'),
(3, 8, 'AIR', 'normal'),
(4, 20, 'SHIP', 'low'),
(5, 12, 'AIR_REG', 'high');
CREATE MATERIALIZED VIEW express_shipments AS
SELECT id, size, mode
FROM shipments
WHERE (size BETWEEN 5 AND 10 AND mode IN ('AIR', 'AIR_REG'))
OR priority = 'high';
SELECT * FROM express_shipments ORDER BY id;
} {1|5|AIR
3|8|AIR
5|12|AIR_REG}
# Test join with BETWEEN in WHERE
do_execsql_test_on_specific_db {:memory:} matview-join-with-between {
CREATE TABLE parts(id INTEGER PRIMARY KEY, size INTEGER);
CREATE TABLE suppliers(id INTEGER PRIMARY KEY, part_id INTEGER, price INTEGER);
INSERT INTO parts VALUES (1, 5), (2, 10), (3, 20);
INSERT INTO suppliers VALUES (1, 1, 100), (2, 2, 150), (3, 3, 200);
CREATE MATERIALIZED VIEW medium_parts AS
SELECT p.id, p.size, s.price
FROM parts p
JOIN suppliers s ON p.id = s.part_id
WHERE p.size BETWEEN 8 AND 15;
SELECT * FROM medium_parts ORDER BY id;
} {2|10|150}
# Test join with IN in WHERE
do_execsql_test_on_specific_db {:memory:} matview-join-with-in {
CREATE TABLE customers(id INTEGER PRIMARY KEY, region TEXT);
CREATE TABLE orders(id INTEGER PRIMARY KEY, customer_id INTEGER, amount INTEGER);
INSERT INTO customers VALUES (1, 'USA'), (2, 'Canada'), (3, 'UK'), (4, 'Mexico');
INSERT INTO orders VALUES (1, 1, 100), (2, 2, 200), (3, 3, 150), (4, 4, 300);
CREATE MATERIALIZED VIEW north_america_orders AS
SELECT c.region, o.amount
FROM customers c
JOIN orders o ON c.id = o.customer_id
WHERE c.region IN ('USA', 'Canada', 'Mexico');
SELECT * FROM north_america_orders ORDER BY region, amount;
} {Canada|200
Mexico|300
USA|100}
# Test incremental maintenance with BETWEEN
do_execsql_test_on_specific_db {:memory:} matview-between-incremental {
CREATE TABLE items(id INTEGER PRIMARY KEY, value INTEGER);
INSERT INTO items VALUES (1, 5), (2, 15);
CREATE MATERIALIZED VIEW mid_values AS
SELECT id, value FROM items WHERE value BETWEEN 10 AND 20;
SELECT COUNT(*) FROM mid_values;
INSERT INTO items VALUES (3, 12), (4, 25);
SELECT * FROM mid_values ORDER BY id;
UPDATE items SET value = 30 WHERE id = 2;
SELECT * FROM mid_values ORDER BY id;
} {1
2|15
3|12
3|12}
# Test incremental maintenance with IN
do_execsql_test_on_specific_db {:memory:} matview-in-incremental {
CREATE TABLE logs(id INTEGER PRIMARY KEY, level TEXT, message TEXT);
INSERT INTO logs VALUES (1, 'INFO', 'start'), (2, 'DEBUG', 'test');
CREATE MATERIALIZED VIEW important_logs AS
SELECT id, level, message FROM logs WHERE level IN ('ERROR', 'WARN', 'INFO');
SELECT COUNT(*) FROM important_logs;
INSERT INTO logs VALUES (3, 'ERROR', 'fail'), (4, 'TRACE', 'detail');
SELECT * FROM important_logs ORDER BY id;
DELETE FROM logs WHERE id = 1;
SELECT * FROM important_logs ORDER BY id;
} {1
1|INFO|start
3|ERROR|fail
3|ERROR|fail}