TableMask: take tables referenced in subqueries into account

This influences valid potential join orders.
This commit is contained in:
Jussi Saurio
2025-10-27 15:58:49 +02:00
parent 59363a1be3
commit f288dfd3d0
5 changed files with 134 additions and 47 deletions

View File

@@ -9,7 +9,7 @@ use crate::{
translate::{
collate::get_collseq_from_expr,
expr::as_binary_components,
plan::{JoinOrderMember, TableReferences, WhereTerm},
plan::{JoinOrderMember, NonFromClauseSubquery, TableReferences, WhereTerm},
planner::{table_mask_from_expr, TableMask},
},
util::exprs_are_equivalent,
@@ -188,6 +188,7 @@ pub fn constraints_from_where_clause(
where_clause: &[WhereTerm],
table_references: &TableReferences,
available_indexes: &HashMap<String, VecDeque<Arc<Index>>>,
subqueries: &[NonFromClauseSubquery],
) -> Result<Vec<TableConstraints>> {
let mut constraints = Vec::new();
@@ -241,7 +242,7 @@ pub fn constraints_from_where_clause(
where_clause_pos: (i, BinaryExprSide::Rhs),
operator,
table_col_pos: *column,
lhs_mask: table_mask_from_expr(rhs, table_references)?,
lhs_mask: table_mask_from_expr(rhs, table_references, subqueries)?,
selectivity: estimate_selectivity(table_column, operator),
usable: true,
});
@@ -258,7 +259,7 @@ pub fn constraints_from_where_clause(
where_clause_pos: (i, BinaryExprSide::Rhs),
operator,
table_col_pos: rowid_alias_column.unwrap(),
lhs_mask: table_mask_from_expr(rhs, table_references)?,
lhs_mask: table_mask_from_expr(rhs, table_references, subqueries)?,
selectivity: estimate_selectivity(table_column, operator),
usable: true,
});
@@ -274,7 +275,7 @@ pub fn constraints_from_where_clause(
where_clause_pos: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(operator),
table_col_pos: *column,
lhs_mask: table_mask_from_expr(lhs, table_references)?,
lhs_mask: table_mask_from_expr(lhs, table_references, subqueries)?,
selectivity: estimate_selectivity(table_column, operator),
usable: true,
});
@@ -288,7 +289,7 @@ pub fn constraints_from_where_clause(
where_clause_pos: (i, BinaryExprSide::Lhs),
operator: opposite_cmp_op(operator),
table_col_pos: rowid_alias_column.unwrap(),
lhs_mask: table_mask_from_expr(lhs, table_references)?,
lhs_mask: table_mask_from_expr(lhs, table_references, subqueries)?,
selectivity: estimate_selectivity(table_column, operator),
usable: true,
});

View File

@@ -540,9 +540,13 @@ mod tests {
let where_clause = vec![];
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
let result = compute_best_join_order(
table_references.joined_tables(),
@@ -569,9 +573,13 @@ mod tests {
let where_clause = vec![];
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
// SELECT * from test_table
// expecting best_best_plan() not to do any work due to empty where clause.
@@ -610,9 +618,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let available_indexes = HashMap::new();
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
// SELECT * FROM test_table WHERE id = 42
// expecting a RowidEq access method because id is a rowid alias.
@@ -678,9 +690,13 @@ mod tests {
});
available_indexes.insert("test_table".to_string(), VecDeque::from([index]));
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
// SELECT * FROM test_table WHERE id = 42
// expecting an IndexScan access method because id is a primary key with an index
let result = compute_best_join_order(
@@ -757,9 +773,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
let result = compute_best_join_order(
table_references.joined_tables(),
@@ -932,9 +952,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
let result = compute_best_join_order(
table_references.joined_tables(),
@@ -1041,9 +1065,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let available_indexes = HashMap::new();
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(),
@@ -1147,9 +1175,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let available_indexes = HashMap::new();
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
let result = compute_best_join_order(
table_references.joined_tables(),
@@ -1231,9 +1263,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
// Run the optimizer
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
@@ -1352,9 +1388,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(),
@@ -1460,9 +1500,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(),
@@ -1586,9 +1630,13 @@ mod tests {
let table_references = TableReferences::new(joined_tables, vec![]);
let access_methods_arena = RefCell::new(Vec::new());
let table_constraints =
constraints_from_where_clause(&where_clause, &table_references, &available_indexes)
.unwrap();
let table_constraints = constraints_from_where_clause(
&where_clause,
&table_references,
&available_indexes,
&[],
)
.unwrap();
let BestJoinOrderResult { best_plan, .. } = compute_best_join_order(
table_references.joined_tables(),

View File

@@ -23,8 +23,8 @@ use crate::{
constraints::{RangeConstraintRef, SeekRangeConstraint, TableConstraints},
},
plan::{
ColumnUsedMask, OuterQueryReference, QueryDestination, ResultSetColumn, Scan,
SeekKeyComponent,
ColumnUsedMask, NonFromClauseSubquery, OuterQueryReference, QueryDestination,
ResultSetColumn, Scan, SeekKeyComponent,
},
},
types::SeekOp,
@@ -89,6 +89,7 @@ pub fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()
&mut plan.where_clause,
&mut plan.order_by,
&mut plan.group_by,
&plan.non_from_clause_subqueries,
)?;
if let Some(best_join_order) = best_join_order {
@@ -114,6 +115,7 @@ fn optimize_delete_plan(plan: &mut DeletePlan, schema: &Schema) -> Result<()> {
&mut plan.where_clause,
&mut plan.order_by,
&mut None,
&[],
)?;
Ok(())
@@ -138,6 +140,7 @@ fn optimize_update_plan(
&mut plan.where_clause,
&mut plan.order_by,
&mut None,
&[],
)?;
let table_ref = &mut plan.table_references.joined_tables_mut()[0];
@@ -337,6 +340,7 @@ fn optimize_table_access(
where_clause: &mut [WhereTerm],
order_by: &mut Vec<(Box<ast::Expr>, SortOrder)>,
group_by: &mut Option<GroupBy>,
subqueries: &[NonFromClauseSubquery],
) -> Result<Option<Vec<JoinOrderMember>>> {
if table_references.joined_tables().len() > TableReferences::MAX_JOINED_TABLES {
crate::bail_parse_error!(
@@ -346,8 +350,12 @@ fn optimize_table_access(
}
let access_methods_arena = RefCell::new(Vec::new());
let maybe_order_target = compute_order_target(order_by, group_by.as_mut());
let constraints_per_table =
constraints_from_where_clause(where_clause, table_references, available_indexes)?;
let constraints_per_table = constraints_from_where_clause(
where_clause,
table_references,
available_indexes,
subqueries,
)?;
// Currently the expressions we evaluate as constraints are binary expressions that will never be true for a NULL operand.
// If there are any constraints on the right hand side table of an outer join that are not part of the outer join condition,
@@ -806,7 +814,9 @@ impl Optimizable for ast::Expr {
return true;
}
let table_ref = tables.find_joined_table_by_internal_id(*table).unwrap();
let (_, table_ref) = tables
.find_table_by_internal_id(*table)
.expect("table not found");
let columns = table_ref.columns();
let column = &columns[*column];
column.primary_key || column.notnull

View File

@@ -891,6 +891,7 @@ impl TableMask {
pub fn table_mask_from_expr(
top_level_expr: &Expr,
table_references: &TableReferences,
subqueries: &[NonFromClauseSubquery],
) -> Result<TableMask> {
let mut mask = TableMask::new();
walk_expr(top_level_expr, &mut |expr: &Expr| -> Result<WalkControl> {
@@ -912,6 +913,34 @@ pub fn table_mask_from_expr(
crate::bail_parse_error!("table not found in joined_tables");
}
}
// Given something like WHERE t.a = (SELECT ...), we can only evaluate that expression
// when both table 't' and all outer scope tables referenced by the subquery OR its nested subqueries are in scope.
// Hence, the tables referenced in subqueries must be added to the table mask.
Expr::SubqueryResult { subquery_id, .. } => {
let Some(subquery) = subqueries.iter().find(|s| s.internal_id == *subquery_id)
else {
crate::bail_parse_error!("subquery not found");
};
let SubqueryState::Unevaluated { plan } = &subquery.state else {
crate::bail_parse_error!("subquery has already been evaluated");
};
let used_outer_query_refs = plan
.as_ref()
.unwrap()
.table_references
.outer_query_refs()
.iter()
.filter(|t| t.is_used());
for outer_query_ref in used_outer_query_refs {
if let Some(table_idx) = table_references
.joined_tables()
.iter()
.position(|t| t.internal_id == outer_query_ref.internal_id)
{
mask.add_table(table_idx);
}
}
}
_ => {}
}
Ok(WalkControl::Continue)

View File

@@ -503,7 +503,6 @@ fn prepare_one_select_plan(
&mut windows,
)?;
}
// TODO: support subqueries in the following positions:
// - result column of a select, e.g.: SELECT x = (SELECT ...) FROM t