From bed932c18669fef5c7337ef596fcbc9c18333d4d Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Fri, 29 Nov 2024 22:44:14 +0200 Subject: [PATCH 1/6] Support join USING --- COMPAT.md | 1 + core/translate/plan.rs | 61 ++++++++++++++++++++++++++++- core/translate/planner.rs | 81 +++++++++++++++++++++++++++++++-------- testing/join.test | 10 ++++- 4 files changed, 135 insertions(+), 18 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index b106b9be9..765a46a9e 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -61,6 +61,7 @@ This document describes the SQLite compatibility status of Limbo: | SELECT ... CROSS JOIN | Partial | | | SELECT ... INNER JOIN | Partial | | | SELECT ... OUTER JOIN | Partial | | +| SELECT ... JOIN USING | Yes | | | UPDATE | No | | | UPSERT | No | | | VACUUM | No | | diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 8c402a6a0..b76cde8c3 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -8,7 +8,7 @@ use sqlite3_parser::ast; use crate::{ function::AggFunc, - schema::{BTreeTable, Index}, + schema::{BTreeTable, Column, Index}, Result, }; @@ -60,6 +60,64 @@ pub enum IterationDirection { Backwards, } +impl SourceOperator { + pub fn select_star(&self, out_columns: &mut Vec) { + for (table_ref, col, idx) in self.select_star_helper() { + out_columns.push(ResultSetColumn { + expr: ast::Expr::Column { + database: None, + table: table_ref.table_index, + column: idx, + is_rowid_alias: col.primary_key, + }, + contains_aggregates: false, + }); + } + } + + /// All this ceremony is required to deduplicate columns when joining with USING + fn select_star_helper(&self) -> Vec<(&BTreeTableReference, &Column, usize)> { + match self { + SourceOperator::Join { + left, right, using, .. + } => { + let mut columns = left.select_star_helper(); + + // Join columns are filtered out from the right side + // in the case of a USING join. + if let Some(using_cols) = using { + let right_columns = right.select_star_helper(); + + for (table_ref, col, idx) in right_columns { + if !using_cols + .iter() + .any(|using_col| col.name.eq_ignore_ascii_case(&using_col.0)) + { + columns.push((table_ref, col, idx)); + } + } + } else { + columns.extend(right.select_star_helper()); + } + columns + } + SourceOperator::Scan { + table_reference, .. + } + | SourceOperator::Search { + table_reference, .. + } => table_reference + .table + .columns + .iter() + .enumerate() + .map(|(i, col)| (table_reference, col, i)) + .collect(), + SourceOperator::Nothing => Vec::new(), + } + } +} + /** A SourceOperator is a Node in the query plan that reads data from a table. */ @@ -75,6 +133,7 @@ pub enum SourceOperator { right: Box, predicates: Option>, outer: bool, + using: Option, }, // Scan operator // This operator is used to scan a table. diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 8a0d88890..aee72a954 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -281,19 +281,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ - for table_reference in plan.referenced_tables.iter() { - for (idx, col) in table_reference.table.columns.iter().enumerate() { - plan.result_columns.push(ResultSetColumn { - expr: ast::Expr::Column { - database: None, // TODO: support different databases - table: table_reference.table_index, - column: idx, - is_rowid_alias: col.primary_key, - }, - contains_aggregates: false, - }); - } - } + plan.source.select_star(&mut plan.result_columns); } ast::ResultColumn::TableStar(name) => { let name_normalized = normalize_ident(name.0.as_str()); @@ -538,13 +526,14 @@ fn parse_from( let mut table_index = 1; for join in from.joins.unwrap_or_default().into_iter() { - let (right, outer, predicates) = + let (right, outer, using, predicates) = parse_join(schema, join, operator_id_counter, &mut tables, table_index)?; operator = SourceOperator::Join { left: Box::new(operator), right: Box::new(right), predicates, outer, + using, id: operator_id_counter.get_next_id(), }; table_index += 1; @@ -559,7 +548,12 @@ fn parse_join( operator_id_counter: &mut OperatorIdCounter, tables: &mut Vec, table_index: usize, -) -> Result<(SourceOperator, bool, Option>)> { +) -> Result<( + SourceOperator, + bool, + Option, + Option>, +)> { let ast::JoinedSelectTable { operator, table, @@ -599,6 +593,7 @@ fn parse_join( _ => false, }; + let mut using = None; let mut predicates = None; if let Some(constraint) = constraint { match constraint { @@ -610,7 +605,60 @@ fn parse_join( } predicates = Some(preds); } - ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), + ast::JoinConstraint::Using(distinct_names) => { + // USING join is replaced with a list of equality predicates + let mut using_predicates = vec![]; + for distinct_name in distinct_names.iter() { + let name_normalized = normalize_ident(distinct_name.0.as_str()); + let left_table = &tables[table_index - 1]; + let right_table = &tables[table_index]; + let left_col = left_table + .table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == name_normalized); + if left_col.is_none() { + crate::bail_parse_error!( + "Column {}.{} not found", + left_table.table_identifier, + distinct_name.0 + ); + } + let right_col = right_table + .table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == name_normalized); + if right_col.is_none() { + crate::bail_parse_error!( + "Column {}.{} not found", + right_table.table_identifier, + distinct_name.0 + ); + } + let (left_col_idx, left_col) = left_col.unwrap(); + let (right_col_idx, right_col) = right_col.unwrap(); + using_predicates.push(ast::Expr::Binary( + Box::new(ast::Expr::Column { + database: None, + table: left_table.table_index, + column: left_col_idx, + is_rowid_alias: left_col.primary_key, + }), + ast::Operator::Equals, + Box::new(ast::Expr::Column { + database: None, + table: right_table.table_index, + column: right_col_idx, + is_rowid_alias: right_col.primary_key, + }), + )); + } + predicates = Some(using_predicates); + using = Some(distinct_names); + } } } @@ -622,6 +670,7 @@ fn parse_join( iter_dir: None, }, outer, + using, predicates, )) } diff --git a/testing/join.test b/testing/join.test index 2341be5a7..2bbf1b4fa 100755 --- a/testing/join.test +++ b/testing/join.test @@ -212,4 +212,12 @@ do_execsql_test join-utilizing-both-seekrowid-and-secondary-index { select u.first_name, p.name from users u join products p on u.id = p.id and u.age > 70; } {Matthew|boots Nicholas|shorts -Jamie|hat} \ No newline at end of file +Jamie|hat} + +# important difference between regular SELECT * join and a SELECT * USING join is that the join keys are deduplicated +# from the result in the USING case. +do_execsql_test join-using { + select * from users join products using (id) limit 3; +} {"1|Jamie|Foster|dylan00@example.com|496-522-9493|62375 Johnson Rest Suite 322|West Lauriestad|IL|35865|94|hat|79.0 +2|Cindy|Salazar|williamsrebecca@example.com|287-934-1135|75615 Stacey Shore|South Stephanie|NC|85181|37|cap|82.0 +3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|shirt|18.0"} \ No newline at end of file From 81b66054539aa7c661fd9ad36d08edcc51698c5a Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 30 Nov 2024 10:34:11 +0200 Subject: [PATCH 2/6] support NATURAL JOIN --- COMPAT.md | 1 + core/translate/planner.rs | 49 +++++++++++++++++++++++++++++++++------ testing/join.test | 7 ++++++ 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 765a46a9e..fe0d3315f 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -62,6 +62,7 @@ This document describes the SQLite compatibility status of Limbo: | SELECT ... INNER JOIN | Partial | | | SELECT ... OUTER JOIN | Partial | | | SELECT ... JOIN USING | Yes | | +| SELECT ... NATURAL JOIN | Yes | | | UPDATE | No | | | UPSERT | No | | | VACUUM | No | | diff --git a/core/translate/planner.rs b/core/translate/planner.rs index aee72a954..fdafc2e3c 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -582,19 +582,54 @@ fn parse_join( tables.push(table.clone()); - let outer = match operator { + let (outer, natural) = match operator { ast::JoinOperator::TypedJoin(Some(join_type)) => { - if join_type == JoinType::LEFT | JoinType::OUTER { - true - } else { - join_type == JoinType::RIGHT | JoinType::OUTER - } + let is_outer = join_type.contains(JoinType::OUTER); + let is_natural = join_type.contains(JoinType::NATURAL); + (is_outer, is_natural) } - _ => false, + _ => (false, false), }; let mut using = None; let mut predicates = None; + + if natural && constraint.is_some() { + crate::bail_parse_error!("NATURAL JOIN cannot be combined with ON or USING clause"); + } + + let constraint = if natural { + // NATURAL JOIN is first transformed into a USING join with the common columns + let left_table = &tables[table_index - 1]; + let right_table = &tables[table_index]; + let left_cols = &left_table.table.columns; + let right_cols = &right_table.table.columns; + let mut distinct_names = None; + // TODO: O(n^2) maybe not great for large tables + for left_col in left_cols.iter() { + for right_col in right_cols.iter() { + if left_col.name == right_col.name { + if distinct_names.is_none() { + distinct_names = + Some(ast::DistinctNames::new(ast::Name(left_col.name.clone()))); + } else { + distinct_names + .as_mut() + .unwrap() + .insert(ast::Name(left_col.name.clone())) + .unwrap(); + } + } + } + } + if distinct_names.is_none() { + crate::bail_parse_error!("No columns found to NATURAL join on"); + } + Some(ast::JoinConstraint::Using(distinct_names.unwrap())) + } else { + constraint + }; + if let Some(constraint) = constraint { match constraint { ast::JoinConstraint::On(expr) => { diff --git a/testing/join.test b/testing/join.test index 2bbf1b4fa..2bb3a872d 100755 --- a/testing/join.test +++ b/testing/join.test @@ -220,4 +220,11 @@ do_execsql_test join-using { select * from users join products using (id) limit 3; } {"1|Jamie|Foster|dylan00@example.com|496-522-9493|62375 Johnson Rest Suite 322|West Lauriestad|IL|35865|94|hat|79.0 2|Cindy|Salazar|williamsrebecca@example.com|287-934-1135|75615 Stacey Shore|South Stephanie|NC|85181|37|cap|82.0 +3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|shirt|18.0"} + +# NATURAL JOIN desugars to JOIN USING (common_column1, common_column2...) +do_execsql_test join-using { + select * from users natural join products limit 3; +} {"1|Jamie|Foster|dylan00@example.com|496-522-9493|62375 Johnson Rest Suite 322|West Lauriestad|IL|35865|94|hat|79.0 +2|Cindy|Salazar|williamsrebecca@example.com|287-934-1135|75615 Stacey Shore|South Stephanie|NC|85181|37|cap|82.0 3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|shirt|18.0"} \ No newline at end of file From 4f027035de18dce045f6bb66888b3543bc592d42 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 3 Dec 2024 19:19:57 +0200 Subject: [PATCH 3/6] tests for multiple joins --- testing/join.test | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/testing/join.test b/testing/join.test index 2bb3a872d..c3d3af21c 100755 --- a/testing/join.test +++ b/testing/join.test @@ -222,9 +222,21 @@ do_execsql_test join-using { 2|Cindy|Salazar|williamsrebecca@example.com|287-934-1135|75615 Stacey Shore|South Stephanie|NC|85181|37|cap|82.0 3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|shirt|18.0"} +do_execsql_test join-using-multiple { + select u.first_name, u.last_name, p.name from users u join users u2 using(id) join products p using(id) limit 3; +} {"Jamie|Foster|hat +Cindy|Salazar|cap +Tommy|Perry|shirt"} + # NATURAL JOIN desugars to JOIN USING (common_column1, common_column2...) do_execsql_test join-using { select * from users natural join products limit 3; } {"1|Jamie|Foster|dylan00@example.com|496-522-9493|62375 Johnson Rest Suite 322|West Lauriestad|IL|35865|94|hat|79.0 2|Cindy|Salazar|williamsrebecca@example.com|287-934-1135|75615 Stacey Shore|South Stephanie|NC|85181|37|cap|82.0 -3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|shirt|18.0"} \ No newline at end of file +3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|shirt|18.0"} + +do_execsql_test natural-join-multiple { + select u.first_name, u.last_name, p.name from users u natural join users u2 natural join products p limit 3; +} {"Jamie|Foster|hat +Cindy|Salazar|cap +Tommy|Perry|shirt"} \ No newline at end of file From 7924f9b64d3b5338a91526a8a0a3a41700766129 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 3 Dec 2024 19:47:20 +0200 Subject: [PATCH 4/6] consider all joined tables instead of just previous in natural/using --- core/translate/planner.rs | 70 +++++++++++++++++++++++---------------- testing/join.test | 11 ++++-- 2 files changed, 51 insertions(+), 30 deletions(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index fdafc2e3c..7f58a1d5d 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -600,26 +600,34 @@ fn parse_join( let constraint = if natural { // NATURAL JOIN is first transformed into a USING join with the common columns - let left_table = &tables[table_index - 1]; + let left_tables = &tables[..table_index]; + assert!(!left_tables.is_empty()); let right_table = &tables[table_index]; - let left_cols = &left_table.table.columns; let right_cols = &right_table.table.columns; let mut distinct_names = None; - // TODO: O(n^2) maybe not great for large tables - for left_col in left_cols.iter() { - for right_col in right_cols.iter() { - if left_col.name == right_col.name { - if distinct_names.is_none() { - distinct_names = - Some(ast::DistinctNames::new(ast::Name(left_col.name.clone()))); - } else { - distinct_names - .as_mut() - .unwrap() - .insert(ast::Name(left_col.name.clone())) - .unwrap(); + // TODO: O(n^2) maybe not great for large tables or big multiway joins + for right_col in right_cols.iter() { + let mut found_match = false; + for left_table in left_tables.iter() { + for left_col in left_table.table.columns.iter() { + if left_col.name == right_col.name { + if distinct_names.is_none() { + distinct_names = + Some(ast::DistinctNames::new(ast::Name(left_col.name.clone()))); + } else { + distinct_names + .as_mut() + .unwrap() + .insert(ast::Name(left_col.name.clone())) + .unwrap(); + } + found_match = true; + break; } } + if found_match { + break; + } } } if distinct_names.is_none() { @@ -645,18 +653,25 @@ fn parse_join( let mut using_predicates = vec![]; for distinct_name in distinct_names.iter() { let name_normalized = normalize_ident(distinct_name.0.as_str()); - let left_table = &tables[table_index - 1]; + let left_tables = &tables[..table_index]; + assert!(!left_tables.is_empty()); let right_table = &tables[table_index]; - let left_col = left_table - .table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == name_normalized); + let mut left_col = None; + for (left_table_idx, left_table) in left_tables.iter().enumerate() { + left_col = left_table + .table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == name_normalized) + .map(|(idx, col)| (left_table_idx, idx, col)); + if left_col.is_some() { + break; + } + } if left_col.is_none() { crate::bail_parse_error!( - "Column {}.{} not found", - left_table.table_identifier, + "cannot join using column {} - column not present in all tables", distinct_name.0 ); } @@ -668,17 +683,16 @@ fn parse_join( .find(|(_, col)| col.name == name_normalized); if right_col.is_none() { crate::bail_parse_error!( - "Column {}.{} not found", - right_table.table_identifier, + "cannot join using column {} - column not present in all tables", distinct_name.0 ); } - let (left_col_idx, left_col) = left_col.unwrap(); + let (left_table_idx, left_col_idx, left_col) = left_col.unwrap(); let (right_col_idx, right_col) = right_col.unwrap(); using_predicates.push(ast::Expr::Binary( Box::new(ast::Expr::Column { database: None, - table: left_table.table_index, + table: left_table_idx, column: left_col_idx, is_rowid_alias: left_col.primary_key, }), diff --git a/testing/join.test b/testing/join.test index c3d3af21c..980ae531d 100755 --- a/testing/join.test +++ b/testing/join.test @@ -236,7 +236,14 @@ do_execsql_test join-using { 3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|shirt|18.0"} do_execsql_test natural-join-multiple { - select u.first_name, u.last_name, p.name from users u natural join users u2 natural join products p limit 3; + select u.first_name, u2.last_name, p.name from users u natural join users u2 natural join products p limit 3; } {"Jamie|Foster|hat Cindy|Salazar|cap -Tommy|Perry|shirt"} \ No newline at end of file +Tommy|Perry|shirt"} + +# have to be able to join between 1st table and 3rd table as well +do_execsql_test natural-join-and-using-join { + select u.id, u2.id, p.id from users u natural join products p join users u2 using (first_name) limit 3; +} {"1|1|1 +1|1204|1 +1|1261|1"} \ No newline at end of file From 840caed2f7656f793cec8162b5ff6369acd6a80e Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 3 Dec 2024 20:26:13 +0200 Subject: [PATCH 5/6] Fix bug with multiway joins that include the same table multiple times --- core/translate/plan.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index b76cde8c3..7db65b713 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -365,7 +365,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( table_refs_mask |= 1 << tables .iter() - .position(|t| Rc::ptr_eq(&t.table, &table_reference.table)) + .position(|t| &t.table_identifier == &table_reference.table_identifier) .unwrap(); } SourceOperator::Search { @@ -374,7 +374,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( table_refs_mask |= 1 << tables .iter() - .position(|t| Rc::ptr_eq(&t.table, &table_reference.table)) + .position(|t| &t.table_identifier == &table_reference.table_identifier) .unwrap(); } SourceOperator::Nothing => {} From fe88d45e5e11ab704443220a6796fca80f647b99 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 3 Dec 2024 20:56:25 +0200 Subject: [PATCH 6/6] Add more comments to push_predicate/push_predicates --- core/translate/optimizer.rs | 59 +++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 307df356b..604f0f9e0 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -260,16 +260,24 @@ fn eliminate_constants( /** Recursively pushes predicates down the tree, as far as possible. + Where a predicate is pushed determines at which loop level it will be evaluated. + For example, in SELECT * FROM t1 JOIN t2 JOIN t3 WHERE t1.a = t2.a AND t2.b = t3.b AND t1.c = 1 + the predicate t1.c = 1 can be pushed to t1 and will be evaluated in the first (outermost) loop, + the predicate t1.a = t2.a can be pushed to t2 and will be evaluated in the second loop + while t2.b = t3.b will be evaluated in the third loop. */ fn push_predicates( operator: &mut SourceOperator, where_clause: &mut Option>, referenced_tables: &Vec, ) -> Result<()> { + // First try to push down any predicates from the WHERE clause if let Some(predicates) = where_clause { let mut i = 0; while i < predicates.len() { + // Take ownership of predicate to try pushing it down let predicate = predicates[i].take_ownership(); + // If predicate was successfully pushed (None returned), remove it from WHERE let Some(predicate) = push_predicate(operator, predicate, referenced_tables)? else { predicates.remove(i); continue; @@ -277,10 +285,12 @@ fn push_predicates( predicates[i] = predicate; i += 1; } + // Clean up empty WHERE clause if predicates.is_empty() { *where_clause = None; } } + match operator { SourceOperator::Join { left, @@ -289,6 +299,7 @@ fn push_predicates( outer, .. } => { + // Recursively push predicates down both sides of join push_predicates(left, where_clause, referenced_tables)?; push_predicates(right, where_clause, referenced_tables)?; @@ -300,34 +311,41 @@ fn push_predicates( let mut i = 0; while i < predicates.len() { - // try to push the predicate to the left side first, then to the right side - - // temporarily take ownership of the predicate let predicate_owned = predicates[i].take_ownership(); - // left join predicates cant be pushed to the left side + + // For a join like SELECT * FROM left INNER JOIN right ON left.id = right.id AND left.name = 'foo' + // the predicate 'left.name = 'foo' can already be evaluated in the outer loop (left side of join) + // because the row can immediately be skipped if left.name != 'foo'. + // But for a LEFT JOIN, we can't do this since we need to ensure that all rows from the left table are included, + // even if there are no matching rows from the right table. This is why we can't push LEFT JOIN predicates to the left side. let push_result = if *outer { Some(predicate_owned) } else { push_predicate(left, predicate_owned, referenced_tables)? }; - // if the predicate was pushed to a child, remove it from the list + + // Try pushing to left side first (see comment above for reasoning) let Some(predicate) = push_result else { predicates.remove(i); continue; }; - // otherwise try to push it to the right side - // if it was pushed to the right side, remove it from the list + + // Then try right side let Some(predicate) = push_predicate(right, predicate, referenced_tables)? else { predicates.remove(i); continue; }; - // otherwise keep the predicate in the list + + // If neither side could take it, keep in join predicates (not sure if this actually happens in practice) + // this is effectively the same as pushing to the right side, so maybe it could be removed and assert here + // that we don't reach this code predicates[i] = predicate; i += 1; } Ok(()) } + // Base cases - nowhere else to push to SourceOperator::Scan { .. } => Ok(()), SourceOperator::Search { .. } => Ok(()), SourceOperator::Nothing => Ok(()), @@ -349,24 +367,29 @@ fn push_predicate( table_reference, .. } => { + // Find position of this table in referenced_tables array let table_index = referenced_tables .iter() .position(|t| t.table_identifier == table_reference.table_identifier) .unwrap(); + // Get bitmask showing which tables this predicate references let predicate_bitmask = get_table_ref_bitmask_for_ast_expr(referenced_tables, &predicate)?; - // the expression is allowed to refer to tables on its left, i.e. the righter bits in the mask - // e.g. if this table is 0010, and the table on its right in the join is 0100: - // if predicate_bitmask is 0011, the predicate can be pushed (refers to this table and the table on its left) - // if predicate_bitmask is 0001, the predicate can be pushed (refers to the table on its left) - // if predicate_bitmask is 0101, the predicate can't be pushed (refers to this table and a table on its right) + // Each table has a bit position based on join order from left to right + // e.g. in SELECT * FROM t1 JOIN t2 JOIN t3 + // t1 is position 0 (001), t2 is position 1 (010), t3 is position 2 (100) + // To push a predicate to a given table, it can only reference that table and tables to its left + // Example: For table t2 at position 1 (bit 010): + // - Can push: 011 (t2 + t1), 001 (just t1), 010 (just t2) + // - Can't push: 110 (t2 + t3) let next_table_on_the_right_in_join_bitmask = 1 << (table_index + 1); if predicate_bitmask >= next_table_on_the_right_in_join_bitmask { return Ok(Some(predicate)); } + // Add predicate to this table's filters if predicates.is_none() { predicates.replace(vec![predicate]); } else { @@ -375,7 +398,8 @@ fn push_predicate( Ok(None) } - SourceOperator::Search { .. } => Ok(Some(predicate)), + // Search nodes don't exist yet at this point; Scans are transformed to Search in use_indexes() + SourceOperator::Search { .. } => unreachable!(), SourceOperator::Join { left, right, @@ -383,31 +407,36 @@ fn push_predicate( outer, .. } => { + // Try pushing to left side first let push_result_left = push_predicate(left, predicate, referenced_tables)?; if push_result_left.is_none() { return Ok(None); } + // Then try right side let push_result_right = push_predicate(right, push_result_left.unwrap(), referenced_tables)?; if push_result_right.is_none() { return Ok(None); } + // For LEFT JOIN, predicates must stay at join level if *outer { return Ok(Some(push_result_right.unwrap())); } let pred = push_result_right.unwrap(); + // Get bitmasks for tables referenced in predicate and both sides of join let table_refs_bitmask = get_table_ref_bitmask_for_ast_expr(referenced_tables, &pred)?; - let left_bitmask = get_table_ref_bitmask_for_operator(referenced_tables, left)?; let right_bitmask = get_table_ref_bitmask_for_operator(referenced_tables, right)?; + // If predicate doesn't reference tables from both sides, it can't be a join condition if table_refs_bitmask & left_bitmask == 0 || table_refs_bitmask & right_bitmask == 0 { return Ok(Some(pred)); } + // Add as join predicate since it references both sides if join_on_preds.is_none() { join_on_preds.replace(vec![pred]); } else {