From bed932c18669fef5c7337ef596fcbc9c18333d4d Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Fri, 29 Nov 2024 22:44:14 +0200 Subject: [PATCH] Support join USING --- COMPAT.md | 1 + core/translate/plan.rs | 61 ++++++++++++++++++++++++++++- core/translate/planner.rs | 81 +++++++++++++++++++++++++++++++-------- testing/join.test | 10 ++++- 4 files changed, 135 insertions(+), 18 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index b106b9be9..765a46a9e 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -61,6 +61,7 @@ This document describes the SQLite compatibility status of Limbo: | SELECT ... CROSS JOIN | Partial | | | SELECT ... INNER JOIN | Partial | | | SELECT ... OUTER JOIN | Partial | | +| SELECT ... JOIN USING | Yes | | | UPDATE | No | | | UPSERT | No | | | VACUUM | No | | diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 8c402a6a0..b76cde8c3 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -8,7 +8,7 @@ use sqlite3_parser::ast; use crate::{ function::AggFunc, - schema::{BTreeTable, Index}, + schema::{BTreeTable, Column, Index}, Result, }; @@ -60,6 +60,64 @@ pub enum IterationDirection { Backwards, } +impl SourceOperator { + pub fn select_star(&self, out_columns: &mut Vec) { + for (table_ref, col, idx) in self.select_star_helper() { + out_columns.push(ResultSetColumn { + expr: ast::Expr::Column { + database: None, + table: table_ref.table_index, + column: idx, + is_rowid_alias: col.primary_key, + }, + contains_aggregates: false, + }); + } + } + + /// All this ceremony is required to deduplicate columns when joining with USING + fn select_star_helper(&self) -> Vec<(&BTreeTableReference, &Column, usize)> { + match self { + SourceOperator::Join { + left, right, using, .. + } => { + let mut columns = left.select_star_helper(); + + // Join columns are filtered out from the right side + // in the case of a USING join. + if let Some(using_cols) = using { + let right_columns = right.select_star_helper(); + + for (table_ref, col, idx) in right_columns { + if !using_cols + .iter() + .any(|using_col| col.name.eq_ignore_ascii_case(&using_col.0)) + { + columns.push((table_ref, col, idx)); + } + } + } else { + columns.extend(right.select_star_helper()); + } + columns + } + SourceOperator::Scan { + table_reference, .. + } + | SourceOperator::Search { + table_reference, .. + } => table_reference + .table + .columns + .iter() + .enumerate() + .map(|(i, col)| (table_reference, col, i)) + .collect(), + SourceOperator::Nothing => Vec::new(), + } + } +} + /** A SourceOperator is a Node in the query plan that reads data from a table. */ @@ -75,6 +133,7 @@ pub enum SourceOperator { right: Box, predicates: Option>, outer: bool, + using: Option, }, // Scan operator // This operator is used to scan a table. diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 8a0d88890..aee72a954 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -281,19 +281,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ - for table_reference in plan.referenced_tables.iter() { - for (idx, col) in table_reference.table.columns.iter().enumerate() { - plan.result_columns.push(ResultSetColumn { - expr: ast::Expr::Column { - database: None, // TODO: support different databases - table: table_reference.table_index, - column: idx, - is_rowid_alias: col.primary_key, - }, - contains_aggregates: false, - }); - } - } + plan.source.select_star(&mut plan.result_columns); } ast::ResultColumn::TableStar(name) => { let name_normalized = normalize_ident(name.0.as_str()); @@ -538,13 +526,14 @@ fn parse_from( let mut table_index = 1; for join in from.joins.unwrap_or_default().into_iter() { - let (right, outer, predicates) = + let (right, outer, using, predicates) = parse_join(schema, join, operator_id_counter, &mut tables, table_index)?; operator = SourceOperator::Join { left: Box::new(operator), right: Box::new(right), predicates, outer, + using, id: operator_id_counter.get_next_id(), }; table_index += 1; @@ -559,7 +548,12 @@ fn parse_join( operator_id_counter: &mut OperatorIdCounter, tables: &mut Vec, table_index: usize, -) -> Result<(SourceOperator, bool, Option>)> { +) -> Result<( + SourceOperator, + bool, + Option, + Option>, +)> { let ast::JoinedSelectTable { operator, table, @@ -599,6 +593,7 @@ fn parse_join( _ => false, }; + let mut using = None; let mut predicates = None; if let Some(constraint) = constraint { match constraint { @@ -610,7 +605,60 @@ fn parse_join( } predicates = Some(preds); } - ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), + ast::JoinConstraint::Using(distinct_names) => { + // USING join is replaced with a list of equality predicates + let mut using_predicates = vec![]; + for distinct_name in distinct_names.iter() { + let name_normalized = normalize_ident(distinct_name.0.as_str()); + let left_table = &tables[table_index - 1]; + let right_table = &tables[table_index]; + let left_col = left_table + .table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == name_normalized); + if left_col.is_none() { + crate::bail_parse_error!( + "Column {}.{} not found", + left_table.table_identifier, + distinct_name.0 + ); + } + let right_col = right_table + .table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == name_normalized); + if right_col.is_none() { + crate::bail_parse_error!( + "Column {}.{} not found", + right_table.table_identifier, + distinct_name.0 + ); + } + let (left_col_idx, left_col) = left_col.unwrap(); + let (right_col_idx, right_col) = right_col.unwrap(); + using_predicates.push(ast::Expr::Binary( + Box::new(ast::Expr::Column { + database: None, + table: left_table.table_index, + column: left_col_idx, + is_rowid_alias: left_col.primary_key, + }), + ast::Operator::Equals, + Box::new(ast::Expr::Column { + database: None, + table: right_table.table_index, + column: right_col_idx, + is_rowid_alias: right_col.primary_key, + }), + )); + } + predicates = Some(using_predicates); + using = Some(distinct_names); + } } } @@ -622,6 +670,7 @@ fn parse_join( iter_dir: None, }, outer, + using, predicates, )) } diff --git a/testing/join.test b/testing/join.test index 2341be5a7..2bbf1b4fa 100755 --- a/testing/join.test +++ b/testing/join.test @@ -212,4 +212,12 @@ do_execsql_test join-utilizing-both-seekrowid-and-secondary-index { select u.first_name, p.name from users u join products p on u.id = p.id and u.age > 70; } {Matthew|boots Nicholas|shorts -Jamie|hat} \ No newline at end of file +Jamie|hat} + +# important difference between regular SELECT * join and a SELECT * USING join is that the join keys are deduplicated +# from the result in the USING case. +do_execsql_test join-using { + select * from users join products using (id) limit 3; +} {"1|Jamie|Foster|dylan00@example.com|496-522-9493|62375 Johnson Rest Suite 322|West Lauriestad|IL|35865|94|hat|79.0 +2|Cindy|Salazar|williamsrebecca@example.com|287-934-1135|75615 Stacey Shore|South Stephanie|NC|85181|37|cap|82.0 +3|Tommy|Perry|warechristopher@example.org|001-288-554-8139x0276|2896 Paul Fall Apt. 972|Michaelborough|VA|15691|18|shirt|18.0"} \ No newline at end of file