From d0466e1cae4b12dca4f1f210cff3ab521f83315b Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 17 Nov 2024 15:59:24 +0200 Subject: [PATCH 01/32] introduce Column member of ast::Expr and bind idents to columns --- core/translate/expr.rs | 169 ++------------ core/translate/optimizer.rs | 143 ++++++------ core/translate/plan.rs | 46 +--- core/translate/planner.rs | 206 ++++++++++++++++-- core/translate/select.rs | 1 + vendored/sqlite3-parser/src/parser/ast/fmt.rs | 1 + vendored/sqlite3-parser/src/parser/ast/mod.rs | 11 + 7 files changed, 303 insertions(+), 274 deletions(-) diff --git a/core/translate/expr.rs b/core/translate/expr.rs index fca31b0ae..6fa78c6f6 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1440,23 +1440,29 @@ pub fn translate_expr( } } ast::Expr::FunctionCallStar { .. } => todo!(), - ast::Expr::Id(ident) => { - // let (idx, col) = table.unwrap().get_column(&ident.0).unwrap(); - let (idx, col_type, cursor_id, is_rowid_alias) = - resolve_ident_table(program, &ident.0, referenced_tables, cursor_hint)?; - if is_rowid_alias { + ast::Expr::Id(_) => unreachable!("Id should be resolved to a Column before translation"), + ast::Expr::Column { + database: _, + table, + column, + is_primary_key, + } => { + let tbl_ref = referenced_tables.as_ref().unwrap().get(*table).unwrap(); + let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier, cursor_hint); + if *is_primary_key { program.emit_insn(Insn::RowId { cursor_id, dest: target_register, }); } else { program.emit_insn(Insn::Column { - column: idx, - dest: target_register, cursor_id, + column: *column, + dest: target_register, }); } - maybe_apply_affinity(col_type, target_register, program); + let column = &tbl_ref.table.columns[*column]; + maybe_apply_affinity(column.ty, target_register, program); Ok(target_register) } ast::Expr::InList { .. 
} => todo!(), @@ -1539,28 +1545,8 @@ pub fn translate_expr( } Ok(target_register) } - ast::Expr::Qualified(tbl, ident) => { - let (idx, col_type, cursor_id, is_primary_key) = resolve_ident_qualified( - program, - &tbl.0, - &ident.0, - referenced_tables.unwrap(), - cursor_hint, - )?; - if is_primary_key { - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Column { - column: idx, - dest: target_register, - cursor_id, - }); - } - maybe_apply_affinity(col_type, target_register, program); - Ok(target_register) + ast::Expr::Qualified(_, _) => { + unreachable!("Qualified should be resolved to a Column before translation") } ast::Expr::Raise(_, _) => todo!(), ast::Expr::Subquery(_) => todo!(), @@ -1604,112 +1590,6 @@ fn wrap_eval_jump_expr( program.preassign_label_to_next_insn(if_true_label); } -pub fn resolve_ident_qualified( - program: &ProgramBuilder, - table_name: &str, - ident: &str, - referenced_tables: &[BTreeTableReference], - cursor_hint: Option, -) -> Result<(usize, Type, usize, bool)> { - let ident = normalize_ident(ident); - let table_name = normalize_ident(table_name); - for table_reference in referenced_tables.iter() { - if table_reference.table_identifier == table_name { - let res = table_reference - .table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| (idx, col.ty, col.primary_key)); - let mut idx; - let mut col_type; - let mut is_primary_key; - if res.is_some() { - (idx, col_type, is_primary_key) = res.unwrap(); - // overwrite if cursor hint is provided - if let Some(cursor_hint) = cursor_hint { - let cols = &program.cursor_ref[cursor_hint].1; - if let Some(res) = cols.as_ref().and_then(|res| { - res.columns() - .iter() - .enumerate() - .find(|x| x.1.name == format!("{}.{}", table_name, ident)) - }) { - idx = res.0; - col_type = res.1.ty; - is_primary_key = res.1.primary_key; - } - } - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, cursor_hint); - return Ok((idx, col_type, cursor_id, is_primary_key)); - } - } - } - crate::bail_parse_error!( - "column with qualified name {}.{} not found", - table_name, - ident - ); -} - -pub fn resolve_ident_table( - program: &ProgramBuilder, - ident: &str, - referenced_tables: Option<&[BTreeTableReference]>, - cursor_hint: Option, -) -> Result<(usize, Type, usize, bool)> { - let ident = normalize_ident(ident); - let mut found = Vec::new(); - for table_reference in referenced_tables.unwrap() { - let res = table_reference - .table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident) - .map(|(idx, col)| { - ( - idx, - col.ty, - table_reference.table.column_is_rowid_alias(col), - ) - }); - let mut idx; - let mut col_type; - let mut is_rowid_alias; - if res.is_some() { - (idx, col_type, is_rowid_alias) = res.unwrap(); - // overwrite if cursor hint is provided - if let Some(cursor_hint) = cursor_hint { - let cols = &program.cursor_ref[cursor_hint].1; - if let Some(res) = cols.as_ref().and_then(|res| { - res.columns() - .iter() - .enumerate() - .find(|x| x.1.name == *ident) - }) { - idx = res.0; - col_type = res.1.ty; - is_rowid_alias = table_reference.table.column_is_rowid_alias(res.1); - } - } - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, cursor_hint); - found.push((idx, col_type, cursor_id, is_rowid_alias)); - } - } - if found.len() == 1 { - return Ok(found[0]); - } - if found.is_empty() { - crate::bail_parse_error!("column with name {} not found", 
ident.as_str()); - } - - crate::bail_parse_error!("ambiguous column name {}", ident.as_str()); -} - pub fn resolve_ident_pseudo_table(ident: &String, pseudo_table: &PseudoTable) -> Result { let res = pseudo_table .columns @@ -1827,13 +1707,8 @@ pub fn translate_aggregation( if agg.args.len() == 2 { match &agg.args[1] { - ast::Expr::Id(ident) => { - if ident.0.starts_with('"') { - delimiter_expr = - ast::Expr::Literal(ast::Literal::String(ident.0.to_string())); - } else { - delimiter_expr = agg.args[1].clone(); - } + ast::Expr::Column { .. } => { + delimiter_expr = agg.args[1].clone(); } ast::Expr::Literal(ast::Literal::String(s)) => { delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); @@ -1926,12 +1801,8 @@ pub fn translate_aggregation( let delimiter_expr: ast::Expr; match &agg.args[1] { - ast::Expr::Id(ident) => { - if ident.0.starts_with('"') { - crate::bail_parse_error!("no such column: \",\" - should this be a string literal in single-quotes?"); - } else { - delimiter_expr = agg.args[1].clone(); - } + ast::Expr::Column { .. } => { + delimiter_expr = agg.args[1].clone(); } ast::Expr::Literal(ast::Literal::String(s)) => { delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 833943fe3..682ed6c4b 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, rc::Rc}; use sqlite3_parser::ast; -use crate::{schema::Index, util::normalize_ident, Result}; +use crate::{schema::Index, Result}; use super::plan::{ get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, BTreeTableReference, @@ -39,6 +39,7 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result<(Plan, ExpressionResultCac )?; eliminate_unnecessary_orderby( &mut select_plan.root_operator, + &select_plan.referenced_tables, &select_plan.available_indexes, )?; find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(&select_plan.root_operator, &mut expr_result_cache); @@ -48,21 +49,28 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result<(Plan, ExpressionResultCac fn _operator_is_already_ordered_by( operator: &mut Operator, key: &mut ast::Expr, + referenced_tables: &[BTreeTableReference], available_indexes: &Vec>, ) -> Result { match operator { Operator::Scan { table_reference, .. - } => Ok(key.is_primary_key_of(table_reference)), + } => Ok(key.is_primary_key_of(table_reference.table_index)), Operator::Search { table_reference, search, .. } => match search { - Search::PrimaryKeyEq { .. } => Ok(key.is_primary_key_of(table_reference)), - Search::PrimaryKeySearch { .. } => Ok(key.is_primary_key_of(table_reference)), + Search::PrimaryKeyEq { .. } => Ok(key.is_primary_key_of(table_reference.table_index)), + Search::PrimaryKeySearch { .. } => { + Ok(key.is_primary_key_of(table_reference.table_index)) + } Search::IndexSearch { index, .. } => { - let index_idx = key.check_index_scan(table_reference, available_indexes)?; + let index_idx = key.check_index_scan( + table_reference.table_index, + referenced_tables, + available_indexes, + )?; let index_is_the_same = index_idx .map(|i| Rc::ptr_eq(&available_indexes[i], index)) .unwrap_or(false); @@ -70,13 +78,13 @@ fn _operator_is_already_ordered_by( } }, Operator::Join { left, .. 
} => { - _operator_is_already_ordered_by(left, key, available_indexes) + _operator_is_already_ordered_by(left, key, referenced_tables, available_indexes) } Operator::Aggregate { source, .. } => { - _operator_is_already_ordered_by(source, key, available_indexes) + _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes) } Operator::Projection { source, .. } => { - _operator_is_already_ordered_by(source, key, available_indexes) + _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes) } _ => Ok(false), } @@ -84,6 +92,7 @@ fn _operator_is_already_ordered_by( fn eliminate_unnecessary_orderby( operator: &mut Operator, + referenced_tables: &[BTreeTableReference], available_indexes: &Vec>, ) -> Result<()> { match operator { @@ -95,7 +104,7 @@ fn eliminate_unnecessary_orderby( let (key, direction) = key.first_mut().unwrap(); - let already_ordered = _operator_is_already_ordered_by(source, key, available_indexes)?; + let already_ordered = _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes)?; if already_ordered { push_scan_direction(source, direction); @@ -105,7 +114,7 @@ fn eliminate_unnecessary_orderby( Ok(()) } Operator::Limit { source, .. } => { - eliminate_unnecessary_orderby(source, available_indexes)?; + eliminate_unnecessary_orderby(source, referenced_tables, available_indexes)?; Ok(()) } _ => Ok(()), @@ -135,14 +144,19 @@ fn use_indexes( let fs = filter.as_mut().unwrap(); for i in 0..fs.len() { let f = fs[i].take_ownership(); - let table_ref = referenced_tables + let table_index = referenced_tables .iter() - .find(|t| { + .position(|t| { Rc::ptr_eq(&t.table, &table_reference.table) && t.table_identifier == table_reference.table_identifier }) .unwrap(); - match try_extract_index_search_expression(f, table_ref, available_indexes)? { + match try_extract_index_search_expression( + f, + table_index, + referenced_tables, + available_indexes, + )? { Either::Left(non_index_using_expr) => { fs[i] = non_index_using_expr; } @@ -150,7 +164,7 @@ fn use_indexes( fs.remove(i); *operator = Operator::Search { id: *id, - table_reference: table_ref.clone(), + table_reference: table_reference.clone(), predicates: Some(fs.clone()), search: index_search, step: 0, @@ -833,7 +847,8 @@ fn find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_o name: _, filter_over: _, } => 0, - ast::Expr::Id(_) => 0, + ast::Expr::Id(_) => unreachable!("Ids have been bound to Column references"), + ast::Expr::Column { .. } => 0, ast::Expr::InList { lhs, not: _, rhs } => { let mut mask = 0; mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); @@ -890,7 +905,9 @@ fn find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_o } mask } - ast::Expr::Qualified(_, _) => 0, + ast::Expr::Qualified(_, _) => { + unreachable!("Qualified expressions have been bound to Column references") + } ast::Expr::Raise(_, _) => 0, ast::Expr::Subquery(_) => 0, ast::Expr::Unary(_op, expr) => { @@ -984,80 +1001,57 @@ pub trait Optimizable { .check_constant()? 
.map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) } - fn is_primary_key_of(&self, table_reference: &BTreeTableReference) -> bool; + fn is_primary_key_of(&self, table_index: usize) -> bool; fn check_index_scan( &mut self, - table_reference: &BTreeTableReference, + table_index: usize, + referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result>; } impl Optimizable for ast::Expr { - fn is_primary_key_of(&self, table_reference: &BTreeTableReference) -> bool { + fn is_primary_key_of(&self, table_index: usize) -> bool { match self { - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - table_reference - .table - .get_column(&ident) - .map_or(false, |(_, c)| c.primary_key) - } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - - tbl == table_reference.table_identifier - && table_reference - .table - .get_column(&ident) - .map_or(false, |(_, c)| c.primary_key) - } + ast::Expr::Column { + table, + column, + is_primary_key, + .. + } => *is_primary_key && *table == table_index, _ => false, } } fn check_index_scan( &mut self, - table_reference: &BTreeTableReference, + table_index: usize, + referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result> { match self { - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let indexes = available_indexes - .iter() - .enumerate() - .filter(|(_, i)| { - i.table_name == table_reference.table_identifier - && i.columns.iter().any(|c| c.name == ident) - }) - .collect::>(); - if indexes.is_empty() { - return Ok(None); - } - if indexes.len() > 1 { - crate::bail_parse_error!("ambiguous column name {}", ident) - } - Ok(Some(indexes.first().unwrap().0)) - } - ast::Expr::Qualified(_, ident) => { - let ident = normalize_ident(&ident.0); - let index = available_indexes.iter().enumerate().find(|(_, i)| { - if i.table_name != table_reference.table.name { - return false; + ast::Expr::Column { table, column, .. 
} => { + for (idx, index) in available_indexes.iter().enumerate() { + if index.table_name == referenced_tables[*table].table.name { + let column = referenced_tables[*table] + .table + .columns + .get(*column) + .unwrap(); + if index.columns.first().unwrap().name == column.name { + return Ok(Some(idx)); + } } - i.columns.iter().any(|c| normalize_ident(&c.name) == ident) - }); - if index.is_none() { - return Ok(None); } - Ok(Some(index.unwrap().0)) + Ok(None) } ast::Expr::Binary(lhs, op, rhs) => { - let lhs_index = lhs.check_index_scan(table_reference, available_indexes)?; + let lhs_index = + lhs.check_index_scan(table_index, referenced_tables, available_indexes)?; if lhs_index.is_some() { return Ok(lhs_index); } - let rhs_index = rhs.check_index_scan(table_reference, available_indexes)?; + let rhs_index = + rhs.check_index_scan(table_index, referenced_tables, available_indexes)?; if rhs_index.is_some() { // swap lhs and rhs let lhs_new = rhs.take_ownership(); @@ -1196,12 +1190,13 @@ pub enum Either { pub fn try_extract_index_search_expression( expr: ast::Expr, - table_reference: &BTreeTableReference, + table_index: usize, + referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result> { match expr { ast::Expr::Binary(mut lhs, operator, mut rhs) => { - if lhs.is_primary_key_of(table_reference) { + if lhs.is_primary_key_of(table_index) { match operator { ast::Operator::Equals => { return Ok(Either::Right(Search::PrimaryKeyEq { cmp_expr: *rhs })); @@ -1219,7 +1214,7 @@ pub fn try_extract_index_search_expression( } } - if rhs.is_primary_key_of(table_reference) { + if rhs.is_primary_key_of(table_index) { match operator { ast::Operator::Equals => { return Ok(Either::Right(Search::PrimaryKeyEq { cmp_expr: *lhs })); @@ -1237,7 +1232,9 @@ pub fn try_extract_index_search_expression( } } - if let Some(index_index) = lhs.check_index_scan(table_reference, available_indexes)? { + if let Some(index_index) = + lhs.check_index_scan(table_index, referenced_tables, available_indexes)? + { match operator { ast::Operator::Equals | ast::Operator::Greater @@ -1254,7 +1251,9 @@ pub fn try_extract_index_search_expression( } } - if let Some(index_index) = rhs.check_index_scan(table_reference, available_indexes)? { + if let Some(index_index) = + rhs.check_index_scan(table_index, referenced_tables, available_indexes)? 
+ { match operator { ast::Operator::Equals | ast::Operator::Greater diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 9502e67ca..8c1eff7be 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -9,7 +9,6 @@ use sqlite3_parser::ast; use crate::{ function::AggFunc, schema::{BTreeTable, Index}, - util::normalize_ident, Result, }; @@ -146,6 +145,7 @@ pub enum Operator { pub struct BTreeTableReference { pub table: Rc, pub table_identifier: String, + pub table_index: usize, } /// An enum that represents a search operation that can be used to search for a row in a table using an index @@ -574,46 +574,12 @@ pub fn get_table_ref_bitmask_for_ast_expr<'a>( table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e1)?; table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e2)?; } - ast::Expr::Id(ident) => { - let ident = normalize_ident(&ident.0); - let matching_tables = tables - .iter() - .enumerate() - .filter(|(_, table_reference)| table_reference.table.get_column(&ident).is_some()); - - let mut matches = 0; - let mut matching_tbl = None; - for table in matching_tables { - matching_tbl = Some(table); - matches += 1; - if matches > 1 { - crate::bail_parse_error!("ambiguous column name {}", &ident) - } - } - - if let Some((tbl_index, _)) = matching_tbl { - table_refs_mask |= 1 << tbl_index; - } else { - crate::bail_parse_error!("column not found: {}", &ident) - } + ast::Expr::Column { table, .. } => { + table_refs_mask |= 1 << table; } - ast::Expr::Qualified(tbl, ident) => { - let tbl = normalize_ident(&tbl.0); - let ident = normalize_ident(&ident.0); - let matching_table = tables - .iter() - .enumerate() - .find(|(_, t)| t.table_identifier == tbl); - - if matching_table.is_none() { - crate::bail_parse_error!("introspect: table not found: {}", &tbl) - } - let (table_index, table_reference) = matching_table.unwrap(); - if table_reference.table.get_column(&ident).is_none() { - crate::bail_parse_error!("column with qualified name {}.{} not found", &tbl, &ident) - } - - table_refs_mask |= 1 << table_index; + ast::Expr::Id(_) => unreachable!("Id should be resolved to a Column before optimizer"), + ast::Expr::Qualified(_, _) => { + unreachable!("Qualified should be resolved to a Column before optimizer") } ast::Expr::Literal(_) => {} ast::Expr::Like { lhs, rhs, .. } => { diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 0a7fac1e2..7f803e514 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -59,6 +59,166 @@ fn resolve_aggregates(expr: &ast::Expr, aggs: &mut Vec) { } } +/// Recursively resolve column references in an expression. +/// Id, Qualified and DoublyQualified are converted to Column. 
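// A minimal sketch of the binding idea described in the doc comment above, using
// hypothetical stand-in types rather than the crate's real `BTreeTableReference`
// and `ast::Expr`: an unqualified identifier is matched against every table in
// scope, ambiguity is an error, and the winning match is replaced by positional
// (table, column) indexes plus a primary-key flag.
struct TableRef {
    columns: Vec<(String, bool)>, // (column name, is_primary_key)
}

#[derive(Debug, PartialEq)]
enum BoundExpr {
    Column {
        table: usize,
        column: usize,
        is_primary_key: bool,
    },
}

fn bind_ident(name: &str, tables: &[TableRef]) -> Result<BoundExpr, String> {
    let mut found = None;
    for (tbl_idx, table) in tables.iter().enumerate() {
        if let Some(col_idx) = table
            .columns
            .iter()
            .position(|(col, _)| col.eq_ignore_ascii_case(name))
        {
            if found.is_some() {
                return Err(format!("Column {} is ambiguous", name));
            }
            found = Some(BoundExpr::Column {
                table: tbl_idx,
                column: col_idx,
                is_primary_key: table.columns[col_idx].1,
            });
        }
    }
    found.ok_or_else(|| format!("Column {} not found", name))
}

fn main() {
    let users = TableRef {
        columns: vec![("id".to_string(), true), ("age".to_string(), false)],
    };
    // "age" binds to table 0, column 1, not a primary key.
    println!("{:?}", bind_ident("age", &[users]));
}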
+fn bind_column_references( + expr: &mut ast::Expr, + referenced_tables: &[BTreeTableReference], +) -> Result<()> { + match expr { + ast::Expr::Id(id) => { + let mut match_result = None; + for (tbl_idx, table) in referenced_tables.iter().enumerate() { + let col_idx = table + .table + .columns + .iter() + .position(|c| c.name.eq_ignore_ascii_case(&id.0)); + if col_idx.is_some() { + if match_result.is_some() { + crate::bail_parse_error!("Column {} is ambiguous", id.0); + } + let col = table.table.columns.get(col_idx.unwrap()).unwrap(); + match_result = Some((tbl_idx, col_idx.unwrap(), col.primary_key)); + } + } + if match_result.is_none() { + crate::bail_parse_error!("Column {} not found", id.0); + } + let (tbl_idx, col_idx, is_primary_key) = match_result.unwrap(); + *expr = ast::Expr::Column { + database: None, // TODO: support different databases + table: tbl_idx, + column: col_idx, + is_primary_key, + }; + Ok(()) + } + ast::Expr::Qualified(tbl, id) => { + let matching_tbl_idx = referenced_tables + .iter() + .position(|t| t.table_identifier.eq_ignore_ascii_case(&tbl.0)); + if matching_tbl_idx.is_none() { + crate::bail_parse_error!("Table {} not found", tbl.0); + } + let tbl_idx = matching_tbl_idx.unwrap(); + let col_idx = referenced_tables[tbl_idx] + .table + .columns + .iter() + .position(|c| c.name.eq_ignore_ascii_case(&id.0)); + if col_idx.is_none() { + crate::bail_parse_error!("Column {} not found", id.0); + } + let col = referenced_tables[tbl_idx] + .table + .columns + .get(col_idx.unwrap()) + .unwrap(); + *expr = ast::Expr::Column { + database: None, // TODO: support different databases + table: tbl_idx, + column: col_idx.unwrap(), + is_primary_key: col.primary_key, + }; + Ok(()) + } + ast::Expr::Between { + lhs, + not: _, + start, + end, + } => { + bind_column_references(lhs, referenced_tables)?; + bind_column_references(start, referenced_tables)?; + bind_column_references(end, referenced_tables)?; + Ok(()) + } + ast::Expr::Binary(expr, _operator, expr1) => { + bind_column_references(expr, referenced_tables)?; + bind_column_references(expr1, referenced_tables)?; + Ok(()) + } + ast::Expr::Case { + base, + when_then_pairs, + else_expr, + } => { + if let Some(base) = base { + bind_column_references(base, referenced_tables)?; + } + for (when, then) in when_then_pairs { + bind_column_references(when, referenced_tables)?; + bind_column_references(then, referenced_tables)?; + } + if let Some(else_expr) = else_expr { + bind_column_references(else_expr, referenced_tables)?; + } + Ok(()) + } + ast::Expr::Cast { expr, type_name: _ } => bind_column_references(expr, referenced_tables), + ast::Expr::Collate(expr, _string) => bind_column_references(expr, referenced_tables), + ast::Expr::FunctionCall { + name: _, + distinctness: _, + args, + order_by: _, + filter_over: _, + } => { + if let Some(args) = args { + for arg in args { + bind_column_references(arg, referenced_tables)?; + } + } + Ok(()) + } + // Column references cannot exist before binding + ast::Expr::Column { .. } => unreachable!(), + ast::Expr::DoublyQualified(_, _, _) => todo!(), + ast::Expr::Exists(_) => todo!(), + ast::Expr::FunctionCallStar { .. } => Ok(()), + ast::Expr::InList { lhs, not: _, rhs } => { + bind_column_references(lhs, referenced_tables)?; + if let Some(rhs) = rhs { + for arg in rhs { + bind_column_references(arg, referenced_tables)?; + } + } + Ok(()) + } + ast::Expr::InSelect { .. } => todo!(), + ast::Expr::InTable { .. 
} => todo!(), + ast::Expr::IsNull(expr) => { + bind_column_references(expr, referenced_tables)?; + Ok(()) + } + ast::Expr::Like { lhs, rhs, .. } => { + bind_column_references(lhs, referenced_tables)?; + bind_column_references(rhs, referenced_tables)?; + Ok(()) + } + ast::Expr::Literal(_) => Ok(()), + ast::Expr::Name(_) => todo!(), + ast::Expr::NotNull(expr) => { + bind_column_references(expr, referenced_tables)?; + Ok(()) + } + ast::Expr::Parenthesized(expr) => { + for e in expr.iter_mut() { + bind_column_references(e, referenced_tables)?; + } + Ok(()) + } + ast::Expr::Raise(_, _) => todo!(), + ast::Expr::Subquery(_) => todo!(), + ast::Expr::Unary(_, expr) => { + bind_column_references(expr, referenced_tables)?; + Ok(()) + } + ast::Expr::Variable(_) => todo!(), + } +} + #[allow(clippy::extra_unused_lifetimes)] pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result { match select.body.select { @@ -66,7 +226,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result
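// Sketch (simplified stand-in types, not the crate's API) of why binding pays
// off downstream: once a column reference carries its table's positional index,
// the bitmask of referenced tables is just a fold of `1 << table`, with no name
// lookups, mirroring get_table_ref_bitmask_for_ast_expr earlier in this patch.
// The real variant also carries database, column and is_primary_key.
enum Expr {
    Column { table: usize },
    Binary(Box<Expr>, Box<Expr>),
    Literal(i64),
}

fn table_ref_bitmask(expr: &Expr) -> u64 {
    match expr {
        Expr::Column { table } => 1u64 << *table,
        Expr::Binary(lhs, rhs) => table_ref_bitmask(lhs) | table_ref_bitmask(rhs),
        Expr::Literal(_) => 0,
    }
}

fn main() {
    // (t0.a + t2.b) > 5 references tables 0 and 2 -> mask 0b101.
    let expr = Expr::Binary(
        Box::new(Expr::Binary(
            Box::new(Expr::Column { table: 0 }),
            Box::new(Expr::Column { table: 2 }),
        )),
        Box::new(Expr::Literal(5)),
    );
    assert_eq!(table_ref_bitmask(&expr), 0b101);
    println!("mask = {:#b}", table_ref_bitmask(&expr));
}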

{ let col_count = columns.len(); @@ -84,6 +244,9 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result


{ + ast::ResultColumn::Expr(mut expr, _) => { + bind_column_references(&mut expr, &referenced_tables)?; projection_expressions.push(ProjectionColumn::Column(expr.clone())); match expr.clone() { ast::Expr::FunctionCall { @@ -174,7 +338,10 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result


()?; if column_number == 0 { crate::bail_parse_error!("invalid column index: {}", column_number); @@ -235,6 +402,8 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result
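// Sketch, with hypothetical helper names, of the 1-based ORDER BY ordinal check
// seen above: `ORDER BY 2` refers to the second result column and 0 is rejected.
// Rejecting an ordinal past the end of the select list is assumed here; the hunk
// above only shows the zero check.
fn resolve_order_by_ordinal(ordinal: usize, result_columns: &[&str]) -> Result<usize, String> {
    if ordinal == 0 || ordinal > result_columns.len() {
        return Err(format!("invalid column index: {}", ordinal));
    }
    Ok(ordinal - 1) // 0-based index into the select list
}

fn main() {
    let cols = ["name", "age"];
    assert_eq!(resolve_order_by_ordinal(2, &cols), Ok(1));
    assert!(resolve_order_by_ordinal(0, &cols).is_err());
}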

todo!(), @@ -324,9 +494,10 @@ fn parse_from( let mut tables = vec![first_table]; + let mut table_index = 1; for join in from.joins.unwrap_or_default().into_iter() { let (right, outer, predicates) = - parse_join(schema, join, operator_id_counter, &mut tables)?; + parse_join(schema, join, operator_id_counter, &mut tables, table_index)?; operator = Operator::Join { left: Box::new(operator), right: Box::new(right), @@ -334,7 +505,8 @@ fn parse_from( outer, id: operator_id_counter.get_next_id(), step: 0, - } + }; + table_index += 1; } Ok((operator, tables)) @@ -345,6 +517,7 @@ fn parse_join( join: ast::JoinedSelectTable, operator_id_counter: &mut OperatorIdCounter, tables: &mut Vec, + table_index: usize, ) -> Result<(Operator, bool, Option>)> { let ast::JoinedSelectTable { operator, @@ -366,6 +539,7 @@ fn parse_join( BTreeTableReference { table: table.clone(), table_identifier: alias.unwrap_or(qualified_name.name.0), + table_index, } } _ => todo!(), @@ -384,14 +558,20 @@ fn parse_join( _ => false, }; - let predicates = constraint.map(|c| match c { - ast::JoinConstraint::On(expr) => { - let mut predicates = vec![]; - break_predicate_at_and_boundaries(expr, &mut predicates); - predicates + let mut predicates = None; + if let Some(constraint) = constraint { + match constraint { + ast::JoinConstraint::On(expr) => { + let mut preds = vec![]; + break_predicate_at_and_boundaries(expr, &mut preds); + for predicate in preds.iter_mut() { + bind_column_references(predicate, tables)?; + } + predicates = Some(preds); + } + ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), } - ast::JoinConstraint::Using(_) => todo!("USING joins not supported yet"), - }); + } Ok(( Operator::Scan { diff --git a/core/translate/select.rs b/core/translate/select.rs index d486f6c23..2b946b0fd 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -18,6 +18,7 @@ pub fn translate_select( ) -> Result { let select_plan = prepare_select_plan(schema, select)?; let (optimized_plan, expr_result_cache) = optimize_plan(select_plan)?; + println!("{:?}", expr_result_cache); emit_program( database_header, optimized_plan, diff --git a/vendored/sqlite3-parser/src/parser/ast/fmt.rs b/vendored/sqlite3-parser/src/parser/ast/fmt.rs index 7ee2d1af4..80f87eefb 100644 --- a/vendored/sqlite3-parser/src/parser/ast/fmt.rs +++ b/vendored/sqlite3-parser/src/parser/ast/fmt.rs @@ -637,6 +637,7 @@ impl ToTokens for Expr { Ok(()) } Self::Id(id) => id.to_tokens(s), + Self::Column { .. } => Ok(()), Self::InList { lhs, not, rhs } => { lhs.to_tokens(s)?; if *not { diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index e3b9f86fa..29ec84dd6 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -327,6 +327,17 @@ pub enum Expr { }, /// Identifier Id(Id), + /// Column + Column { + /// the x in `x.y.z`. index of the db in catalog. + database: Option, + /// the y in `x.y.z`. index of the table in catalog. + table: usize, + /// the z in `x.y.z`. index of the column in the table. 
+ column: usize, + /// is the column a primary key + is_primary_key: bool, + }, /// `IN` InList { /// expression From 3f9e60633f8b7dc9c505ad1d8c6f445a7dfc64ad Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 12:33:41 +0200 Subject: [PATCH 02/32] select refactor: order by and basic agg kinda work --- core/lib.rs | 4 +- core/translate/emitter.rs | 4283 +++++++++++------ core/translate/expr.rs | 141 +- core/translate/insert.rs | 2 +- core/translate/optimizer.rs | 708 +-- core/translate/plan.rs | 281 +- core/translate/planner.rs | 295 +- core/translate/select.rs | 11 +- vendored/sqlite3-parser/src/parser/ast/fmt.rs | 1 + vendored/sqlite3-parser/src/parser/ast/mod.rs | 5 + 10 files changed, 3032 insertions(+), 2699 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index b7344c260..bc97a7c7b 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -235,8 +235,8 @@ impl Connection { Cmd::ExplainQueryPlan(stmt) => { match stmt { ast::Stmt::Select(select) => { - let plan = prepare_select_plan(&self.schema.borrow(), select)?; - let (plan, _) = optimize_plan(plan)?; + let plan = prepare_select_plan(&*self.schema.borrow(), select)?; + let plan = optimize_plan(plan)?; println!("{}", plan); } _ => todo!(), diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 5f3402389..fe23832db 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -17,9 +17,8 @@ use super::expr::{ translate_aggregation, translate_condition_expr, translate_expr, translate_table_columns, ConditionMetadata, }; -use super::optimizer::ExpressionResultCache; -use super::plan::{BTreeTableReference, Plan}; -use super::plan::{Operator, ProjectionColumn}; +use super::plan::{Aggregate, BTreeTableReference, Direction, Plan}; +use super::plan::{ResultSetColumn, SourceOperator}; /** * The Emitter trait is used to emit bytecode instructions for a given operator in the query plan. 
@@ -27,28 +26,28 @@ use super::plan::{Operator, ProjectionColumn}; * - step: perform a single step of the operator, emitting bytecode instructions as needed, and returning a result indicating whether the operator is ready to emit a result row */ -pub trait Emitter { - fn step( - &mut self, - pb: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[BTreeTableReference], - ) -> Result; - fn result_columns( - &self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - metadata: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result; - fn result_row( - &mut self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - metadata: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result<()>; -} +// pub trait Emitter { +// fn step( +// &mut self, +// pb: &mut ProgramBuilder, +// m: &mut Metadata, +// referenced_tables: &[BTreeTableReference], +// ) -> Result; +// fn result_columns( +// &self, +// program: &mut ProgramBuilder, +// referenced_tables: &[BTreeTableReference], +// metadata: &mut Metadata, +// cursor_override: Option<&SortCursorOverride>, +// ) -> Result; +// fn result_row( +// &mut self, +// program: &mut ProgramBuilder, +// referenced_tables: &[BTreeTableReference], +// metadata: &mut Metadata, +// cursor_override: Option<&SortCursorOverride>, +// ) -> Result<()>; +// } #[derive(Debug)] pub struct LeftJoinMetadata { @@ -136,1552 +135,1493 @@ pub struct Metadata { sorts: HashMap, // mapping between Join operator id and associated metadata (for left joins only) left_joins: HashMap, - expr_result_cache: ExpressionResultCache, + // register holding the start of the result set + result_set_register_start: usize, } -/// Emitters return one of three possible results from the step() method: -/// - Continue: the operator is not yet ready to emit a result row -/// - ReadyToEmit: the operator is ready to emit a result row -/// - Done: the operator has completed execution -/// For example, a Scan operator will return Continue until it has opened a cursor, rewound it and applied any predicates. -/// At that point, it will return ReadyToEmit. -/// Finally, when the Scan operator has emitted a Next instruction, it will return Done. -/// -/// Parent operators are free to make decisions based on the result a child operator's step() method. -/// -/// When the root operator of a Plan returns ReadyToEmit, a ResultRow will always be emitted. -/// When the root operator returns Done, the bytecode plan is complete. 
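// Condensed sketch of the step()-driven code-generation protocol that this patch
// replaces, using hypothetical stand-in types: each call to step() emits some
// bytecode and reports whether the driver should now emit a ResultRow
// (ReadyToEmit) or whether the plan is complete (Done). The phases mirror the
// SCAN_OPEN_READ / SCAN_BODY / SCAN_NEXT steps of the Scan operator below, with
// strings standing in for real instructions.
#[derive(Debug, PartialEq)]
enum OpStepResult {
    Continue,
    ReadyToEmit,
    Done,
}

struct ScanEmitter {
    step: usize,
    program: Vec<String>,
}

impl ScanEmitter {
    fn step(&mut self) -> OpStepResult {
        self.step += 1;
        match self.step {
            1 => {
                // open the cursor
                self.program.push("OpenReadAsync / OpenReadAwait".into());
                OpStepResult::Continue
            }
            2 => {
                // rewind and apply predicates; a row register is now available
                self.program.push("RewindAsync / RewindAwait + predicates".into());
                OpStepResult::ReadyToEmit
            }
            _ => {
                // advance the loop and finish
                self.program.push("NextAsync / NextAwait".into());
                OpStepResult::Done
            }
        }
    }
}

fn main() {
    let mut scan = ScanEmitter { step: 0, program: Vec::new() };
    loop {
        match scan.step() {
            OpStepResult::Continue => continue,
            OpStepResult::ReadyToEmit => scan.program.push("ResultRow".into()),
            OpStepResult::Done => break,
        }
    }
    println!("{:#?}", scan.program);
}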
-#[derive(Debug, PartialEq)] -pub enum OpStepResult { - Continue, - ReadyToEmit, - Done, -} - -impl Emitter for Operator { - fn step( - &mut self, - program: &mut ProgramBuilder, - m: &mut Metadata, - referenced_tables: &[BTreeTableReference], - ) -> Result { - let current_operator_column_count = self.column_count(referenced_tables); - match self { - Operator::Scan { - table_reference, - id, - step, - predicates, - iter_dir, - } => { - *step += 1; - const SCAN_OPEN_READ: usize = 1; - const SCAN_BODY: usize = 2; - const SCAN_NEXT: usize = 3; - let reverse = iter_dir - .as_ref() - .is_some_and(|iter_dir| *iter_dir == IterationDirection::Backwards); - match *step { - SCAN_OPEN_READ => { - let cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - let root_page = table_reference.table.root_page; - let next_row_label = program.allocate_label(); - m.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - Ok(OpStepResult::Continue) - } - SCAN_BODY => { - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - if reverse { - program.emit_insn(Insn::LastAsync { cursor_id }); - } else { - program.emit_insn(Insn::RewindAsync { cursor_id }); - } - let scan_loop_body_label = program.allocate_label(); - let halt_label = m.termination_label_stack.last().unwrap(); - program.emit_insn_with_label_dependency( - if reverse { - Insn::LastAwait { - cursor_id, - pc_if_empty: *halt_label, - } - } else { - Insn::RewindAwait { - cursor_id, - pc_if_empty: *halt_label, - } - }, - *halt_label, - ); - m.scan_loop_body_labels.push(scan_loop_body_label); - program.defer_label_resolution( - scan_loop_body_label, - program.offset() as usize, - ); - - let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); - if let Some(preds) = predicates { - for expr in preds { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: *jump_label, - }; - translate_condition_expr( - program, - referenced_tables, - expr, - None, - condition_metadata, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - Ok(OpStepResult::ReadyToEmit) - } - SCAN_NEXT => { - let cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - program - .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); - if reverse { - program.emit_insn(Insn::PrevAsync { cursor_id }); - } else { - program.emit_insn(Insn::NextAsync { cursor_id }); - } - let jump_label = m.scan_loop_body_labels.pop().unwrap(); - - if reverse { - program.emit_insn_with_label_dependency( - Insn::PrevAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } else { - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - } - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Search { - table_reference, - search, - predicates, - step, - id, - .. 
- } => { - *step += 1; - const SEARCH_OPEN_READ: usize = 1; - const SEARCH_BODY: usize = 2; - const SEARCH_NEXT: usize = 3; - match *step { - SEARCH_OPEN_READ => { - let table_cursor_id = program.alloc_cursor_id( - Some(table_reference.table_identifier.clone()), - Some(Table::BTree(table_reference.table.clone())), - ); - - let next_row_label = program.allocate_label(); - - if !matches!(search, Search::PrimaryKeyEq { .. }) { - // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - m.next_row_labels.insert(*id, next_row_label); - } - - let scan_loop_body_label = program.allocate_label(); - m.scan_loop_body_labels.push(scan_loop_body_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - - if let Search::IndexSearch { index, .. } = search { - let index_cursor_id = program.alloc_cursor_id( - Some(index.name.clone()), - Some(Table::Index(index.clone())), - ); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - } - Ok(OpStepResult::Continue) - } - SEARCH_BODY => { - let table_cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); - - // Open the loop for the index search. - // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. - if !matches!(search, Search::PrimaryKeyEq { .. }) { - let index_cursor_id = if let Search::IndexSearch { index, .. } = search - { - Some(program.resolve_cursor_id(&index.name, None)) - } else { - None - }; - let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); - let cmp_reg = program.alloc_register(); - let (cmp_expr, cmp_op) = match search { - Search::IndexSearch { - cmp_expr, cmp_op, .. - } => (cmp_expr, cmp_op), - Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), - Search::PrimaryKeyEq { .. 
} => unreachable!(), - }; - // TODO this only handles ascending indexes - match cmp_op { - ast::Operator::Equals - | ast::Operator::Greater - | ast::Operator::GreaterEquals => { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - None, - None, - )?; - } - ast::Operator::Less | ast::Operator::LessEquals => { - program.emit_insn(Insn::Null { - dest: cmp_reg, - dest_end: None, - }); - } - _ => unreachable!(), - } - program.emit_insn_with_label_dependency( - match cmp_op { - ast::Operator::Equals | ast::Operator::GreaterEquals => { - Insn::SeekGE { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: *m.termination_label_stack.last().unwrap(), - } - } - ast::Operator::Greater - | ast::Operator::Less - | ast::Operator::LessEquals => Insn::SeekGT { - is_index: index_cursor_id.is_some(), - cursor_id: index_cursor_id.unwrap_or(table_cursor_id), - start_reg: cmp_reg, - num_regs: 1, - target_pc: *m.termination_label_stack.last().unwrap(), - }, - _ => unreachable!(), - }, - *m.termination_label_stack.last().unwrap(), - ); - if *cmp_op == ast::Operator::Less - || *cmp_op == ast::Operator::LessEquals - { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - None, - None, - )?; - } - - program.defer_label_resolution( - scan_loop_body_label, - program.offset() as usize, - ); - // TODO: We are currently only handling ascending indexes. - // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. - // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. - // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. - // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. - // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. - // - // For primary key searches we emit RowId and then compare it to the seek value. 
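// Sketch, with stand-in enums, of the ascending-index seek strategy the comment
// above describes: the comparison operator of `index_key <op> value` picks both
// the initial seek and the condition that terminates the scan loop. "Start"
// models the trick of seeking with a NULL key, which lands at the beginning of
// the index for the < and <= cases.
#[derive(Debug, Clone, Copy)]
enum CmpOp {
    Eq,
    Gt,
    Ge,
    Lt,
    Le,
}

#[derive(Debug, PartialEq)]
enum Seek {
    Ge,    // seek to the first key >= value (SeekGE)
    Gt,    // seek to the first key > value (SeekGT)
    Start, // SeekGT on a NULL key: start from the beginning of the index
}

#[derive(Debug, PartialEq)]
enum StopWhen {
    KeyGt, // stop once key > value (IdxGT / Gt)
    KeyGe, // stop once key >= value (IdxGE / Ge)
    Never, // scan to the end of the index
}

fn plan_seek(op: CmpOp) -> (Seek, StopWhen) {
    match op {
        CmpOp::Eq => (Seek::Ge, StopWhen::KeyGt),
        CmpOp::Ge => (Seek::Ge, StopWhen::Never),
        CmpOp::Gt => (Seek::Gt, StopWhen::Never),
        CmpOp::Le => (Seek::Start, StopWhen::KeyGt),
        CmpOp::Lt => (Seek::Start, StopWhen::KeyGe),
    }
}

fn main() {
    // index_key = 10: seek to the first key >= 10, stop as soon as key > 10.
    assert_eq!(plan_seek(CmpOp::Eq), (Seek::Ge, StopWhen::KeyGt));
    // index_key < 10: scan from the start, stop as soon as key >= 10.
    assert_eq!(plan_seek(CmpOp::Lt), (Seek::Start, StopWhen::KeyGe));
}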
- - let abort_jump_target = *m - .next_row_labels - .get(id) - .unwrap_or(m.termination_label_stack.last().unwrap()); - match cmp_op { - ast::Operator::Equals | ast::Operator::LessEquals => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn_with_label_dependency( - Insn::IdxGT { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } - } - ast::Operator::Less => { - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn_with_label_dependency( - Insn::IdxGE { - cursor_id: index_cursor_id, - start_reg: cmp_reg, - num_regs: 1, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } else { - let rowid_reg = program.alloc_register(); - program.emit_insn(Insn::RowId { - cursor_id: table_cursor_id, - dest: rowid_reg, - }); - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: rowid_reg, - rhs: cmp_reg, - target_pc: abort_jump_target, - }, - abort_jump_target, - ); - } - } - _ => {} - } - - if let Some(index_cursor_id) = index_cursor_id { - program.emit_insn(Insn::DeferredSeek { - index_cursor_id, - table_cursor_id, - }); - } - } - - let jump_label = m - .next_row_labels - .get(id) - .unwrap_or(m.termination_label_stack.last().unwrap()); - - if let Search::PrimaryKeyEq { cmp_expr } = search { - let src_reg = program.alloc_register(); - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - src_reg, - None, - None, - )?; - program.emit_insn_with_label_dependency( - Insn::SeekRowid { - cursor_id: table_cursor_id, - src_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - } - if let Some(predicates) = predicates { - for predicate in predicates.iter() { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false: *jump_label, - }; - translate_condition_expr( - program, - referenced_tables, - predicate, - None, - condition_metadata, - )?; - program.resolve_label(jump_target_when_true, program.offset()); - } - } - - Ok(OpStepResult::ReadyToEmit) - } - SEARCH_NEXT => { - if matches!(search, Search::PrimaryKeyEq { .. }) { - // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. - return Ok(OpStepResult::Done); - } - let cursor_id = match search { - Search::IndexSearch { index, .. } => { - program.resolve_cursor_id(&index.name, None) - } - Search::PrimaryKeySearch { .. } => { - program.resolve_cursor_id(&table_reference.table_identifier, None) - } - Search::PrimaryKeyEq { .. } => unreachable!(), - }; - program - .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); - program.emit_insn(Insn::NextAsync { cursor_id }); - let jump_label = m.scan_loop_body_labels.pop().unwrap(); - program.emit_insn_with_label_dependency( - Insn::NextAwait { - cursor_id, - pc_if_next: jump_label, - }, - jump_label, - ); - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Join { - left, - right, - outer, - predicates, - step, - id, - .. 
- } => { - *step += 1; - const JOIN_INIT: usize = 1; - const JOIN_DO_JOIN: usize = 2; - const JOIN_END: usize = 3; - match *step { - JOIN_INIT => { - if *outer { - let lj_metadata = LeftJoinMetadata { - match_flag_register: program.alloc_register(), - set_match_flag_true_label: program.allocate_label(), - check_match_flag_label: program.allocate_label(), - on_match_jump_to_label: program.allocate_label(), - }; - m.left_joins.insert(*id, lj_metadata); - } - left.step(program, m, referenced_tables)?; - right.step(program, m, referenced_tables)?; - - Ok(OpStepResult::Continue) - } - JOIN_DO_JOIN => { - left.step(program, m, referenced_tables)?; - - let mut jump_target_when_false = *m - .next_row_labels - .get(&right.id()) - .or(m.next_row_labels.get(&left.id())) - .unwrap_or(m.termination_label_stack.last().unwrap()); - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - program.emit_insn(Insn::Integer { - value: 0, - dest: lj_meta.match_flag_register, - }); - jump_target_when_false = lj_meta.check_match_flag_label; - } - m.next_row_labels.insert(right.id(), jump_target_when_false); - - right.step(program, m, referenced_tables)?; - - if let Some(predicates) = predicates { - let jump_target_when_true = program.allocate_label(); - let condition_metadata = ConditionMetadata { - jump_if_condition_is_true: false, - jump_target_when_true, - jump_target_when_false, - }; - for predicate in predicates.iter() { - translate_condition_expr( - program, - referenced_tables, - predicate, - None, - condition_metadata, - )?; - } - program.resolve_label(jump_target_when_true, program.offset()); - } - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - program.defer_label_resolution( - lj_meta.set_match_flag_true_label, - program.offset() as usize, - ); - program.emit_insn(Insn::Integer { - value: 1, - dest: lj_meta.match_flag_register, - }); - } - - Ok(OpStepResult::ReadyToEmit) - } - JOIN_END => { - right.step(program, m, referenced_tables)?; - - if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); - // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) - program.resolve_label(lj_meta.check_match_flag_label, program.offset()); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: lj_meta.match_flag_register, - target_pc: lj_meta.on_match_jump_to_label, - decrement_by: 0, - }, - lj_meta.on_match_jump_to_label, - ); - // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL - let right_cursor_id = match right.as_ref() { - Operator::Scan { - table_reference, .. - } => program - .resolve_cursor_id(&table_reference.table_identifier, None), - Operator::Search { - table_reference, .. 
- } => program - .resolve_cursor_id(&table_reference.table_identifier, None), - _ => unreachable!(), - }; - program.emit_insn(Insn::NullRow { - cursor_id: right_cursor_id, - }); - // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: lj_meta.set_match_flag_true_label, - }, - lj_meta.set_match_flag_true_label, - ); - } - let next_row_label = if *outer { - m.left_joins.get(id).unwrap().on_match_jump_to_label - } else { - *m.next_row_labels.get(&right.id()).unwrap() - }; - // This points to the NextAsync instruction of the left table - program.resolve_label(next_row_label, program.offset()); - left.step(program, m, referenced_tables)?; - - Ok(OpStepResult::Done) - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Aggregate { - id, - source, - aggregates, - group_by, - step, - .. - } => { - *step += 1; - - // Group by aggregation eg. SELECT a, b, sum(c) FROM t GROUP BY a, b - if let Some(group_by) = group_by { - const GROUP_BY_INIT: usize = 1; - const GROUP_BY_INSERT_INTO_SORTER: usize = 2; - const GROUP_BY_SORT_AND_COMPARE: usize = 3; - const GROUP_BY_PREPARE_ROW: usize = 4; - const GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE: usize = 5; - match *step { - GROUP_BY_INIT => { - let agg_final_label = program.allocate_label(); - m.termination_label_stack.push(agg_final_label); - let num_aggs = aggregates.len(); - - let sort_cursor = program.alloc_cursor_id(None, None); - - let abort_flag_register = program.alloc_register(); - let data_in_accumulator_indicator_register = program.alloc_register(); - let group_exprs_comparison_register = - program.alloc_registers(group_by.len()); - let group_exprs_accumulator_register = - program.alloc_registers(group_by.len()); - let agg_exprs_start_reg = program.alloc_registers(num_aggs); - m.aggregation_start_registers - .insert(*id, agg_exprs_start_reg); - let sorter_key_register = program.alloc_register(); - - let subroutine_accumulator_clear_label = program.allocate_label(); - let subroutine_accumulator_output_label = program.allocate_label(); - let sorter_data_label = program.allocate_label(); - let grouping_done_label = program.allocate_label(); - - let mut order = Vec::new(); - const ASCENDING: i64 = 0; - for _ in group_by.iter() { - order.push(OwnedValue::Integer(ASCENDING)); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sort_cursor, - columns: current_operator_column_count, - order: OwnedRecord::new(order), - }); - - program.add_comment(program.offset(), "clear group by abort flag"); - program.emit_insn(Insn::Integer { - value: 0, - dest: abort_flag_register, - }); - - program.add_comment( - program.offset(), - "initialize group by comparison registers to NULL", - ); - program.emit_insn(Insn::Null { - dest: group_exprs_comparison_register, - dest_end: if group_by.len() > 1 { - Some(group_exprs_comparison_register + group_by.len() - 1) - } else { - None - }, - }); - - program.add_comment( - program.offset(), - "go to clear accumulator subroutine", - ); - - let subroutine_accumulator_clear_return_offset_register = - program.alloc_register(); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_clear_label, - return_reg: subroutine_accumulator_clear_return_offset_register, - }, - subroutine_accumulator_clear_label, - ); - - m.group_bys.insert( - *id, - GroupByMetadata { - sort_cursor, - subroutine_accumulator_clear_label, - 
subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_output_return_offset_register: program - .alloc_register(), - accumulator_indicator_set_true_label: program.allocate_label(), - sorter_data_label, - grouping_done_label, - abort_flag_register, - data_in_accumulator_indicator_register, - group_exprs_accumulator_register, - group_exprs_comparison_register, - sorter_key_register, - }, - ); - - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::Continue); - } - OpStepResult::Done => { - return Ok(OpStepResult::Done); - } - } - } - } - GROUP_BY_INSERT_INTO_SORTER => { - let sort_keys_count = group_by.len(); - let start_reg = program.alloc_registers(current_operator_column_count); - for (i, expr) in group_by.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - None, - None, - )?; - } - for (i, agg) in aggregates.iter().enumerate() { - // TODO it's a hack to assume aggregate functions have exactly one argument. - // Counterpoint e.g. GROUP_CONCAT(expr, separator). - // - // Here we are collecting scalars for the group by sorter, which will include - // both the group by expressions and the aggregate arguments. - // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` - // the sorter will have two scalars: u.first_name and u.age. - // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. - // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. - // - // This is why we take the first argument of each aggregate function currently. - // It's mostly an artifact of the current architecture being a bit poor; we should recognize - // which scalars are dependencies of aggregate functions and explicitly collect those. - let expr = &agg.args[0]; - let agg_reg = start_reg + sort_keys_count + i; - translate_expr( - program, - Some(referenced_tables), - expr, - agg_reg, - None, - None, - )?; - } - - let group_by_metadata = m.group_bys.get(id).unwrap(); - - program.emit_insn(Insn::MakeRecord { - start_reg, - count: current_operator_column_count, - dest_reg: group_by_metadata.sorter_key_register, - }); - - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::SorterInsert { - cursor_id: group_by_metadata.sort_cursor, - record_reg: group_by_metadata.sorter_key_register, - }); - - return Ok(OpStepResult::Continue); - } - #[allow(clippy::never_loop)] - GROUP_BY_SORT_AND_COMPARE => { - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Done => { - break; - } - _ => unreachable!(), - } - } - - let group_by_metadata = m.group_bys.get_mut(id).unwrap(); - - let GroupByMetadata { - group_exprs_comparison_register: comparison_register, - subroutine_accumulator_output_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_clear_label, - data_in_accumulator_indicator_register, - accumulator_indicator_set_true_label, - group_exprs_accumulator_register: group_exprs_start_register, - abort_flag_register, - sorter_key_register, - .. 
- } = *group_by_metadata; - let halt_label = *m.termination_label_stack.first().unwrap(); - - let mut column_names = - Vec::with_capacity(current_operator_column_count); - for expr in group_by - .iter() - .chain(aggregates.iter().map(|agg| &agg.args[0])) - // FIXME: just blindly taking the first arg is a hack - { - // Sorter column names for group by are now just determined by stringifying the expression, since the group by - // columns and aggregations can be practically anything. - // FIXME: either come up with something more robust, or make this something like expr.to_canonical_string() so that we can handle - // things like `count(1)` and `COUNT(1)` the same way - column_names.push(expr.to_string()); - } - let pseudo_columns = column_names - .iter() - .map(|name| Column { - name: name.clone(), - primary_key: false, - ty: crate::schema::Type::Null, - }) - .collect::>(); - - let pseudo_table = Rc::new(PseudoTable { - columns: pseudo_columns, - }); - - let pseudo_cursor = program - .alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); - - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: sorter_key_register, - num_fields: current_operator_column_count, - }); - - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: group_by_metadata.sort_cursor, - pc_if_empty: group_by_metadata.grouping_done_label, - }, - group_by_metadata.grouping_done_label, - ); - - program.defer_label_resolution( - group_by_metadata.sorter_data_label, - program.offset() as usize, - ); - program.emit_insn(Insn::SorterData { - cursor_id: group_by_metadata.sort_cursor, - dest_reg: group_by_metadata.sorter_key_register, - pseudo_cursor, - }); - - let groups_start_reg = program.alloc_registers(group_by.len()); - for (i, expr) in group_by.iter().enumerate() { - let sorter_column_index = - resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; - let group_reg = groups_start_reg + i; - program.emit_insn(Insn::Column { - cursor_id: pseudo_cursor, - column: sorter_column_index, - dest: group_reg, - }); - } - - program.emit_insn(Insn::Compare { - start_reg_a: comparison_register, - start_reg_b: groups_start_reg, - count: group_by.len(), - }); - - let agg_step_label = program.allocate_label(); - - program.add_comment( - program.offset(), - "start new group if comparison is not equal", - ); - program.emit_insn_with_label_dependency( - Insn::Jump { - target_pc_lt: program.offset() + 1, - target_pc_eq: agg_step_label, - target_pc_gt: program.offset() + 1, - }, - agg_step_label, - ); - - program.emit_insn(Insn::Move { - source_reg: groups_start_reg, - dest_reg: comparison_register, - count: group_by.len(), - }); - - program.add_comment( - program.offset(), - "check if ended group had data, and output if so", - ); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_output_label, - return_reg: - subroutine_accumulator_output_return_offset_register, - }, - subroutine_accumulator_output_label, - ); - - program.add_comment(program.offset(), "check abort flag"); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: abort_flag_register, - target_pc: halt_label, - decrement_by: 0, - }, - m.termination_label_stack[0], - ); - - program - .add_comment(program.offset(), "goto clear accumulator subroutine"); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: subroutine_accumulator_clear_label, - return_reg: 
subroutine_accumulator_clear_return_offset_register, - }, - subroutine_accumulator_clear_label, - ); - - program.resolve_label(agg_step_label, program.offset()); - let start_reg = m.aggregation_start_registers.get(id).unwrap(); - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = start_reg + i; - translate_aggregation( - program, - referenced_tables, - agg, - agg_result_reg, - Some(pseudo_cursor), - )?; - } - - program.add_comment( - program.offset(), - "don't emit group columns if continuing existing group", - ); - program.emit_insn_with_label_dependency( - Insn::If { - target_pc: accumulator_indicator_set_true_label, - reg: data_in_accumulator_indicator_register, - null_reg: 0, // unused in this case - }, - accumulator_indicator_set_true_label, - ); - - for (i, expr) in group_by.iter().enumerate() { - let key_reg = group_exprs_start_register + i; - let sorter_column_index = - resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; - program.emit_insn(Insn::Column { - cursor_id: pseudo_cursor, - column: sorter_column_index, - dest: key_reg, - }); - } - - program.resolve_label( - accumulator_indicator_set_true_label, - program.offset(), - ); - program.add_comment(program.offset(), "indicate data in accumulator"); - program.emit_insn(Insn::Integer { - value: 1, - dest: data_in_accumulator_indicator_register, - }); - - return Ok(OpStepResult::Continue); - } - GROUP_BY_PREPARE_ROW => { - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterNext { - cursor_id: group_by_metadata.sort_cursor, - pc_if_next: group_by_metadata.sorter_data_label, - }, - group_by_metadata.sorter_data_label, - ); - - program.resolve_label( - group_by_metadata.grouping_done_label, - program.offset(), - ); - - program.add_comment(program.offset(), "emit row for final group"); - program.emit_insn_with_label_dependency( - Insn::Gosub { - target_pc: group_by_metadata - .subroutine_accumulator_output_label, - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }, - group_by_metadata.subroutine_accumulator_output_label, - ); - - program.add_comment(program.offset(), "group by finished"); - let termination_label = - m.termination_label_stack[m.termination_label_stack.len() - 2]; - program.emit_insn_with_label_dependency( - Insn::Goto { - target_pc: termination_label, - }, - termination_label, - ); - program.emit_insn(Insn::Integer { - value: 1, - dest: group_by_metadata.abort_flag_register, - }); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - program.resolve_label( - group_by_metadata.subroutine_accumulator_output_label, - program.offset(), - ); - - program.add_comment( - program.offset(), - "output group by row subroutine start", - ); - let termination_label = *m.termination_label_stack.last().unwrap(); - program.emit_insn_with_label_dependency( - Insn::IfPos { - reg: group_by_metadata.data_in_accumulator_indicator_register, - target_pc: termination_label, - decrement_by: 0, - }, - termination_label, - ); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - return Ok(OpStepResult::ReadyToEmit); - } - GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE => { - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_output_return_offset_register, - }); - - 
program.add_comment( - program.offset(), - "clear accumulator subroutine start", - ); - program.resolve_label( - group_by_metadata.subroutine_accumulator_clear_label, - program.offset(), - ); - let start_reg = group_by_metadata.group_exprs_accumulator_register; - program.emit_insn(Insn::Null { - dest: start_reg, - dest_end: Some(start_reg + group_by.len() + aggregates.len() - 1), - }); - - program.emit_insn(Insn::Integer { - value: 0, - dest: group_by_metadata.data_in_accumulator_indicator_register, - }); - program.emit_insn(Insn::Return { - return_reg: group_by_metadata - .subroutine_accumulator_clear_return_offset_register, - }); - } - _ => { - return Ok(OpStepResult::Done); - } - } - } - - // Non-grouped aggregation e.g. SELECT COUNT(*) FROM t - - const AGGREGATE_INIT: usize = 1; - const AGGREGATE_WAIT_UNTIL_SOURCE_READY: usize = 2; - match *step { - AGGREGATE_INIT => { - let agg_final_label = program.allocate_label(); - m.termination_label_stack.push(agg_final_label); - let num_aggs = aggregates.len(); - let start_reg = program.alloc_registers(num_aggs); - m.aggregation_start_registers.insert(*id, start_reg); - - Ok(OpStepResult::Continue) - } - AGGREGATE_WAIT_UNTIL_SOURCE_READY => loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => {} - OpStepResult::ReadyToEmit => { - let start_reg = m.aggregation_start_registers.get(id).unwrap(); - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = start_reg + i; - translate_aggregation( - program, - referenced_tables, - agg, - agg_result_reg, - None, - )?; - } - } - OpStepResult::Done => { - return Ok(OpStepResult::ReadyToEmit); - } - } - }, - _ => Ok(OpStepResult::Done), - } - } - Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::Limit { source, step, .. } => { - *step += 1; - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::ReadyToEmit); - } - OpStepResult::Done => return Ok(OpStepResult::Done), - } - } - } - Operator::Order { - id, - source, - key, - step, - } => { - *step += 1; - const ORDER_INIT: usize = 1; - const ORDER_INSERT_INTO_SORTER: usize = 2; - const ORDER_SORT_AND_OPEN_LOOP: usize = 3; - const ORDER_NEXT: usize = 4; - match *step { - ORDER_INIT => { - m.termination_label_stack.push(program.allocate_label()); - let sort_cursor = program.alloc_cursor_id(None, None); - m.sorts.insert( - *id, - SortMetadata { - sort_cursor, - pseudo_table_cursor: usize::MAX, // will be set later - sorter_data_register: program.alloc_register(), - sorter_data_label: program.allocate_label(), - done_label: program.allocate_label(), - }, - ); - let mut order = Vec::new(); - for (_, direction) in key.iter() { - order.push(OwnedValue::Integer(*direction as i64)); - } - program.emit_insn(Insn::SorterOpen { - cursor_id: sort_cursor, - columns: key.len(), - order: OwnedRecord::new(order), - }); - - loop { - match source.step(program, m, referenced_tables)? 
{ - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit => { - return Ok(OpStepResult::Continue); - } - OpStepResult::Done => { - return Ok(OpStepResult::Done); - } - } - } - } - ORDER_INSERT_INTO_SORTER => { - let sort_keys_count = key.len(); - let source_cols_count = source.column_count(referenced_tables); - let start_reg = program.alloc_registers(sort_keys_count); - source.result_columns(program, referenced_tables, m, None)?; - - for (i, (expr, _)) in key.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - None, - m.expr_result_cache - .get_cached_result_registers(*id, i) - .as_ref(), - )?; - } - - let sort_metadata = m.sorts.get_mut(id).unwrap(); - program.emit_insn(Insn::MakeRecord { - start_reg, - count: sort_keys_count + source_cols_count, - dest_reg: sort_metadata.sorter_data_register, - }); - - program.emit_insn(Insn::SorterInsert { - cursor_id: sort_metadata.sort_cursor, - record_reg: sort_metadata.sorter_data_register, - }); - - Ok(OpStepResult::Continue) - } - #[allow(clippy::never_loop)] - ORDER_SORT_AND_OPEN_LOOP => { - loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Done => { - break; - } - _ => unreachable!(), - } - } - program.resolve_label( - m.termination_label_stack.pop().unwrap(), - program.offset(), - ); - let column_names = source.column_names(); - let mut pseudo_columns = vec![]; - for (i, _) in key.iter().enumerate() { - pseudo_columns.push(Column { - name: format!("sort_key_{}", i), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - for name in column_names { - pseudo_columns.push(Column { - name: name.clone(), - primary_key: false, - ty: crate::schema::Type::Null, - }); - } - - let num_fields = pseudo_columns.len(); - - let pseudo_cursor = program.alloc_cursor_id( - None, - Some(Table::Pseudo(Rc::new(PseudoTable { - columns: pseudo_columns, - }))), - ); - let sort_metadata = m.sorts.get(id).unwrap(); - - program.emit_insn(Insn::OpenPseudo { - cursor_id: pseudo_cursor, - content_reg: sort_metadata.sorter_data_register, - num_fields, - }); - - program.emit_insn_with_label_dependency( - Insn::SorterSort { - cursor_id: sort_metadata.sort_cursor, - pc_if_empty: sort_metadata.done_label, - }, - sort_metadata.done_label, - ); - - program.defer_label_resolution( - sort_metadata.sorter_data_label, - program.offset() as usize, - ); - program.emit_insn(Insn::SorterData { - cursor_id: sort_metadata.sort_cursor, - dest_reg: sort_metadata.sorter_data_register, - pseudo_cursor, - }); - - let sort_metadata = m.sorts.get_mut(id).unwrap(); - - sort_metadata.pseudo_table_cursor = pseudo_cursor; - - Ok(OpStepResult::ReadyToEmit) - } - ORDER_NEXT => { - let sort_metadata = m.sorts.get(id).unwrap(); - program.emit_insn_with_label_dependency( - Insn::SorterNext { - cursor_id: sort_metadata.sort_cursor, - pc_if_next: sort_metadata.sorter_data_label, - }, - sort_metadata.sorter_data_label, - ); - - program.resolve_label(sort_metadata.done_label, program.offset()); - - Ok(OpStepResult::Done) - } - _ => unreachable!(), - } - } - Operator::Projection { source, step, .. } => { - *step += 1; - const PROJECTION_WAIT_UNTIL_SOURCE_READY: usize = 1; - const PROJECTION_FINALIZE_SOURCE: usize = 2; - match *step { - PROJECTION_WAIT_UNTIL_SOURCE_READY => loop { - match source.step(program, m, referenced_tables)? { - OpStepResult::Continue => continue, - OpStepResult::ReadyToEmit | OpStepResult::Done => { - if matches!(**source, Operator::Aggregate { .. 
}) { - source.result_columns(program, referenced_tables, m, None)?; - } - return Ok(OpStepResult::ReadyToEmit); - } - } - }, - PROJECTION_FINALIZE_SOURCE => { - match source.step(program, m, referenced_tables)? { - OpStepResult::Done => Ok(OpStepResult::Done), - _ => unreachable!(), - } - } - _ => Ok(OpStepResult::Done), - } - } - Operator::Nothing => Ok(OpStepResult::Done), - } - } - fn result_columns( - &self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - m: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result { - let col_count = self.column_count(referenced_tables); - match self { - Operator::Scan { - table_reference, .. - } => { - let start_reg = program.alloc_registers(col_count); - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); - - Ok(start_reg) - } - Operator::Search { - table_reference, .. - } => { - let start_reg = program.alloc_registers(col_count); - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); - - Ok(start_reg) - } - Operator::Join { left, right, .. } => { - let left_start_reg = - left.result_columns(program, referenced_tables, m, cursor_override)?; - right.result_columns(program, referenced_tables, m, cursor_override)?; - - Ok(left_start_reg) - } - Operator::Aggregate { - id, - aggregates, - group_by, - .. - } => { - let agg_start_reg = m.aggregation_start_registers.get(id).unwrap(); - program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); - let mut result_column_idx = 0; - for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = *agg_start_reg + i; - program.emit_insn(Insn::AggFinal { - register: agg_result_reg, - func: agg.func.clone(), - }); - m.expr_result_cache.cache_result_register( - *id, - result_column_idx, - agg_result_reg, - agg.original_expr.clone(), - ); - result_column_idx += 1; - } - - if let Some(group_by) = group_by { - let output_row_start_reg = - program.alloc_registers(aggregates.len() + group_by.len()); - let group_by_metadata = m.group_bys.get(id).unwrap(); - program.emit_insn(Insn::Copy { - src_reg: group_by_metadata.group_exprs_accumulator_register, - dst_reg: output_row_start_reg, - amount: group_by.len() - 1, - }); - for (i, source_expr) in group_by.iter().enumerate() { - m.expr_result_cache.cache_result_register( - *id, - result_column_idx + i, - output_row_start_reg + i, - source_expr.clone(), - ); - } - program.emit_insn(Insn::Copy { - src_reg: *agg_start_reg, - dst_reg: output_row_start_reg + group_by.len(), - amount: aggregates.len() - 1, - }); - - Ok(output_row_start_reg) - } else { - Ok(*agg_start_reg) - } - } - Operator::Filter { .. } => unreachable!("predicates have been pushed down"), - Operator::Limit { .. 
} => { - unimplemented!() - } - Operator::Order { id, key, .. } => { - let cursor_id = m.sorts.get(id).unwrap().pseudo_table_cursor; - let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); - let start_column_offset = key.len(); - let column_count = pseudo_table.columns().len() - start_column_offset; - let start_reg = program.alloc_registers(column_count); - translate_table_columns( - program, - cursor_id, - &pseudo_table, - start_column_offset, - start_reg, - ); - - Ok(start_reg) - } - Operator::Projection { - expressions, id, .. - } => { - let expr_count = expressions - .iter() - .map(|e| e.column_count(referenced_tables)) - .sum(); - let start_reg = program.alloc_registers(expr_count); - let mut cur_reg = start_reg; - for expr in expressions { - match expr { - ProjectionColumn::Column(expr) => { - translate_expr( - program, - Some(referenced_tables), - expr, - cur_reg, - cursor_override.map(|c| c.cursor_id), - m.expr_result_cache - .get_cached_result_registers(*id, cur_reg - start_reg) - .as_ref(), - )?; - m.expr_result_cache.cache_result_register( - *id, - cur_reg - start_reg, - cur_reg, - expr.clone(), - ); - cur_reg += 1; - } - ProjectionColumn::Star => { - for table_reference in referenced_tables.iter() { - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); - let cursor_id = - cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program.resolve_cursor_id( - &table_reference.table_identifier, - None, - ) - }); - let start_column_offset = - cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - cur_reg = translate_table_columns( - program, - cursor_id, - &table, - start_column_offset, - cur_reg, - ); - } - } - ProjectionColumn::TableStar(table_reference) => { - let table_ref = referenced_tables - .iter() - .find(|t| t.table_identifier == table_reference.table_identifier) - .unwrap(); - - let table = cursor_override - .map(|c| c.pseudo_table.clone()) - .unwrap_or_else(|| Table::BTree(table_ref.table.clone())); - let cursor_id = - cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { - program - .resolve_cursor_id(&table_reference.table_identifier, None) - }); - let start_column_offset = - cursor_override.map(|c| c.sort_key_len).unwrap_or(0); - cur_reg = translate_table_columns( - program, - cursor_id, - &table, - start_column_offset, - cur_reg, - ); - } - } - } - - Ok(start_reg) - } - Operator::Nothing => unimplemented!(), - } - } - fn result_row( - &mut self, - program: &mut ProgramBuilder, - referenced_tables: &[BTreeTableReference], - m: &mut Metadata, - cursor_override: Option<&SortCursorOverride>, - ) -> Result<()> { - match self { - Operator::Limit { source, limit, .. 
} => { - source.result_row(program, referenced_tables, m, cursor_override)?; - let limit_reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: *limit as i64, - dest: limit_reg, - }); - program.mark_last_insn_constant(); - let jump_label = m.termination_label_stack.first().unwrap(); - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - - Ok(()) - } - operator => { - let start_reg = - operator.result_columns(program, referenced_tables, m, cursor_override)?; - program.emit_insn(Insn::ResultRow { - start_reg, - count: operator.column_count(referenced_tables), - }); - Ok(()) - } - } - } -} - -fn prologue( - cache: ExpressionResultCache, -) -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { +// /// Emitters return one of three possible results from the step() method: +// /// - Continue: the operator is not yet ready to emit a result row +// /// - ReadyToEmit: the operator is ready to emit a result row +// /// - Done: the operator has completed execution +// /// For example, a Scan operator will return Continue until it has opened a cursor, rewound it and applied any predicates. +// /// At that point, it will return ReadyToEmit. +// /// Finally, when the Scan operator has emitted a Next instruction, it will return Done. +// /// +// /// Parent operators are free to make decisions based on the result a child operator's step() method. +// /// +// /// When the root operator of a Plan returns ReadyToEmit, a ResultRow will always be emitted. +// /// When the root operator returns Done, the bytecode plan is complete. +// #[derive(Debug, PartialEq)] +// pub enum OpStepResult { +// Continue, +// ReadyToEmit, +// Done, +// } + +// impl Emitter for SourceOperator { +// fn step( +// &mut self, +// program: &mut ProgramBuilder, +// m: &mut Metadata, +// referenced_tables: &[BTreeTableReference], +// ) -> Result { +// let current_operator_column_count = self.column_count(referenced_tables); +// match self { +// SourceOperator::Scan { +// table_reference, +// id, +// step, +// predicates, +// iter_dir, +// } => { +// *step += 1; +// const SCAN_OPEN_READ: usize = 1; +// const SCAN_BODY: usize = 2; +// const SCAN_NEXT: usize = 3; +// let reverse = iter_dir +// .as_ref() +// .is_some_and(|iter_dir| *iter_dir == IterationDirection::Backwards); +// match *step { +// SCAN_OPEN_READ => { +// let cursor_id = program.alloc_cursor_id( +// Some(table_reference.table_identifier.clone()), +// Some(Table::BTree(table_reference.table.clone())), +// ); +// let root_page = table_reference.table.root_page; +// let next_row_label = program.allocate_label(); +// m.next_row_labels.insert(*id, next_row_label); +// program.emit_insn(Insn::OpenReadAsync { +// cursor_id, +// root_page, +// }); +// program.emit_insn(Insn::OpenReadAwait); + +// Ok(OpStepResult::Continue) +// } +// SCAN_BODY => { +// let cursor_id = +// program.resolve_cursor_id(&table_reference.table_identifier, None); +// if reverse { +// program.emit_insn(Insn::LastAsync { cursor_id }); +// } else { +// program.emit_insn(Insn::RewindAsync { cursor_id }); +// } +// let scan_loop_body_label = program.allocate_label(); +// let halt_label = m.termination_label_stack.last().unwrap(); +// program.emit_insn_with_label_dependency( +// if reverse { +// Insn::LastAwait { +// cursor_id, +// pc_if_empty: *halt_label, +// } +// } else { +// Insn::RewindAwait { +// cursor_id, +// pc_if_empty: *halt_label, +// } +// }, +// *halt_label, +// ); +// 
m.scan_loop_body_labels.push(scan_loop_body_label); +// program.defer_label_resolution( +// scan_loop_body_label, +// program.offset() as usize, +// ); + +// let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); +// if let Some(preds) = predicates { +// for expr in preds { +// let jump_target_when_true = program.allocate_label(); +// let condition_metadata = ConditionMetadata { +// jump_if_condition_is_true: false, +// jump_target_when_true, +// jump_target_when_false: *jump_label, +// }; +// translate_condition_expr( +// program, +// referenced_tables, +// expr, +// None, +// condition_metadata, +// m.result_set_register_start, +// )?; +// program.resolve_label(jump_target_when_true, program.offset()); +// } +// } + +// Ok(OpStepResult::ReadyToEmit) +// } +// SCAN_NEXT => { +// let cursor_id = +// program.resolve_cursor_id(&table_reference.table_identifier, None); +// program +// .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); +// if reverse { +// program.emit_insn(Insn::PrevAsync { cursor_id }); +// } else { +// program.emit_insn(Insn::NextAsync { cursor_id }); +// } +// let jump_label = m.scan_loop_body_labels.pop().unwrap(); + +// if reverse { +// program.emit_insn_with_label_dependency( +// Insn::PrevAwait { +// cursor_id, +// pc_if_next: jump_label, +// }, +// jump_label, +// ); +// } else { +// program.emit_insn_with_label_dependency( +// Insn::NextAwait { +// cursor_id, +// pc_if_next: jump_label, +// }, +// jump_label, +// ); +// } +// Ok(OpStepResult::Done) +// } +// _ => Ok(OpStepResult::Done), +// } +// } +// SourceOperator::Search { +// table_reference, +// search, +// predicates, +// step, +// id, +// .. +// } => { +// *step += 1; +// const SEARCH_OPEN_READ: usize = 1; +// const SEARCH_BODY: usize = 2; +// const SEARCH_NEXT: usize = 3; +// match *step { +// SEARCH_OPEN_READ => { +// let table_cursor_id = program.alloc_cursor_id( +// Some(table_reference.table_identifier.clone()), +// Some(Table::BTree(table_reference.table.clone())), +// ); + +// let next_row_label = program.allocate_label(); + +// if !matches!(search, Search::PrimaryKeyEq { .. }) { +// // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. +// m.next_row_labels.insert(*id, next_row_label); +// } + +// let scan_loop_body_label = program.allocate_label(); +// m.scan_loop_body_labels.push(scan_loop_body_label); +// program.emit_insn(Insn::OpenReadAsync { +// cursor_id: table_cursor_id, +// root_page: table_reference.table.root_page, +// }); +// program.emit_insn(Insn::OpenReadAwait); + +// if let Search::IndexSearch { index, .. } = search { +// let index_cursor_id = program.alloc_cursor_id( +// Some(index.name.clone()), +// Some(Table::Index(index.clone())), +// ); +// program.emit_insn(Insn::OpenReadAsync { +// cursor_id: index_cursor_id, +// root_page: index.root_page, +// }); +// program.emit_insn(Insn::OpenReadAwait); +// } +// Ok(OpStepResult::Continue) +// } +// SEARCH_BODY => { +// let table_cursor_id = +// program.resolve_cursor_id(&table_reference.table_identifier, None); + +// // Open the loop for the index search. +// // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. +// if !matches!(search, Search::PrimaryKeyEq { .. }) { +// let index_cursor_id = if let Search::IndexSearch { index, .. 
} = search +// { +// Some(program.resolve_cursor_id(&index.name, None)) +// } else { +// None +// }; +// let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); +// let cmp_reg = program.alloc_register(); +// let (cmp_expr, cmp_op) = match search { +// Search::IndexSearch { +// cmp_expr, cmp_op, .. +// } => (cmp_expr, cmp_op), +// Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), +// Search::PrimaryKeyEq { .. } => unreachable!(), +// }; +// // TODO this only handles ascending indexes +// match cmp_op { +// ast::Operator::Equals +// | ast::Operator::Greater +// | ast::Operator::GreaterEquals => { +// translate_expr( +// program, +// Some(referenced_tables), +// cmp_expr, +// cmp_reg, +// None, +// m.result_set_register_start, +// )?; +// } +// ast::Operator::Less | ast::Operator::LessEquals => { +// program.emit_insn(Insn::Null { +// dest: cmp_reg, +// dest_end: None, +// }); +// } +// _ => unreachable!(), +// } +// program.emit_insn_with_label_dependency( +// match cmp_op { +// ast::Operator::Equals | ast::Operator::GreaterEquals => { +// Insn::SeekGE { +// is_index: index_cursor_id.is_some(), +// cursor_id: index_cursor_id.unwrap_or(table_cursor_id), +// start_reg: cmp_reg, +// num_regs: 1, +// target_pc: *m.termination_label_stack.last().unwrap(), +// } +// } +// ast::Operator::Greater +// | ast::Operator::Less +// | ast::Operator::LessEquals => Insn::SeekGT { +// is_index: index_cursor_id.is_some(), +// cursor_id: index_cursor_id.unwrap_or(table_cursor_id), +// start_reg: cmp_reg, +// num_regs: 1, +// target_pc: *m.termination_label_stack.last().unwrap(), +// }, +// _ => unreachable!(), +// }, +// *m.termination_label_stack.last().unwrap(), +// ); +// if *cmp_op == ast::Operator::Less +// || *cmp_op == ast::Operator::LessEquals +// { +// translate_expr( +// program, +// Some(referenced_tables), +// cmp_expr, +// cmp_reg, +// None, +// m.result_set_register_start, +// )?; +// } + +// program.defer_label_resolution( +// scan_loop_body_label, +// program.offset() as usize, +// ); +// // TODO: We are currently only handling ascending indexes. +// // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. +// // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. +// // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. +// // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. +// // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. +// // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. +// // +// // For primary key searches we emit RowId and then compare it to the seek value. 
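+//                         //
+//                         // Illustrative shape only (cursor and register names here are placeholders,
+//                         // not what the builder actually allocates): for an ascending index and a
+//                         // predicate like `index_key <= 10`, the code below ends up emitting roughly
+//                         //
+//                         //     Null          cmp_reg                            ; no lower bound
+//                         //     SeekGT        index_cursor, cmp_reg -> halt      ; start at beginning of index
+//                         //     <translate 10 into cmp_reg>                      ; upper bound, after the seek
+//                         //   loop_body:
+//                         //     IdxGT         index_cursor, cmp_reg -> next/halt ; stop once key > 10
+//                         //     DeferredSeek  index_cursor -> table_cursor
+//                         //     ... remaining predicates, result row ...
+//                         //     NextAsync/NextAwait -> loop_body
+//                         //
+//                         // while for `index_key > 10` the bound goes into cmp_reg up front, SeekGT
+//                         // positions the cursor past 10, and no per-row Idx comparison is needed.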
+ +// let abort_jump_target = *m +// .next_row_labels +// .get(id) +// .unwrap_or(m.termination_label_stack.last().unwrap()); +// match cmp_op { +// ast::Operator::Equals | ast::Operator::LessEquals => { +// if let Some(index_cursor_id) = index_cursor_id { +// program.emit_insn_with_label_dependency( +// Insn::IdxGT { +// cursor_id: index_cursor_id, +// start_reg: cmp_reg, +// num_regs: 1, +// target_pc: abort_jump_target, +// }, +// abort_jump_target, +// ); +// } else { +// let rowid_reg = program.alloc_register(); +// program.emit_insn(Insn::RowId { +// cursor_id: table_cursor_id, +// dest: rowid_reg, +// }); +// program.emit_insn_with_label_dependency( +// Insn::Gt { +// lhs: rowid_reg, +// rhs: cmp_reg, +// target_pc: abort_jump_target, +// }, +// abort_jump_target, +// ); +// } +// } +// ast::Operator::Less => { +// if let Some(index_cursor_id) = index_cursor_id { +// program.emit_insn_with_label_dependency( +// Insn::IdxGE { +// cursor_id: index_cursor_id, +// start_reg: cmp_reg, +// num_regs: 1, +// target_pc: abort_jump_target, +// }, +// abort_jump_target, +// ); +// } else { +// let rowid_reg = program.alloc_register(); +// program.emit_insn(Insn::RowId { +// cursor_id: table_cursor_id, +// dest: rowid_reg, +// }); +// program.emit_insn_with_label_dependency( +// Insn::Ge { +// lhs: rowid_reg, +// rhs: cmp_reg, +// target_pc: abort_jump_target, +// }, +// abort_jump_target, +// ); +// } +// } +// _ => {} +// } + +// if let Some(index_cursor_id) = index_cursor_id { +// program.emit_insn(Insn::DeferredSeek { +// index_cursor_id, +// table_cursor_id, +// }); +// } +// } + +// let jump_label = m +// .next_row_labels +// .get(id) +// .unwrap_or(m.termination_label_stack.last().unwrap()); + +// if let Search::PrimaryKeyEq { cmp_expr } = search { +// let src_reg = program.alloc_register(); +// translate_expr( +// program, +// Some(referenced_tables), +// cmp_expr, +// src_reg, +// None, +// m.result_set_register_start, +// )?; +// program.emit_insn_with_label_dependency( +// Insn::SeekRowid { +// cursor_id: table_cursor_id, +// src_reg, +// target_pc: *jump_label, +// }, +// *jump_label, +// ); +// } +// if let Some(predicates) = predicates { +// for predicate in predicates.iter() { +// let jump_target_when_true = program.allocate_label(); +// let condition_metadata = ConditionMetadata { +// jump_if_condition_is_true: false, +// jump_target_when_true, +// jump_target_when_false: *jump_label, +// }; +// translate_condition_expr( +// program, +// referenced_tables, +// predicate, +// None, +// condition_metadata, +// m.result_set_register_start, +// )?; +// program.resolve_label(jump_target_when_true, program.offset()); +// } +// } + +// Ok(OpStepResult::ReadyToEmit) +// } +// SEARCH_NEXT => { +// if matches!(search, Search::PrimaryKeyEq { .. }) { +// // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. +// return Ok(OpStepResult::Done); +// } +// let cursor_id = match search { +// Search::IndexSearch { index, .. } => { +// program.resolve_cursor_id(&index.name, None) +// } +// Search::PrimaryKeySearch { .. } => { +// program.resolve_cursor_id(&table_reference.table_identifier, None) +// } +// Search::PrimaryKeyEq { .. 
} => unreachable!(), +// }; +// program +// .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); +// program.emit_insn(Insn::NextAsync { cursor_id }); +// let jump_label = m.scan_loop_body_labels.pop().unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::NextAwait { +// cursor_id, +// pc_if_next: jump_label, +// }, +// jump_label, +// ); +// Ok(OpStepResult::Done) +// } +// _ => Ok(OpStepResult::Done), +// } +// } +// SourceOperator::Join { +// left, +// right, +// outer, +// predicates, +// step, +// id, +// .. +// } => { +// *step += 1; +// const JOIN_INIT: usize = 1; +// const JOIN_DO_JOIN: usize = 2; +// const JOIN_END: usize = 3; +// match *step { +// JOIN_INIT => { +// if *outer { +// let lj_metadata = LeftJoinMetadata { +// match_flag_register: program.alloc_register(), +// set_match_flag_true_label: program.allocate_label(), +// check_match_flag_label: program.allocate_label(), +// on_match_jump_to_label: program.allocate_label(), +// }; +// m.left_joins.insert(*id, lj_metadata); +// } +// left.step(program, m, referenced_tables)?; +// right.step(program, m, referenced_tables)?; + +// Ok(OpStepResult::Continue) +// } +// JOIN_DO_JOIN => { +// left.step(program, m, referenced_tables)?; + +// let mut jump_target_when_false = *m +// .next_row_labels +// .get(&right.id()) +// .or(m.next_row_labels.get(&left.id())) +// .unwrap_or(m.termination_label_stack.last().unwrap()); + +// if *outer { +// let lj_meta = m.left_joins.get(id).unwrap(); +// program.emit_insn(Insn::Integer { +// value: 0, +// dest: lj_meta.match_flag_register, +// }); +// jump_target_when_false = lj_meta.check_match_flag_label; +// } +// m.next_row_labels.insert(right.id(), jump_target_when_false); + +// right.step(program, m, referenced_tables)?; + +// if let Some(predicates) = predicates { +// let jump_target_when_true = program.allocate_label(); +// let condition_metadata = ConditionMetadata { +// jump_if_condition_is_true: false, +// jump_target_when_true, +// jump_target_when_false, +// }; +// for predicate in predicates.iter() { +// translate_condition_expr( +// program, +// referenced_tables, +// predicate, +// None, +// condition_metadata, +// m.result_set_register_start, +// )?; +// } +// program.resolve_label(jump_target_when_true, program.offset()); +// } + +// if *outer { +// let lj_meta = m.left_joins.get(id).unwrap(); +// program.defer_label_resolution( +// lj_meta.set_match_flag_true_label, +// program.offset() as usize, +// ); +// program.emit_insn(Insn::Integer { +// value: 1, +// dest: lj_meta.match_flag_register, +// }); +// } + +// Ok(OpStepResult::ReadyToEmit) +// } +// JOIN_END => { +// right.step(program, m, referenced_tables)?; + +// if *outer { +// let lj_meta = m.left_joins.get(id).unwrap(); +// // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) +// program.resolve_label(lj_meta.check_match_flag_label, program.offset()); +// program.emit_insn_with_label_dependency( +// Insn::IfPos { +// reg: lj_meta.match_flag_register, +// target_pc: lj_meta.on_match_jump_to_label, +// decrement_by: 0, +// }, +// lj_meta.on_match_jump_to_label, +// ); +// // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL +// let right_cursor_id = match right.as_ref() { +// SourceOperator::Scan { +// table_reference, .. +// } => program +// .resolve_cursor_id(&table_reference.table_identifier, None), +// SourceOperator::Search { +// table_reference, .. 
+// } => program +// .resolve_cursor_id(&table_reference.table_identifier, None), +// _ => unreachable!(), +// }; +// program.emit_insn(Insn::NullRow { +// cursor_id: right_cursor_id, +// }); +// // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null +// program.emit_insn_with_label_dependency( +// Insn::Goto { +// target_pc: lj_meta.set_match_flag_true_label, +// }, +// lj_meta.set_match_flag_true_label, +// ); +// } +// let next_row_label = if *outer { +// m.left_joins.get(id).unwrap().on_match_jump_to_label +// } else { +// *m.next_row_labels.get(&right.id()).unwrap() +// }; +// // This points to the NextAsync instruction of the left table +// program.resolve_label(next_row_label, program.offset()); +// left.step(program, m, referenced_tables)?; + +// Ok(OpStepResult::Done) +// } +// _ => Ok(OpStepResult::Done), +// } +// } +// SourceOperator::Projection { +// id, +// source, +// expressions, +// aggregates, +// group_by, +// step, +// .. +// } => { +// *step += 1; + +// if !aggregates.is_empty() && group_by.is_none() { +// const PROJECTION_WAIT_UNTIL_SOURCE_READY: usize = 1; +// const PROJECTION_FINALIZE_SOURCE: usize = 2; +// match *step { +// PROJECTION_WAIT_UNTIL_SOURCE_READY => loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Continue => continue, +// OpStepResult::ReadyToEmit | OpStepResult::Done => { +// return Ok(OpStepResult::ReadyToEmit); +// } +// } +// }, +// PROJECTION_FINALIZE_SOURCE => { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Done => return Ok(OpStepResult::Done), +// _ => unreachable!(), +// } +// } +// _ => return Ok(OpStepResult::Done), +// } +// } + +// // Group by aggregation eg. SELECT a, b, sum(c) FROM t GROUP BY a, b +// if let Some(group_by) = group_by { +// const GROUP_BY_INIT: usize = 1; +// const GROUP_BY_INSERT_INTO_SORTER: usize = 2; +// const GROUP_BY_SORT_AND_COMPARE: usize = 3; +// const GROUP_BY_PREPARE_ROW: usize = 4; +// const GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE: usize = 5; +// match *step { +// GROUP_BY_INIT => { +// let agg_final_label = program.allocate_label(); +// m.termination_label_stack.push(agg_final_label); +// let num_aggs = aggregates.len(); + +// let sort_cursor = program.alloc_cursor_id(None, None); + +// let abort_flag_register = program.alloc_register(); +// let data_in_accumulator_indicator_register = program.alloc_register(); +// let group_exprs_comparison_register = +// program.alloc_registers(group_by.len()); +// let group_exprs_accumulator_register = +// program.alloc_registers(group_by.len()); +// let agg_exprs_start_reg = program.alloc_registers(num_aggs); +// m.aggregation_start_registers +// .insert(*id, agg_exprs_start_reg); +// let sorter_key_register = program.alloc_register(); + +// let subroutine_accumulator_clear_label = program.allocate_label(); +// let subroutine_accumulator_output_label = program.allocate_label(); +// let sorter_data_label = program.allocate_label(); +// let grouping_done_label = program.allocate_label(); + +// let mut order = Vec::new(); +// const ASCENDING: i64 = 0; +// for _ in group_by.iter() { +// order.push(OwnedValue::Integer(ASCENDING)); +// } +// program.emit_insn(Insn::SorterOpen { +// cursor_id: sort_cursor, +// columns: current_operator_column_count, +// order: OwnedRecord::new(order), +// }); + +// program.add_comment(program.offset(), "clear group by abort flag"); +// program.emit_insn(Insn::Integer { +// value: 0, +// dest: abort_flag_register, +// }); + 
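+//                         // Register layout sketch for the allocations above, using the example
+//                         // query `SELECT a, b, sum(c) FROM t GROUP BY a, b` (register numbers are
+//                         // purely illustrative; they follow whatever was allocated earlier):
+//                         //
+//                         //     r1      abort flag
+//                         //     r2      "data in accumulator" indicator
+//                         //     r3..r4  group key (a, b) that each sorted row is compared against
+//                         //     r5..r6  group key (a, b) copied into the output row
+//                         //     r7      sum(c) accumulator, finalized later with AggFinal
+//                         //     r8      packed sorter record (MakeRecord / SorterData)
+//                         //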
+// program.add_comment( +// program.offset(), +// "initialize group by comparison registers to NULL", +// ); +// program.emit_insn(Insn::Null { +// dest: group_exprs_comparison_register, +// dest_end: if group_by.len() > 1 { +// Some(group_exprs_comparison_register + group_by.len() - 1) +// } else { +// None +// }, +// }); + +// program.add_comment( +// program.offset(), +// "go to clear accumulator subroutine", +// ); + +// let subroutine_accumulator_clear_return_offset_register = +// program.alloc_register(); +// program.emit_insn_with_label_dependency( +// Insn::Gosub { +// target_pc: subroutine_accumulator_clear_label, +// return_reg: subroutine_accumulator_clear_return_offset_register, +// }, +// subroutine_accumulator_clear_label, +// ); + +// m.group_bys.insert( +// *id, +// GroupByMetadata { +// sort_cursor, +// subroutine_accumulator_clear_label, +// subroutine_accumulator_clear_return_offset_register, +// subroutine_accumulator_output_label, +// subroutine_accumulator_output_return_offset_register: program +// .alloc_register(), +// accumulator_indicator_set_true_label: program.allocate_label(), +// sorter_data_label, +// grouping_done_label, +// abort_flag_register, +// data_in_accumulator_indicator_register, +// group_exprs_accumulator_register, +// group_exprs_comparison_register, +// sorter_key_register, +// }, +// ); + +// loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Continue => continue, +// OpStepResult::ReadyToEmit => { +// return Ok(OpStepResult::Continue); +// } +// OpStepResult::Done => { +// return Ok(OpStepResult::Done); +// } +// } +// } +// } +// GROUP_BY_INSERT_INTO_SORTER => { +// let sort_keys_count = group_by.len(); +// let start_reg = program.alloc_registers(current_operator_column_count); +// for (i, expr) in group_by.iter().enumerate() { +// let key_reg = start_reg + i; +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// key_reg, +// None, +// m.result_set_register_start, +// )?; +// } +// for (i, agg) in aggregates.iter().enumerate() { +// // TODO it's a hack to assume aggregate functions have exactly one argument. +// // Counterpoint e.g. GROUP_CONCAT(expr, separator). +// // +// // Here we are collecting scalars for the group by sorter, which will include +// // both the group by expressions and the aggregate arguments. +// // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` +// // the sorter will have two scalars: u.first_name and u.age. +// // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. +// // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. +// // +// // This is why we take the first argument of each aggregate function currently. +// // It's mostly an artifact of the current architecture being a bit poor; we should recognize +// // which scalars are dependencies of aggregate functions and explicitly collect those. 
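+//                         //
+//                         // Sketch of the sorter contents for the example above, after SorterSort
+//                         // (values are invented purely for illustration):
+//                         //
+//                         //     ["alice", 25]   \  group "alice": sum(u.age) accumulates 25, then 55
+//                         //     ["alice", 30]   /
+//                         //     ["bob",   40]      group "bob":   sum(u.age) accumulates 40
+//                         //
+//                         // The Compare/Jump pair in GROUP_BY_SORT_AND_COMPARE below notices when the
+//                         // first column changes and flushes the finished group via the output Gosub.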
+// let expr = &agg.args[0]; +// let agg_reg = start_reg + sort_keys_count + i; +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// agg_reg, +// None, +// m.result_set_register_start, +// )?; +// } + +// let group_by_metadata = m.group_bys.get(id).unwrap(); + +// program.emit_insn(Insn::MakeRecord { +// start_reg, +// count: current_operator_column_count, +// dest_reg: group_by_metadata.sorter_key_register, +// }); + +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn(Insn::SorterInsert { +// cursor_id: group_by_metadata.sort_cursor, +// record_reg: group_by_metadata.sorter_key_register, +// }); + +// return Ok(OpStepResult::Continue); +// } +// #[allow(clippy::never_loop)] +// GROUP_BY_SORT_AND_COMPARE => { +// loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Done => { +// break; +// } +// _ => unreachable!(), +// } +// } + +// let group_by_metadata = m.group_bys.get_mut(id).unwrap(); + +// let GroupByMetadata { +// group_exprs_comparison_register: comparison_register, +// subroutine_accumulator_output_return_offset_register, +// subroutine_accumulator_output_label, +// subroutine_accumulator_clear_return_offset_register, +// subroutine_accumulator_clear_label, +// data_in_accumulator_indicator_register, +// accumulator_indicator_set_true_label, +// group_exprs_accumulator_register: group_exprs_start_register, +// abort_flag_register, +// sorter_key_register, +// .. +// } = *group_by_metadata; +// let halt_label = *m.termination_label_stack.first().unwrap(); + +// let mut column_names = +// Vec::with_capacity(current_operator_column_count); +// for expr in group_by +// .iter() +// .chain(aggregates.iter().map(|agg| &agg.args[0])) +// // FIXME: just blindly taking the first arg is a hack +// { +// // Sorter column names for group by are now just determined by stringifying the expression, since the group by +// // columns and aggregations can be practically anything. 
+// // FIXME: either come up with something more robust, or make this something like expr.to_canonical_string() so that we can handle +// // things like `count(1)` and `COUNT(1)` the same way +// column_names.push(expr.to_string()); +// } +// let pseudo_columns = column_names +// .iter() +// .map(|name| Column { +// name: name.clone(), +// primary_key: false, +// ty: crate::schema::Type::Null, +// }) +// .collect::>(); + +// let pseudo_table = Rc::new(PseudoTable { +// columns: pseudo_columns, +// }); + +// let pseudo_cursor = program +// .alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); + +// program.emit_insn(Insn::OpenPseudo { +// cursor_id: pseudo_cursor, +// content_reg: sorter_key_register, +// num_fields: current_operator_column_count, +// }); + +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::SorterSort { +// cursor_id: group_by_metadata.sort_cursor, +// pc_if_empty: group_by_metadata.grouping_done_label, +// }, +// group_by_metadata.grouping_done_label, +// ); + +// program.defer_label_resolution( +// group_by_metadata.sorter_data_label, +// program.offset() as usize, +// ); +// program.emit_insn(Insn::SorterData { +// cursor_id: group_by_metadata.sort_cursor, +// dest_reg: group_by_metadata.sorter_key_register, +// pseudo_cursor, +// }); + +// let groups_start_reg = program.alloc_registers(group_by.len()); +// for (i, expr) in group_by.iter().enumerate() { +// let sorter_column_index = +// resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; +// let group_reg = groups_start_reg + i; +// program.emit_insn(Insn::Column { +// cursor_id: pseudo_cursor, +// column: sorter_column_index, +// dest: group_reg, +// }); +// } + +// program.emit_insn(Insn::Compare { +// start_reg_a: comparison_register, +// start_reg_b: groups_start_reg, +// count: group_by.len(), +// }); + +// let agg_step_label = program.allocate_label(); + +// program.add_comment( +// program.offset(), +// "start new group if comparison is not equal", +// ); +// program.emit_insn_with_label_dependency( +// Insn::Jump { +// target_pc_lt: program.offset() + 1, +// target_pc_eq: agg_step_label, +// target_pc_gt: program.offset() + 1, +// }, +// agg_step_label, +// ); + +// program.emit_insn(Insn::Move { +// source_reg: groups_start_reg, +// dest_reg: comparison_register, +// count: group_by.len(), +// }); + +// program.add_comment( +// program.offset(), +// "check if ended group had data, and output if so", +// ); +// program.emit_insn_with_label_dependency( +// Insn::Gosub { +// target_pc: subroutine_accumulator_output_label, +// return_reg: +// subroutine_accumulator_output_return_offset_register, +// }, +// subroutine_accumulator_output_label, +// ); + +// program.add_comment(program.offset(), "check abort flag"); +// program.emit_insn_with_label_dependency( +// Insn::IfPos { +// reg: abort_flag_register, +// target_pc: halt_label, +// decrement_by: 0, +// }, +// m.termination_label_stack[0], +// ); + +// program +// .add_comment(program.offset(), "goto clear accumulator subroutine"); +// program.emit_insn_with_label_dependency( +// Insn::Gosub { +// target_pc: subroutine_accumulator_clear_label, +// return_reg: subroutine_accumulator_clear_return_offset_register, +// }, +// subroutine_accumulator_clear_label, +// ); + +// program.resolve_label(agg_step_label, program.offset()); +// let start_reg = m.aggregation_start_registers.get(id).unwrap(); +// for (i, agg) in aggregates.iter().enumerate() { +// let agg_result_reg = start_reg + 
i; +// translate_aggregation( +// program, +// referenced_tables, +// agg, +// agg_result_reg, +// Some(pseudo_cursor), +// )?; +// } + +// program.add_comment( +// program.offset(), +// "don't emit group columns if continuing existing group", +// ); +// program.emit_insn_with_label_dependency( +// Insn::If { +// target_pc: accumulator_indicator_set_true_label, +// reg: data_in_accumulator_indicator_register, +// null_reg: 0, // unused in this case +// }, +// accumulator_indicator_set_true_label, +// ); + +// for (i, expr) in group_by.iter().enumerate() { +// let key_reg = group_exprs_start_register + i; +// let sorter_column_index = +// resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; +// program.emit_insn(Insn::Column { +// cursor_id: pseudo_cursor, +// column: sorter_column_index, +// dest: key_reg, +// }); +// } + +// program.resolve_label( +// accumulator_indicator_set_true_label, +// program.offset(), +// ); +// program.add_comment(program.offset(), "indicate data in accumulator"); +// program.emit_insn(Insn::Integer { +// value: 1, +// dest: data_in_accumulator_indicator_register, +// }); + +// return Ok(OpStepResult::Continue); +// } +// GROUP_BY_PREPARE_ROW => { +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::SorterNext { +// cursor_id: group_by_metadata.sort_cursor, +// pc_if_next: group_by_metadata.sorter_data_label, +// }, +// group_by_metadata.sorter_data_label, +// ); + +// program.resolve_label( +// group_by_metadata.grouping_done_label, +// program.offset(), +// ); + +// program.add_comment(program.offset(), "emit row for final group"); +// program.emit_insn_with_label_dependency( +// Insn::Gosub { +// target_pc: group_by_metadata +// .subroutine_accumulator_output_label, +// return_reg: group_by_metadata +// .subroutine_accumulator_output_return_offset_register, +// }, +// group_by_metadata.subroutine_accumulator_output_label, +// ); + +// program.add_comment(program.offset(), "group by finished"); +// let termination_label = +// m.termination_label_stack[m.termination_label_stack.len() - 2]; +// program.emit_insn_with_label_dependency( +// Insn::Goto { +// target_pc: termination_label, +// }, +// termination_label, +// ); +// program.emit_insn(Insn::Integer { +// value: 1, +// dest: group_by_metadata.abort_flag_register, +// }); +// program.emit_insn(Insn::Return { +// return_reg: group_by_metadata +// .subroutine_accumulator_output_return_offset_register, +// }); + +// program.resolve_label( +// group_by_metadata.subroutine_accumulator_output_label, +// program.offset(), +// ); + +// program.add_comment( +// program.offset(), +// "output group by row subroutine start", +// ); +// let termination_label = *m.termination_label_stack.last().unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::IfPos { +// reg: group_by_metadata.data_in_accumulator_indicator_register, +// target_pc: termination_label, +// decrement_by: 0, +// }, +// termination_label, +// ); +// program.emit_insn(Insn::Return { +// return_reg: group_by_metadata +// .subroutine_accumulator_output_return_offset_register, +// }); + +// return Ok(OpStepResult::ReadyToEmit); +// } +// GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE => { +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn(Insn::Return { +// return_reg: group_by_metadata +// .subroutine_accumulator_output_return_offset_register, +// }); + +// program.add_comment( +// program.offset(), +// "clear accumulator subroutine start", +// ); +// 
program.resolve_label( +// group_by_metadata.subroutine_accumulator_clear_label, +// program.offset(), +// ); +// let start_reg = group_by_metadata.group_exprs_accumulator_register; +// program.emit_insn(Insn::Null { +// dest: start_reg, +// dest_end: Some(start_reg + group_by.len() + aggregates.len() - 1), +// }); + +// program.emit_insn(Insn::Integer { +// value: 0, +// dest: group_by_metadata.data_in_accumulator_indicator_register, +// }); +// program.emit_insn(Insn::Return { +// return_reg: group_by_metadata +// .subroutine_accumulator_clear_return_offset_register, +// }); +// } +// _ => { +// return Ok(OpStepResult::Done); +// } +// } +// } + +// // Non-grouped aggregation e.g. SELECT COUNT(*) FROM t + +// const AGGREGATE_INIT: usize = 1; +// const AGGREGATE_WAIT_UNTIL_SOURCE_READY: usize = 2; +// match *step { +// AGGREGATE_INIT => { +// let agg_final_label = program.allocate_label(); +// m.termination_label_stack.push(agg_final_label); +// let num_aggs = aggregates.len(); +// let start_reg = program.alloc_registers(num_aggs); +// m.aggregation_start_registers.insert(*id, start_reg); + +// Ok(OpStepResult::Continue) +// } +// AGGREGATE_WAIT_UNTIL_SOURCE_READY => loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Continue => {} +// OpStepResult::ReadyToEmit => { +// let start_reg = m.aggregation_start_registers.get(id).unwrap(); +// for (i, agg) in aggregates.iter().enumerate() { +// let agg_result_reg = start_reg + i; +// translate_aggregation( +// program, +// referenced_tables, +// agg, +// agg_result_reg, +// None, +// )?; +// } +// } +// OpStepResult::Done => { +// return Ok(OpStepResult::ReadyToEmit); +// } +// } +// }, +// _ => Ok(OpStepResult::Done), +// } +// } +// SourceOperator::Filter { .. } => unreachable!("predicates have been pushed down"), +// SourceOperator::Limit { source, step, .. } => { +// *step += 1; +// loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Continue => continue, +// OpStepResult::ReadyToEmit => { +// return Ok(OpStepResult::ReadyToEmit); +// } +// OpStepResult::Done => return Ok(OpStepResult::Done), +// } +// } +// } +// SourceOperator::Order { +// id, +// source, +// key, +// step, +// } => { +// *step += 1; +// const ORDER_INIT: usize = 1; +// const ORDER_INSERT_INTO_SORTER: usize = 2; +// const ORDER_SORT_AND_OPEN_LOOP: usize = 3; +// const ORDER_NEXT: usize = 4; +// match *step { +// ORDER_INIT => { +// m.termination_label_stack.push(program.allocate_label()); +// let sort_cursor = program.alloc_cursor_id(None, None); +// m.sorts.insert( +// *id, +// SortMetadata { +// sort_cursor, +// pseudo_table_cursor: usize::MAX, // will be set later +// sorter_data_register: program.alloc_register(), +// sorter_data_label: program.allocate_label(), +// done_label: program.allocate_label(), +// }, +// ); +// let mut order = Vec::new(); +// for (_, direction) in key.iter() { +// order.push(OwnedValue::Integer(*direction as i64)); +// } +// program.emit_insn(Insn::SorterOpen { +// cursor_id: sort_cursor, +// columns: key.len(), +// order: OwnedRecord::new(order), +// }); + +// loop { +// match source.step(program, m, referenced_tables)? 
{ +// OpStepResult::Continue => continue, +// OpStepResult::ReadyToEmit => { +// return Ok(OpStepResult::Continue); +// } +// OpStepResult::Done => { +// return Ok(OpStepResult::Done); +// } +// } +// } +// } +// ORDER_INSERT_INTO_SORTER => { +// let sort_keys_count = key.len(); +// let source_cols_count = source.column_count(referenced_tables); +// let start_reg = program.alloc_registers(sort_keys_count); +// source.result_columns(program, referenced_tables, m, None)?; + +// for (i, (expr, _)) in key.iter().enumerate() { +// let key_reg = start_reg + i; +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// key_reg, +// None, +// m.result_set_register_start, +// )?; +// } + +// let sort_metadata = m.sorts.get_mut(id).unwrap(); +// program.emit_insn(Insn::MakeRecord { +// start_reg, +// count: sort_keys_count + source_cols_count, +// dest_reg: sort_metadata.sorter_data_register, +// }); + +// program.emit_insn(Insn::SorterInsert { +// cursor_id: sort_metadata.sort_cursor, +// record_reg: sort_metadata.sorter_data_register, +// }); + +// Ok(OpStepResult::Continue) +// } +// #[allow(clippy::never_loop)] +// ORDER_SORT_AND_OPEN_LOOP => { +// loop { +// match source.step(program, m, referenced_tables)? { +// OpStepResult::Done => { +// break; +// } +// _ => unreachable!(), +// } +// } +// program.resolve_label( +// m.termination_label_stack.pop().unwrap(), +// program.offset(), +// ); +// let column_names = source.column_names(); +// let mut pseudo_columns = vec![]; +// for (i, _) in key.iter().enumerate() { +// pseudo_columns.push(Column { +// name: format!("sort_key_{}", i), +// primary_key: false, +// ty: crate::schema::Type::Null, +// }); +// } +// for name in column_names { +// pseudo_columns.push(Column { +// name: name.clone(), +// primary_key: false, +// ty: crate::schema::Type::Null, +// }); +// } + +// let num_fields = pseudo_columns.len(); + +// let pseudo_cursor = program.alloc_cursor_id( +// None, +// Some(Table::Pseudo(Rc::new(PseudoTable { +// columns: pseudo_columns, +// }))), +// ); +// let sort_metadata = m.sorts.get(id).unwrap(); + +// program.emit_insn(Insn::OpenPseudo { +// cursor_id: pseudo_cursor, +// content_reg: sort_metadata.sorter_data_register, +// num_fields, +// }); + +// program.emit_insn_with_label_dependency( +// Insn::SorterSort { +// cursor_id: sort_metadata.sort_cursor, +// pc_if_empty: sort_metadata.done_label, +// }, +// sort_metadata.done_label, +// ); + +// program.defer_label_resolution( +// sort_metadata.sorter_data_label, +// program.offset() as usize, +// ); +// program.emit_insn(Insn::SorterData { +// cursor_id: sort_metadata.sort_cursor, +// dest_reg: sort_metadata.sorter_data_register, +// pseudo_cursor, +// }); + +// let sort_metadata = m.sorts.get_mut(id).unwrap(); + +// sort_metadata.pseudo_table_cursor = pseudo_cursor; + +// Ok(OpStepResult::ReadyToEmit) +// } +// ORDER_NEXT => { +// let sort_metadata = m.sorts.get(id).unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::SorterNext { +// cursor_id: sort_metadata.sort_cursor, +// pc_if_next: sort_metadata.sorter_data_label, +// }, +// sort_metadata.sorter_data_label, +// ); + +// program.resolve_label(sort_metadata.done_label, program.offset()); + +// Ok(OpStepResult::Done) +// } +// _ => unreachable!(), +// } +// } +// SourceOperator::Nothing => Ok(OpStepResult::Done), +// } +// } +// fn result_columns( +// &self, +// program: &mut ProgramBuilder, +// referenced_tables: &[BTreeTableReference], +// m: &mut Metadata, +// cursor_override: 
Option<&SortCursorOverride>, +// ) -> Result { +// let col_count = self.column_count(referenced_tables); +// match self { +// SourceOperator::Scan { +// table_reference, .. +// } => { +// let start_reg = program.alloc_registers(col_count); +// let table = cursor_override +// .map(|c| c.pseudo_table.clone()) +// .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); +// let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { +// program.resolve_cursor_id(&table_reference.table_identifier, None) +// }); +// let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); +// translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); + +// Ok(start_reg) +// } +// SourceOperator::Search { +// table_reference, .. +// } => { +// let start_reg = program.alloc_registers(col_count); +// let table = cursor_override +// .map(|c| c.pseudo_table.clone()) +// .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); +// let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { +// program.resolve_cursor_id(&table_reference.table_identifier, None) +// }); +// let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); +// translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); + +// Ok(start_reg) +// } +// SourceOperator::Join { left, right, .. } => { +// let left_start_reg = +// left.result_columns(program, referenced_tables, m, cursor_override)?; +// right.result_columns(program, referenced_tables, m, cursor_override)?; + +// Ok(left_start_reg) +// } +// SourceOperator::Projection { +// id, +// expressions, +// aggregates, +// group_by, +// .. +// } => { +// if aggregates.is_empty() && group_by.is_none() { +// let expr_count = expressions.len(); +// let start_reg = program.alloc_registers(expr_count); +// let mut cur_reg = start_reg; +// m.result_set_register_start = start_reg; +// for expr in expressions { +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// cur_reg, +// cursor_override.map(|c| c.cursor_id), +// m.result_set_register_start, +// )?; +// cur_reg += 1; +// } + +// return Ok(start_reg); +// } +// let agg_start_reg = m.aggregation_start_registers.get(id).unwrap(); +// program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); +// for (i, agg) in aggregates.iter().enumerate() { +// let agg_result_reg = *agg_start_reg + i; +// program.emit_insn(Insn::AggFinal { +// register: agg_result_reg, +// func: agg.func.clone(), +// }); +// } + +// if let Some(group_by) = group_by { +// let output_row_start_reg = +// program.alloc_registers(aggregates.len() + group_by.len()); +// let group_by_metadata = m.group_bys.get(id).unwrap(); +// program.emit_insn(Insn::Copy { +// src_reg: group_by_metadata.group_exprs_accumulator_register, +// dst_reg: output_row_start_reg, +// amount: group_by.len() - 1, +// }); +// program.emit_insn(Insn::Copy { +// src_reg: *agg_start_reg, +// dst_reg: output_row_start_reg + group_by.len(), +// amount: aggregates.len() - 1, +// }); + +// Ok(output_row_start_reg) +// } else { +// Ok(*agg_start_reg) +// } +// } +// SourceOperator::Filter { .. } => unreachable!("predicates have been pushed down"), +// SourceOperator::Limit { .. } => { +// unimplemented!() +// } +// SourceOperator::Order { id, key, .. 
} => { +// let cursor_id = m.sorts.get(id).unwrap().pseudo_table_cursor; +// let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); +// let start_column_offset = key.len(); +// let column_count = pseudo_table.columns().len() - start_column_offset; +// let start_reg = program.alloc_registers(column_count); +// translate_table_columns( +// program, +// cursor_id, +// &pseudo_table, +// start_column_offset, +// start_reg, +// ); + +// Ok(start_reg) +// } +// SourceOperator::Projection { +// expressions, id, .. +// } => { +// let expr_count = expressions.len(); +// let start_reg = program.alloc_registers(expr_count); +// let mut cur_reg = start_reg; +// m.result_set_register_start = start_reg; +// for expr in expressions { +// translate_expr( +// program, +// Some(referenced_tables), +// expr, +// cur_reg, +// cursor_override.map(|c| c.cursor_id), +// m.result_set_register_start, +// )?; +// cur_reg += 1; +// } + +// Ok(start_reg) +// } +// SourceOperator::Nothing => unimplemented!(), +// } +// } +// fn result_row( +// &mut self, +// program: &mut ProgramBuilder, +// referenced_tables: &[BTreeTableReference], +// m: &mut Metadata, +// cursor_override: Option<&SortCursorOverride>, +// ) -> Result<()> { +// match self { +// SourceOperator::Limit { source, limit, .. } => { +// source.result_row(program, referenced_tables, m, cursor_override)?; +// let limit_reg = program.alloc_register(); +// program.emit_insn(Insn::Integer { +// value: *limit as i64, +// dest: limit_reg, +// }); +// program.mark_last_insn_constant(); +// let jump_label = m.termination_label_stack.first().unwrap(); +// program.emit_insn_with_label_dependency( +// Insn::DecrJumpZero { +// reg: limit_reg, +// target_pc: *jump_label, +// }, +// *jump_label, +// ); + +// Ok(()) +// } +// operator => { +// let start_reg = +// operator.result_columns(program, referenced_tables, m, cursor_override)?; +// program.emit_insn(Insn::ResultRow { +// start_reg, +// count: operator.column_count(referenced_tables), +// }); +// Ok(()) +// } +// } +// } +// } + +fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); let halt_label = program.allocate_label(); @@ -1697,13 +1637,13 @@ fn prologue( let metadata = Metadata { termination_label_stack: vec![halt_label], - expr_result_cache: cache, aggregation_start_registers: HashMap::new(), group_bys: HashMap::new(), left_joins: HashMap::new(), next_row_labels: HashMap::new(), scan_loop_body_labels: vec![], sorts: HashMap::new(), + result_set_register_start: 0, }; Ok((program, metadata, init_label, start_offset)) @@ -1740,28 +1680,1195 @@ fn epilogue( pub fn emit_program( database_header: Rc>, mut plan: Plan, - cache: ExpressionResultCache, connection: Weak, ) -> Result { - let (mut program, mut metadata, init_label, start_offset) = prologue(cache)?; - loop { - match plan - .root_operator - .step(&mut program, &mut metadata, &plan.referenced_tables)? 
- { - OpStepResult::Continue => {} - OpStepResult::ReadyToEmit => { - plan.root_operator.result_row( - &mut program, - &plan.referenced_tables, - &mut metadata, - None, - )?; + let (mut program, mut metadata, init_label, start_offset) = prologue()?; + + let mut order_by_necessary = plan.order_by.is_some(); + + // OPEN CURSORS ETC + if let Some(ref mut order_by) = plan.order_by { + init_order_by(&mut program, order_by, &mut metadata)?; + } + + if let Some(ref mut group_by) = plan.group_by { + let aggregates = plan.aggregates.as_mut().unwrap(); + init_group_by(&mut program, group_by, aggregates, &mut metadata)?; + } + init_source(&mut program, &plan.source, &mut metadata)?; + + // REWIND CURSORS, EMIT CONDITIONS + open_loop( + &mut program, + &mut plan.source, + &plan.referenced_tables, + &mut metadata, + )?; + + // EMIT COLUMNS AND OTHER EXPRS IN INNER LOOP + inner_loop_emit(&mut program, &mut plan, &mut metadata)?; + + // CLOSE LOOP + close_loop( + &mut program, + &mut plan.source, + &mut metadata, + &plan.referenced_tables, + )?; + + // IF GROUP BY, SORT BY GROUPS AND DO AGGREGATION + if let Some(ref mut group_by) = plan.group_by { + sort_group_by(&mut program, group_by, &mut metadata)?; + finalize_group_by(&mut program, group_by, &mut metadata)?; + } else if let Some(ref mut aggregates) = plan.aggregates { + // Example: SELECT sum(x), count(*) FROM t; + finalize_agg_without_group_by(&mut program, aggregates, &mut metadata)?; + // If we have an aggregate without a group by, we don't need an order by because currently + // there can only be a single row result in those cases. + order_by_necessary = false; + } + + // IF ORDER BY, SORT BY ORDER BY + if let Some(ref mut order_by) = plan.order_by { + if order_by_necessary { + sort_order_by( + &mut program, + order_by, + &plan.result_columns, + plan.limit.clone(), + &mut metadata, + )?; + } + } + + // EPILOGUE + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + + Ok(program.build(database_header, connection)) +} + +const ORDER_BY_ID: usize = 0; +const GROUP_BY_ID: usize = 1; +const AGG_WITHOUT_GROUP_BY_ID: usize = 2; + +fn init_order_by( + program: &mut ProgramBuilder, + order_by: &Vec<(ast::Expr, Direction)>, + m: &mut Metadata, +) -> Result<()> { + m.termination_label_stack.push(program.allocate_label()); + let sort_cursor = program.alloc_cursor_id(None, None); + m.sorts.insert( + ORDER_BY_ID, + SortMetadata { + sort_cursor, + pseudo_table_cursor: usize::MAX, // will be set later + sorter_data_register: program.alloc_register(), + sorter_data_label: program.allocate_label(), + done_label: program.allocate_label(), + }, + ); + let mut order = Vec::new(); + for (_, direction) in order_by.iter() { + order.push(OwnedValue::Integer(*direction as i64)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: order_by.len(), + order: OwnedRecord::new(order), + }); + Ok(()) +} + +fn init_group_by( + program: &mut ProgramBuilder, + group_by: &Vec, + aggregates: &Vec, + m: &mut Metadata, +) -> Result<()> { + let agg_final_label = program.allocate_label(); + m.termination_label_stack.push(agg_final_label); + let num_aggs = aggregates.len(); + + let sort_cursor = program.alloc_cursor_id(None, None); + + let abort_flag_register = program.alloc_register(); + let data_in_accumulator_indicator_register = program.alloc_register(); + let group_exprs_comparison_register = program.alloc_registers(group_by.len()); + let group_exprs_accumulator_register = program.alloc_registers(group_by.len()); + let agg_exprs_start_reg = 
program.alloc_registers(num_aggs); + m.aggregation_start_registers + .insert(GROUP_BY_ID, agg_exprs_start_reg); + let sorter_key_register = program.alloc_register(); + + let subroutine_accumulator_clear_label = program.allocate_label(); + let subroutine_accumulator_output_label = program.allocate_label(); + let sorter_data_label = program.allocate_label(); + let grouping_done_label = program.allocate_label(); + + let mut order = Vec::new(); + const ASCENDING: i64 = 0; + for _ in group_by.iter() { + order.push(OwnedValue::Integer(ASCENDING)); + } + program.emit_insn(Insn::SorterOpen { + cursor_id: sort_cursor, + columns: aggregates.len() + group_by.len(), + order: OwnedRecord::new(order), + }); + + program.add_comment(program.offset(), "clear group by abort flag"); + program.emit_insn(Insn::Integer { + value: 0, + dest: abort_flag_register, + }); + + program.add_comment( + program.offset(), + "initialize group by comparison registers to NULL", + ); + program.emit_insn(Insn::Null { + dest: group_exprs_comparison_register, + dest_end: if group_by.len() > 1 { + Some(group_exprs_comparison_register + group_by.len() - 1) + } else { + None + }, + }); + + program.add_comment(program.offset(), "go to clear accumulator subroutine"); + + let subroutine_accumulator_clear_return_offset_register = program.alloc_register(); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: subroutine_accumulator_clear_label, + return_reg: subroutine_accumulator_clear_return_offset_register, + }, + subroutine_accumulator_clear_label, + ); + + m.group_bys.insert( + GROUP_BY_ID, + GroupByMetadata { + sort_cursor, + subroutine_accumulator_clear_label, + subroutine_accumulator_clear_return_offset_register, + subroutine_accumulator_output_label, + subroutine_accumulator_output_return_offset_register: program.alloc_register(), + accumulator_indicator_set_true_label: program.allocate_label(), + sorter_data_label, + grouping_done_label, + abort_flag_register, + data_in_accumulator_indicator_register, + group_exprs_accumulator_register, + group_exprs_comparison_register, + sorter_key_register, + }, + ); + Ok(()) +} + +// fn init_agg_without_group_by( +// program: &mut ProgramBuilder, +// aggregates: &Vec, +// m: &mut Metadata, +// ) -> Result<()> { + +// Ok(()) +// } + +fn init_source( + program: &mut ProgramBuilder, + source: &SourceOperator, + m: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + outer, + .. + } => { + if *outer { + let lj_metadata = LeftJoinMetadata { + match_flag_register: program.alloc_register(), + set_match_flag_true_label: program.allocate_label(), + check_match_flag_label: program.allocate_label(), + on_match_jump_to_label: program.allocate_label(), + }; + m.left_joins.insert(*id, lj_metadata); } - OpStepResult::Done => { - epilogue(&mut program, &mut metadata, init_label, start_offset)?; - return Ok(program.build(database_header, connection)); + init_source(program, left, m)?; + init_source(program, right, m)?; + + return Ok(()); + } + SourceOperator::Scan { + id, + table_reference, + .. 
+ } => { + let cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + let root_page = table_reference.table.root_page; + let next_row_label = program.allocate_label(); + m.next_row_labels.insert(*id, next_row_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + return Ok(()); + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + let table_cursor_id = program.alloc_cursor_id( + Some(table_reference.table_identifier.clone()), + Some(Table::BTree(table_reference.table.clone())), + ); + + let next_row_label = program.allocate_label(); + + if !matches!(search, Search::PrimaryKeyEq { .. }) { + // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. + m.next_row_labels.insert(*id, next_row_label); } + + let scan_loop_body_label = program.allocate_label(); + m.scan_loop_body_labels.push(scan_loop_body_label); + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + + if let Search::IndexSearch { index, .. } = search { + let index_cursor_id = program + .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + } + + return Ok(()); + } + SourceOperator::Nothing => { + return Ok(()); } } } + +fn open_loop( + program: &mut ProgramBuilder, + source: &mut SourceOperator, + referenced_tables: &[BTreeTableReference], + m: &mut Metadata, +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + predicates, + outer, + .. 
+ } => { + open_loop(program, left, referenced_tables, m)?; + + let mut jump_target_when_false = *m + .next_row_labels + .get(&right.id()) + .or(m.next_row_labels.get(&left.id())) + .unwrap_or(m.termination_label_stack.last().unwrap()); + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.emit_insn(Insn::Integer { + value: 0, + dest: lj_meta.match_flag_register, + }); + jump_target_when_false = lj_meta.check_match_flag_label; + } + m.next_row_labels.insert(right.id(), jump_target_when_false); + + open_loop(program, right, referenced_tables, m)?; + + if let Some(predicates) = predicates { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false, + }; + for predicate in predicates.iter() { + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + m.result_set_register_start, + )?; + } + program.resolve_label(jump_target_when_true, program.offset()); + } + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + program.defer_label_resolution( + lj_meta.set_match_flag_true_label, + program.offset() as usize, + ); + program.emit_insn(Insn::Integer { + value: 1, + dest: lj_meta.match_flag_register, + }); + } + + return Ok(()); + } + SourceOperator::Scan { + id, + table_reference, + predicates, + iter_dir, + } => { + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier, None); + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::LastAsync { cursor_id }); + } else { + program.emit_insn(Insn::RewindAsync { cursor_id }); + } + let scan_loop_body_label = program.allocate_label(); + let halt_label = m.termination_label_stack.last().unwrap(); + program.emit_insn_with_label_dependency( + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + Insn::LastAwait { + cursor_id, + pc_if_empty: *halt_label, + } + } else { + Insn::RewindAwait { + cursor_id, + pc_if_empty: *halt_label, + } + }, + *halt_label, + ); + m.scan_loop_body_labels.push(scan_loop_body_label); + program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); + + let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); + if let Some(preds) = predicates { + for expr in preds { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + expr, + None, + condition_metadata, + m.result_set_register_start, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + return Ok(()); + } + SourceOperator::Search { + id, + table_reference, + search, + predicates, + .. + } => { + let table_cursor_id = + program.resolve_cursor_id(&table_reference.table_identifier, None); + + // Open the loop for the index search. + // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. + if !matches!(search, Search::PrimaryKeyEq { .. }) { + let index_cursor_id = if let Search::IndexSearch { index, .. 
} = search { + Some(program.resolve_cursor_id(&index.name, None)) + } else { + None + }; + let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); + let cmp_reg = program.alloc_register(); + let (cmp_expr, cmp_op) = match search { + Search::IndexSearch { + cmp_expr, cmp_op, .. + } => (cmp_expr, cmp_op), + Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), + Search::PrimaryKeyEq { .. } => unreachable!(), + }; + // TODO this only handles ascending indexes + match cmp_op { + ast::Operator::Equals + | ast::Operator::Greater + | ast::Operator::GreaterEquals => { + translate_expr( + program, + Some(referenced_tables), + cmp_expr, + cmp_reg, + None, + m.result_set_register_start, + )?; + } + ast::Operator::Less | ast::Operator::LessEquals => { + program.emit_insn(Insn::Null { + dest: cmp_reg, + dest_end: None, + }); + } + _ => unreachable!(), + } + program.emit_insn_with_label_dependency( + match cmp_op { + ast::Operator::Equals | ast::Operator::GreaterEquals => Insn::SeekGE { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: cmp_reg, + num_regs: 1, + target_pc: *m.termination_label_stack.last().unwrap(), + }, + ast::Operator::Greater + | ast::Operator::Less + | ast::Operator::LessEquals => Insn::SeekGT { + is_index: index_cursor_id.is_some(), + cursor_id: index_cursor_id.unwrap_or(table_cursor_id), + start_reg: cmp_reg, + num_regs: 1, + target_pc: *m.termination_label_stack.last().unwrap(), + }, + _ => unreachable!(), + }, + *m.termination_label_stack.last().unwrap(), + ); + if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { + translate_expr( + program, + Some(referenced_tables), + cmp_expr, + cmp_reg, + None, + m.result_set_register_start, + )?; + } + + program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); + // TODO: We are currently only handling ascending indexes. + // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. + // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. + // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. + // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. + // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. + // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. + // + // For primary key searches we emit RowId and then compare it to the seek value. 
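// Illustrative sketch (not part of the patch): the ascending-index strategy from the
// comment above, restated as a self-contained function. Given the comparison operator
// of the search condition, it picks where the cursor should be positioned and which
// per-row check ends the scan. `CmpOp`, `Seek` and `Terminate` are hypothetical
// stand-ins for the real ast::Operator and Insn variants; `StartOfIndex` abstracts
// the "SeekGT over NULL" trick used for < and <=.
#[derive(Debug, Clone, Copy)]
enum CmpOp { Eq, Gt, Ge, Lt, Le }

#[derive(Debug, PartialEq)]
enum Seek { SeekGeKey, SeekGtKey, StartOfIndex }

#[derive(Debug, PartialEq)]
enum Terminate { IdxGtKey, IdxGeKey, None }

fn ascending_index_plan(op: CmpOp) -> (Seek, Terminate) {
    match op {
        // index_key = K: land on the first entry >= K, stop once an entry > K is seen.
        CmpOp::Eq => (Seek::SeekGeKey, Terminate::IdxGtKey),
        // index_key > K: land just past K, scan to the end of the index.
        CmpOp::Gt => (Seek::SeekGtKey, Terminate::None),
        // index_key >= K: land on the first entry >= K, scan to the end.
        CmpOp::Ge => (Seek::SeekGeKey, Terminate::None),
        // index_key < K: start at the beginning, stop once an entry >= K is seen.
        CmpOp::Lt => (Seek::StartOfIndex, Terminate::IdxGeKey),
        // index_key <= K: start at the beginning, stop once an entry > K is seen.
        CmpOp::Le => (Seek::StartOfIndex, Terminate::IdxGtKey),
    }
}

fn main() {
    for op in [CmpOp::Eq, CmpOp::Gt, CmpOp::Ge, CmpOp::Lt, CmpOp::Le] {
        println!("{:?} -> {:?}", op, ascending_index_plan(op));
    }
}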
+ + let abort_jump_target = *m + .next_row_labels + .get(id) + .unwrap_or(m.termination_label_stack.last().unwrap()); + match cmp_op { + ast::Operator::Equals | ast::Operator::LessEquals => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn_with_label_dependency( + Insn::IdxGT { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } + } + ast::Operator::Less => { + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn_with_label_dependency( + Insn::IdxGE { + cursor_id: index_cursor_id, + start_reg: cmp_reg, + num_regs: 1, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } else { + let rowid_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id: table_cursor_id, + dest: rowid_reg, + }); + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: rowid_reg, + rhs: cmp_reg, + target_pc: abort_jump_target, + }, + abort_jump_target, + ); + } + } + _ => {} + } + + if let Some(index_cursor_id) = index_cursor_id { + program.emit_insn(Insn::DeferredSeek { + index_cursor_id, + table_cursor_id, + }); + } + } + + let jump_label = m + .next_row_labels + .get(id) + .unwrap_or(m.termination_label_stack.last().unwrap()); + + if let Search::PrimaryKeyEq { cmp_expr } = search { + let src_reg = program.alloc_register(); + translate_expr( + program, + Some(referenced_tables), + cmp_expr, + src_reg, + None, + m.result_set_register_start, + )?; + program.emit_insn_with_label_dependency( + Insn::SeekRowid { + cursor_id: table_cursor_id, + src_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + } + if let Some(predicates) = predicates { + for predicate in predicates.iter() { + let jump_target_when_true = program.allocate_label(); + let condition_metadata = ConditionMetadata { + jump_if_condition_is_true: false, + jump_target_when_true, + jump_target_when_false: *jump_label, + }; + translate_condition_expr( + program, + referenced_tables, + predicate, + None, + condition_metadata, + m.result_set_register_start, + )?; + program.resolve_label(jump_target_when_true, program.offset()); + } + } + + return Ok(()); + } + SourceOperator::Nothing => { + return Ok(()); + } + } +} + +pub enum InnerLoopEmitTarget<'a> { + GroupBySorter { + group_by: &'a Vec, + aggregates: &'a Vec, + }, + OrderBySorter { + order_by: &'a Vec<(ast::Expr, Direction)>, + }, + ResultRow { + limit: Option, + }, + AggStep, +} + +fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metadata) -> Result<()> { + // if we have a group by, we emit a record into the group by sorter. + if let Some(group_by) = &plan.group_by { + return inner_loop_source_emit( + program, + &plan.source, + &plan.result_columns, + &plan.aggregates, + m, + InnerLoopEmitTarget::GroupBySorter { + group_by, + aggregates: &plan.aggregates.as_ref().unwrap(), + }, + &plan.referenced_tables, + ); + } + // if we DONT have a group by, but we have aggregates, we emit without ResultRow. + // we also do not need to sort because we are emitting a single row. 
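// Illustrative sketch (not part of the patch): the dispatch performed by
// inner_loop_emit(), including the aggregate-without-GROUP-BY case introduced by the
// comment above, condensed into a pure function over the three plan properties it
// inspects. `EmitTarget` is a hypothetical stand-in for InnerLoopEmitTarget with its
// borrowed payloads omitted.
#[derive(Debug, PartialEq)]
enum EmitTarget { GroupBySorter, AggStep, OrderBySorter, ResultRow }

fn choose_emit_target(has_group_by: bool, has_aggregates: bool, has_order_by: bool) -> EmitTarget {
    if has_group_by {
        // Rows are collected into the GROUP BY sorter; aggregation happens after the loop.
        EmitTarget::GroupBySorter
    } else if has_aggregates {
        // A bare aggregate yields a single output row, so no sorter and no ResultRow here.
        EmitTarget::AggStep
    } else if has_order_by {
        // Rows go into the ORDER BY sorter; ResultRow is emitted while draining it.
        EmitTarget::OrderBySorter
    } else {
        // Plain case: emit ResultRow directly, with LIMIT handled via DecrJumpZero.
        EmitTarget::ResultRow
    }
}

fn main() {
    // Aggregates without GROUP BY win over ORDER BY here; ordering is dealt with later
    // (and is unnecessary for a single-row result).
    assert_eq!(choose_emit_target(false, true, true), EmitTarget::AggStep);
    assert_eq!(choose_emit_target(false, false, false), EmitTarget::ResultRow);
}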
+ if plan.aggregates.is_some() { + return inner_loop_source_emit( + program, + &plan.source, + &plan.result_columns, + &plan.aggregates, + m, + InnerLoopEmitTarget::AggStep, + &plan.referenced_tables, + ); + } + // if we DONT have a group by, but we have an order by, we emit a record into the order by sorter. + if let Some(order_by) = &plan.order_by { + return inner_loop_source_emit( + program, + &plan.source, + &plan.result_columns, + &plan.aggregates, + m, + InnerLoopEmitTarget::OrderBySorter { order_by }, + &plan.referenced_tables, + ); + } + // if we have neither, we emit a ResultRow. In that case, if we have a Limit, we handle that with DecrJumpZero. + return inner_loop_source_emit( + program, + &plan.source, + &plan.result_columns, + &plan.aggregates, + m, + InnerLoopEmitTarget::ResultRow { limit: plan.limit }, + &plan.referenced_tables, + ); +} + +fn inner_loop_source_emit( + program: &mut ProgramBuilder, + source: &SourceOperator, + result_columns: &Vec, + aggregates: &Option>, + m: &mut Metadata, + emit_target: InnerLoopEmitTarget, + referenced_tables: &[BTreeTableReference], +) -> Result<()> { + match emit_target { + InnerLoopEmitTarget::GroupBySorter { + group_by, + aggregates, + } => { + // TODO: DOESNT WORK YET + let sort_keys_count = group_by.len(); + let column_count = sort_keys_count + aggregates.len(); + let start_reg = program.alloc_registers(column_count); + for (i, expr) in group_by.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + key_reg, + None, + m.result_set_register_start, + )?; + } + for (i, agg) in aggregates.iter().enumerate() { + // TODO it's a hack to assume aggregate functions have exactly one argument. + // Counterpoint e.g. GROUP_CONCAT(expr, separator). + // + // Here we are collecting scalars for the group by sorter, which will include + // both the group by expressions and the aggregate arguments. + // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` + // the sorter will have two scalars: u.first_name and u.age. + // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. + // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. + // + // This is why we take the first argument of each aggregate function currently. + // It's mostly an artifact of the current architecture being a bit poor; we should recognize + // which scalars are dependencies of aggregate functions and explicitly collect those. 
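// Illustrative sketch (not part of the patch): the GROUP BY sorter record layout
// described above for `SELECT u.first_name, SUM(u.age) FROM users GROUP BY u.first_name`.
// Group-by keys occupy the first registers, followed by one register per aggregate's
// first argument (mirroring `start_reg + i` and `start_reg + sort_keys_count + i`).
// `group_by_sorter_layout` is a hypothetical helper, not code from the patch.
fn group_by_sorter_layout(
    start_reg: usize,
    group_by_len: usize,
    agg_count: usize,
) -> Vec<(usize, &'static str)> {
    let mut layout = Vec::new();
    for i in 0..group_by_len {
        layout.push((start_reg + i, "group-by key"));
    }
    for i in 0..agg_count {
        layout.push((start_reg + group_by_len + i, "aggregate argument"));
    }
    layout
}

fn main() {
    // One group-by key (u.first_name) and one aggregate argument (u.age), assuming the
    // block of registers allocated for the record starts at register 5:
    assert_eq!(
        group_by_sorter_layout(5, 1, 1),
        vec![(5, "group-by key"), (6, "aggregate argument")]
    );
}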
+ let expr = &agg.args[0]; + let agg_reg = start_reg + sort_keys_count + i; + translate_expr( + program, + Some(referenced_tables), + expr, + agg_reg, + None, + m.result_set_register_start, + )?; + } + + let group_by_metadata = m.group_bys.get(&GROUP_BY_ID).unwrap(); + + program.emit_insn(Insn::MakeRecord { + start_reg, + count: column_count, + dest_reg: group_by_metadata.sorter_key_register, + }); + + let group_by_metadata = m.group_bys.get(&GROUP_BY_ID).unwrap(); + program.emit_insn(Insn::SorterInsert { + cursor_id: group_by_metadata.sort_cursor, + record_reg: group_by_metadata.sorter_key_register, + }); + + Ok(()) + } + InnerLoopEmitTarget::OrderBySorter { order_by } => { + // TODO: DOESNT WORK YET + let sort_keys_count = order_by.len(); + let source_cols_count = result_columns.len(); + let start_reg = program.alloc_registers(sort_keys_count + source_cols_count); + for (i, (expr, _)) in order_by.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + key_reg, + None, + m.result_set_register_start, + )?; + } + for (i, expr) in result_columns.iter().enumerate() { + match expr { + ResultSetColumn::Scalar(expr) => { + let reg = start_reg + sort_keys_count + i; + translate_expr( + program, + Some(referenced_tables), + expr, + reg, + None, + m.result_set_register_start, + )?; + } + other => todo!("{:?}", other), + } + } + + let sort_metadata = m.sorts.get_mut(&ORDER_BY_ID).unwrap(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: sort_keys_count + source_cols_count, + dest_reg: sort_metadata.sorter_data_register, + }); + + program.emit_insn(Insn::SorterInsert { + cursor_id: sort_metadata.sort_cursor, + record_reg: sort_metadata.sorter_data_register, + }); + + Ok(()) + } + InnerLoopEmitTarget::AggStep => { + let aggregates = aggregates.as_ref().unwrap(); + let agg_final_label = program.allocate_label(); + m.termination_label_stack.push(agg_final_label); + let num_aggs = aggregates.len(); + let start_reg = program.alloc_registers(result_columns.len()); + m.aggregation_start_registers + .insert(AGG_WITHOUT_GROUP_BY_ID, start_reg); + for (i, agg) in aggregates.iter().enumerate() { + let reg = start_reg + i; + translate_aggregation(program, referenced_tables, agg, reg, None)?; + } + for (i, expr) in result_columns.iter().enumerate() { + match expr { + ResultSetColumn::Scalar(expr) => { + let reg = start_reg + num_aggs + i; + translate_expr( + program, + Some(referenced_tables), + expr, + reg, + None, + m.result_set_register_start, + )?; + } + ResultSetColumn::Agg(_) => { /* do nothing, aggregates are computed above */ } + other => unreachable!("Unexpected non-scalar result column: {:?}", other), + } + } + Ok(()) + } + InnerLoopEmitTarget::ResultRow { limit } => { + assert!(aggregates.is_none()); + let start_reg = program.alloc_registers(result_columns.len()); + for (i, expr) in result_columns.iter().enumerate() { + match expr { + ResultSetColumn::Scalar(expr) => { + let reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + reg, + None, + m.result_set_register_start, + )?; + } + other => unreachable!("Unexpected non-scalar result column: {:?}", other), + } + } + program.emit_insn(Insn::ResultRow { + start_reg, + count: result_columns.len(), + }); + if let Some(limit) = limit { + let jump_label = m.termination_label_stack.last().unwrap(); + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: limit_reg, + }); + 
program.mark_last_insn_constant(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *jump_label, + }, + *jump_label, + ); + } + + Ok(()) + } + } +} + +fn close_loop( + program: &mut ProgramBuilder, + source: &SourceOperator, + m: &mut Metadata, + referenced_tables: &[BTreeTableReference], +) -> Result<()> { + match source { + SourceOperator::Join { + id, + left, + right, + outer, + .. + } => { + close_loop(program, right, m, referenced_tables)?; + + if *outer { + let lj_meta = m.left_joins.get(id).unwrap(); + // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) + program.resolve_label(lj_meta.check_match_flag_label, program.offset()); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: lj_meta.match_flag_register, + target_pc: lj_meta.on_match_jump_to_label, + decrement_by: 0, + }, + lj_meta.on_match_jump_to_label, + ); + // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL + let right_cursor_id = match right.as_ref() { + SourceOperator::Scan { + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier, None), + SourceOperator::Search { + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier, None), + _ => unreachable!(), + }; + program.emit_insn(Insn::NullRow { + cursor_id: right_cursor_id, + }); + // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: lj_meta.set_match_flag_true_label, + }, + lj_meta.set_match_flag_true_label, + ); + } + let next_row_label = if *outer { + m.left_joins.get(id).unwrap().on_match_jump_to_label + } else { + *m.next_row_labels.get(&right.id()).unwrap() + }; + // This points to the NextAsync instruction of the left table + program.resolve_label(next_row_label, program.offset()); + close_loop(program, left, m, referenced_tables)?; + + Ok(()) + } + SourceOperator::Scan { + id, + table_reference, + iter_dir, + .. + } => { + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier, None); + program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn(Insn::PrevAsync { cursor_id }); + } else { + program.emit_insn(Insn::NextAsync { cursor_id }); + } + let jump_label = m.scan_loop_body_labels.pop().unwrap(); + + if iter_dir + .as_ref() + .is_some_and(|dir| *dir == IterationDirection::Backwards) + { + program.emit_insn_with_label_dependency( + Insn::PrevAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } else { + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + } + Ok(()) + } + SourceOperator::Search { + id, + table_reference, + search, + .. + } => { + if matches!(search, Search::PrimaryKeyEq { .. }) { + // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. + return Ok(()); + } + let cursor_id = match search { + Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name, None), + Search::PrimaryKeySearch { .. } => { + program.resolve_cursor_id(&table_reference.table_identifier, None) + } + Search::PrimaryKeyEq { .. 
} => unreachable!(), + }; + program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + program.emit_insn(Insn::NextAsync { cursor_id }); + let jump_label = m.scan_loop_body_labels.pop().unwrap(); + program.emit_insn_with_label_dependency( + Insn::NextAwait { + cursor_id, + pc_if_next: jump_label, + }, + jump_label, + ); + + Ok(()) + } + SourceOperator::Nothing => { + unreachable!() + } + } +} + +fn sort_group_by( + program: &mut ProgramBuilder, + group_by: &Vec, + m: &mut Metadata, +) -> Result<()> { + todo!() +} + +fn finalize_group_by( + program: &mut ProgramBuilder, + group_by: &Vec, + m: &mut Metadata, +) -> Result<()> { + todo!() +} + +enum FinalizeGroupByEmitTarget { + OrderBySorter(usize), + ResultRow, +} + +fn finalize_agg_without_group_by( + program: &mut ProgramBuilder, + aggregates: &Vec, + m: &mut Metadata, +) -> Result<()> { + let agg_start_reg = m + .aggregation_start_registers + .get(&AGG_WITHOUT_GROUP_BY_ID) + .unwrap(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = *agg_start_reg + i; + program.emit_insn(Insn::AggFinal { + register: agg_result_reg, + func: agg.func.clone(), + }); + } + let output_reg = program.alloc_registers(aggregates.len()); + program.emit_insn(Insn::Copy { + src_reg: *agg_start_reg, + dst_reg: output_reg, + amount: aggregates.len() - 1, + }); + // This always emits a ResultRow because currently it can only be used for a single row result + program.emit_insn(Insn::ResultRow { + start_reg: output_reg, + count: aggregates.len(), + }); + + Ok(()) +} + +fn sort_order_by( + program: &mut ProgramBuilder, + order_by: &Vec<(ast::Expr, Direction)>, + result_columns: &Vec, + limit: Option, + m: &mut Metadata, +) -> Result<()> { + // TODO: DOESNT WORK YET + program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); + let mut pseudo_columns = vec![]; + for (i, _) in order_by.iter().enumerate() { + pseudo_columns.push(Column { + name: format!("sort_key_{}", i), + primary_key: false, + ty: crate::schema::Type::Null, + }); + } + for expr in result_columns.iter() { + pseudo_columns.push(Column { + name: match expr { + ResultSetColumn::Scalar(expr) => expr.to_string(), + ResultSetColumn::Agg(agg) => agg.to_string(), + _ => unreachable!(), + }, + primary_key: false, + ty: crate::schema::Type::Null, + }); + } + + let num_fields = pseudo_columns.len(); + + let pseudo_cursor = program.alloc_cursor_id( + None, + Some(Table::Pseudo(Rc::new(PseudoTable { + columns: pseudo_columns, + }))), + ); + let sort_metadata = m.sorts.get(&ORDER_BY_ID).unwrap(); + + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor, + content_reg: sort_metadata.sorter_data_register, + num_fields, + }); + + program.emit_insn_with_label_dependency( + Insn::SorterSort { + cursor_id: sort_metadata.sort_cursor, + pc_if_empty: sort_metadata.done_label, + }, + sort_metadata.done_label, + ); + + program.defer_label_resolution(sort_metadata.sorter_data_label, program.offset() as usize); + program.emit_insn(Insn::SorterData { + cursor_id: sort_metadata.sort_cursor, + dest_reg: sort_metadata.sorter_data_register, + pseudo_cursor, + }); + + let sort_metadata = m.sorts.get_mut(&ORDER_BY_ID).unwrap(); + + sort_metadata.pseudo_table_cursor = pseudo_cursor; + + // EMIT COLUMNS FROM SORTER AND EMIT ROW + let cursor_id = pseudo_cursor; + let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); + let start_column_offset = order_by.len(); + let column_count = pseudo_table.columns().len() - start_column_offset; + let start_reg 
= program.alloc_registers(column_count); + for i in 0..column_count { + let reg = start_reg + i; + program.emit_insn(Insn::Column { + cursor_id, + column: start_column_offset + i, + dest: reg, + }); + } + program.emit_insn(Insn::ResultRow { + start_reg, + count: column_count, + }); + + if let Some(limit) = limit { + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: sort_metadata.done_label, + }, + sort_metadata.done_label, + ); + } + + program.emit_insn_with_label_dependency( + Insn::SorterNext { + cursor_id: sort_metadata.sort_cursor, + pc_if_next: sort_metadata.sorter_data_label, + }, + sort_metadata.sorter_data_label, + ); + + program.resolve_label(sort_metadata.done_label, program.offset()); + + Ok(()) +} diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 6fa78c6f6..01dcae16c 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1,6 +1,5 @@ use sqlite3_parser::ast::{self, UnaryOperator}; -use super::optimizer::CachedResult; #[cfg(feature = "json")] use crate::function::JsonFunc; use crate::function::{AggFunc, Func, FuncCtx, ScalarFunc}; @@ -24,6 +23,7 @@ pub fn translate_condition_expr( expr: &ast::Expr, cursor_hint: Option, condition_metadata: ConditionMetadata, + result_set_register_start: usize, ) -> Result<()> { match expr { ast::Expr::Between { .. } => todo!(), @@ -39,6 +39,7 @@ pub fn translate_condition_expr( jump_if_condition_is_true: false, ..condition_metadata }, + result_set_register_start, ); let _ = translate_condition_expr( program, @@ -46,6 +47,7 @@ pub fn translate_condition_expr( rhs, cursor_hint, condition_metadata, + result_set_register_start, ); } ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { @@ -61,6 +63,7 @@ pub fn translate_condition_expr( jump_target_when_false, ..condition_metadata }, + result_set_register_start, ); program.resolve_label(jump_target_when_false, program.offset()); let _ = translate_condition_expr( @@ -69,6 +72,7 @@ pub fn translate_condition_expr( rhs, cursor_hint, condition_metadata, + result_set_register_start, ); } ast::Expr::Binary(lhs, op, rhs) => { @@ -79,7 +83,7 @@ pub fn translate_condition_expr( lhs, lhs_reg, cursor_hint, - None, + result_set_register_start, ); if let ast::Expr::Literal(_) = lhs.as_ref() { program.mark_last_insn_constant() @@ -91,7 +95,7 @@ pub fn translate_condition_expr( rhs, rhs_reg, cursor_hint, - None, + result_set_register_start, ); if let ast::Expr::Literal(_) = rhs.as_ref() { program.mark_last_insn_constant() @@ -340,7 +344,7 @@ pub fn translate_condition_expr( lhs, lhs_reg, cursor_hint, - None, + result_set_register_start, )?; let rhs = rhs.as_ref().unwrap(); @@ -370,7 +374,7 @@ pub fn translate_condition_expr( expr, rhs_reg, cursor_hint, - None, + result_set_register_start, )?; // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. 
if !last_condition { @@ -414,7 +418,7 @@ pub fn translate_condition_expr( expr, rhs_reg, cursor_hint, - None, + result_set_register_start, )?; program.emit_insn_with_label_dependency( Insn::Eq { @@ -460,7 +464,7 @@ pub fn translate_condition_expr( lhs, column_reg, cursor_hint, - None, + result_set_register_start, )?; if let ast::Expr::Literal(_) = lhs.as_ref() { program.mark_last_insn_constant(); @@ -471,7 +475,7 @@ pub fn translate_condition_expr( rhs, pattern_reg, cursor_hint, - None, + result_set_register_start, )?; if let ast::Expr::Literal(_) = rhs.as_ref() { program.mark_last_insn_constant(); @@ -545,6 +549,7 @@ pub fn translate_condition_expr( expr, cursor_hint, condition_metadata, + result_set_register_start, ); } } @@ -553,71 +558,33 @@ pub fn translate_condition_expr( Ok(()) } -pub fn get_cached_or_translate( - program: &mut ProgramBuilder, - referenced_tables: Option<&[BTreeTableReference]>, - expr: &ast::Expr, - cursor_hint: Option, - cached_results: Option<&Vec<&CachedResult>>, -) -> Result { - if let Some(cached_results) = cached_results { - if let Some(cached_result) = cached_results - .iter() - .find(|cached_result| cached_result.source_expr == *expr) - { - return Ok(cached_result.register_idx); - } - } - let reg = program.alloc_register(); - translate_expr( - program, - referenced_tables, - expr, - reg, - cursor_hint, - cached_results, - )?; - Ok(reg) -} - pub fn translate_expr( program: &mut ProgramBuilder, referenced_tables: Option<&[BTreeTableReference]>, expr: &ast::Expr, target_register: usize, cursor_hint: Option, - cached_results: Option<&Vec<&CachedResult>>, + result_set_register_start: usize, ) -> Result { - if let Some(cached_results) = &cached_results { - if let Some(cached_result) = cached_results - .iter() - .find(|cached_result| cached_result.source_expr == *expr) - { - program.emit_insn(Insn::Copy { - src_reg: cached_result.register_idx, - dst_reg: target_register, - amount: 0, - }); - return Ok(target_register); - } - } - match expr { + ast::Expr::AggRef { index } => todo!(), ast::Expr::Between { .. 
} => todo!(), ast::Expr::Binary(e1, op, e2) => { - let e1_reg = get_cached_or_translate( + let e1_reg = translate_expr( program, referenced_tables, e1, + target_register, cursor_hint, - cached_results, + result_set_register_start, )?; - let e2_reg = get_cached_or_translate( + let e2_reg = translate_expr( program, referenced_tables, e2, + target_register, cursor_hint, - cached_results, + result_set_register_start, )?; match op { @@ -741,7 +708,7 @@ pub fn translate_expr( expr, reg_expr, cursor_hint, - cached_results, + result_set_register_start, )?; let reg_type = program.alloc_register(); program.emit_insn(Insn::String8 { @@ -814,7 +781,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -841,7 +808,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; } @@ -879,7 +846,7 @@ pub fn translate_expr( arg, target_register, cursor_hint, - cached_results, + result_set_register_start, )?; if index < args.len() - 1 { program.emit_insn_with_label_dependency( @@ -915,7 +882,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; } program.emit_insn(Insn::Function { @@ -948,7 +915,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; } program.emit_insn(Insn::Function { @@ -985,7 +952,7 @@ pub fn translate_expr( &args[0], temp_reg, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::NotNull { reg: temp_reg, @@ -998,7 +965,7 @@ pub fn translate_expr( &args[1], temp_reg, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Copy { src_reg: temp_reg, @@ -1031,7 +998,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1079,7 +1046,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1116,7 +1083,7 @@ pub fn translate_expr( arg, target_reg, cursor_hint, - cached_results, + result_set_register_start, )?; } } @@ -1154,7 +1121,7 @@ pub fn translate_expr( &args[0], str_reg, cursor_hint, - cached_results, + result_set_register_start, )?; translate_expr( program, @@ -1162,7 +1129,7 @@ pub fn translate_expr( &args[1], start_reg, cursor_hint, - cached_results, + result_set_register_start, )?; if args.len() == 3 { translate_expr( @@ -1171,7 +1138,7 @@ pub fn translate_expr( &args[2], length_reg, cursor_hint, - cached_results, + result_set_register_start, )?; } @@ -1201,7 +1168,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1225,7 +1192,7 @@ pub fn translate_expr( &args[0], arg_reg, cursor_hint, - cached_results, + result_set_register_start, )?; start_reg = arg_reg; } @@ -1250,7 +1217,7 @@ pub fn translate_expr( arg, target_reg, cursor_hint, - cached_results, + result_set_register_start, )?; } } @@ -1290,7 +1257,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant(); @@ -1323,7 +1290,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; if let ast::Expr::Literal(_) = arg { 
program.mark_last_insn_constant() @@ -1357,7 +1324,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - cached_results, + result_set_register_start, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1395,7 +1362,7 @@ pub fn translate_expr( &args[0], first_reg, cursor_hint, - cached_results, + result_set_register_start, )?; let second_reg = program.alloc_register(); translate_expr( @@ -1404,7 +1371,7 @@ pub fn translate_expr( &args[1], second_reg, cursor_hint, - cached_results, + result_set_register_start, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1536,7 +1503,7 @@ pub fn translate_expr( &exprs[0], target_register, cursor_hint, - cached_results, + result_set_register_start, )?; } else { // Parenthesized expressions with multiple arguments are reserved for special cases @@ -1660,7 +1627,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1682,7 +1649,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, ); expr_reg }; @@ -1725,7 +1692,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; translate_expr( program, @@ -1733,7 +1700,7 @@ pub fn translate_aggregation( &delimiter_expr, delimiter_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { @@ -1757,7 +1724,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1779,7 +1746,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1816,7 +1783,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; translate_expr( program, @@ -1824,7 +1791,7 @@ pub fn translate_aggregation( &delimiter_expr, delimiter_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { @@ -1848,7 +1815,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1870,7 +1837,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - None, + 0, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index ea890e994..8a5760516 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -98,7 +98,7 @@ pub fn translate_insert( expr, column_registers_start + col, None, - None, + 0, )?; } program.emit_insn(Insn::Yield { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 682ed6c4b..cd08f619f 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, rc::Rc}; +use std::rc::Rc; use sqlite3_parser::ast; @@ -6,7 +6,7 @@ use crate::{schema::Index, Result}; use super::plan::{ get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, BTreeTableReference, - Direction, IterationDirection, Operator, Plan, ProjectionColumn, Search, + Direction, IterationDirection, Plan, Search, SourceOperator, }; /** @@ -14,49 +14,45 @@ use super::plan::{ * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -pub fn optimize_plan(mut select_plan: Plan) -> Result<(Plan, ExpressionResultCache)> { - let mut expr_result_cache = ExpressionResultCache::new(); +pub fn optimize_plan(mut select_plan: Plan) -> Result { push_predicates( - &mut 
select_plan.root_operator, + &mut select_plan.source, + &mut select_plan.where_clause, &select_plan.referenced_tables, )?; - if eliminate_constants(&mut select_plan.root_operator)? + if eliminate_constants(&mut select_plan.source)? == ConstantConditionEliminationResult::ImpossibleCondition { - return Ok(( - Plan { - root_operator: Operator::Nothing, - referenced_tables: vec![], - available_indexes: vec![], - }, - expr_result_cache, - )); + return Ok(Plan { + source: SourceOperator::Nothing, + ..select_plan + }); } use_indexes( - &mut select_plan.root_operator, + &mut select_plan.source, &select_plan.referenced_tables, &select_plan.available_indexes, )?; eliminate_unnecessary_orderby( - &mut select_plan.root_operator, + &mut select_plan.source, + &mut select_plan.order_by, &select_plan.referenced_tables, &select_plan.available_indexes, )?; - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(&select_plan.root_operator, &mut expr_result_cache); - Ok((select_plan, expr_result_cache)) + Ok(select_plan) } fn _operator_is_already_ordered_by( - operator: &mut Operator, + operator: &mut SourceOperator, key: &mut ast::Expr, referenced_tables: &[BTreeTableReference], available_indexes: &Vec>, ) -> Result { match operator { - Operator::Scan { + SourceOperator::Scan { table_reference, .. } => Ok(key.is_primary_key_of(table_reference.table_index)), - Operator::Search { + SourceOperator::Search { table_reference, search, .. @@ -77,61 +73,53 @@ fn _operator_is_already_ordered_by( Ok(index_is_the_same) } }, - Operator::Join { left, .. } => { + SourceOperator::Join { left, .. } => { _operator_is_already_ordered_by(left, key, referenced_tables, available_indexes) } - Operator::Aggregate { source, .. } => { - _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes) - } - Operator::Projection { source, .. } => { - _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes) - } _ => Ok(false), } } fn eliminate_unnecessary_orderby( - operator: &mut Operator, + operator: &mut SourceOperator, + order_by: &mut Option>, referenced_tables: &[BTreeTableReference], available_indexes: &Vec>, ) -> Result<()> { - match operator { - Operator::Order { source, key, .. } => { - if key.len() != 1 { - // TODO: handle multiple order by keys - return Ok(()); - } - - let (key, direction) = key.first_mut().unwrap(); - - let already_ordered = _operator_is_already_ordered_by(source, key, referenced_tables, available_indexes)?; - - if already_ordered { - push_scan_direction(source, direction); - - *operator = source.take_ownership(); - } - Ok(()) - } - Operator::Limit { source, .. } => { - eliminate_unnecessary_orderby(source, referenced_tables, available_indexes)?; - Ok(()) - } - _ => Ok(()), + if order_by.is_none() { + return Ok(()); } + + let o = order_by.as_mut().unwrap(); + + if o.len() != 1 { + // TODO: handle multiple order by keys + return Ok(()); + } + + let (key, _) = o.first_mut().unwrap(); + + let already_ordered = + _operator_is_already_ordered_by(operator, key, referenced_tables, available_indexes)?; + + if already_ordered { + *order_by = None; + } + + Ok(()) } /** * Use indexes where possible */ fn use_indexes( - operator: &mut Operator, + operator: &mut SourceOperator, referenced_tables: &[BTreeTableReference], available_indexes: &[Rc], ) -> Result<()> { match operator { - Operator::Search { .. } => Ok(()), - Operator::Scan { + SourceOperator::Search { .. 
} => Ok(()), + SourceOperator::Scan { table_reference, predicates: filter, id, @@ -162,12 +150,11 @@ fn use_indexes( } Either::Right(index_search) => { fs.remove(i); - *operator = Operator::Search { + *operator = SourceOperator::Search { id: *id, table_reference: table_reference.clone(), predicates: Some(fs.clone()), search: index_search, - step: 0, }; return Ok(()); @@ -177,32 +164,12 @@ fn use_indexes( Ok(()) } - Operator::Aggregate { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Filter { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Limit { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { use_indexes(left, referenced_tables, available_indexes)?; use_indexes(right, referenced_tables, available_indexes)?; Ok(()) } - Operator::Order { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Projection { source, .. } => { - use_indexes(source, referenced_tables, available_indexes)?; - Ok(()) - } - Operator::Nothing => Ok(()), + SourceOperator::Nothing => Ok(()), } } @@ -214,33 +181,11 @@ enum ConstantConditionEliminationResult { // removes predicates that are always true // returns a ConstantEliminationResult indicating whether any predicates are always false -fn eliminate_constants(operator: &mut Operator) -> Result { +fn eliminate_constants( + operator: &mut SourceOperator, +) -> Result { match operator { - Operator::Filter { - source, predicates, .. - } => { - let mut i = 0; - while i < predicates.len() { - let predicate = &predicates[i]; - if predicate.is_always_true()? { - predicates.remove(i); - } else if predicate.is_always_false()? { - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } else { - i += 1; - } - } - - if predicates.is_empty() { - *operator = source.take_ownership(); - eliminate_constants(operator)?; - } else { - eliminate_constants(source)?; - } - - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Join { + SourceOperator::Join { left, right, predicates, @@ -278,44 +223,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *source = Box::new(Operator::Nothing); - } - // Aggregation operator can return a row even if the source is empty e.g. count(1) from users where 0 - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Limit { source, .. } => { - let constant_elimination_result = eliminate_constants(source)?; - if constant_elimination_result - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - } - Ok(constant_elimination_result) - } - Operator::Order { source, .. } => { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Projection { source, .. } => { - if eliminate_constants(source)? - == ConstantConditionEliminationResult::ImpossibleCondition - { - *operator = Operator::Nothing; - return Ok(ConstantConditionEliminationResult::ImpossibleCondition); - } - - Ok(ConstantConditionEliminationResult::Continue) - } - Operator::Scan { predicates, .. 
} => { + SourceOperator::Scan { predicates, .. } => { if let Some(ps) = predicates { let mut i = 0; while i < ps.len() { @@ -335,7 +243,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result { + SourceOperator::Search { predicates, .. } => { if let Some(predicates) = predicates { let mut i = 0; while i < predicates.len() { @@ -352,7 +260,7 @@ fn eliminate_constants(operator: &mut Operator) -> Result Ok(ConstantConditionEliminationResult::Continue), + SourceOperator::Nothing => Ok(ConstantConditionEliminationResult::Continue), } } @@ -360,42 +268,35 @@ fn eliminate_constants(operator: &mut Operator) -> Result>, referenced_tables: &Vec, ) -> Result<()> { - match operator { - Operator::Filter { - source, predicates, .. - } => { - let mut i = 0; - while i < predicates.len() { - // try to push the predicate to the source - // if it succeeds, remove the predicate from the filter - let predicate_owned = predicates[i].take_ownership(); - let Some(predicate) = push_predicate(source, predicate_owned, referenced_tables)? - else { - predicates.remove(i); - continue; - }; - predicates[i] = predicate; - i += 1; - } - - if predicates.is_empty() { - *operator = source.take_ownership(); - } - - Ok(()) + if let Some(predicates) = where_clause { + let mut i = 0; + while i < predicates.len() { + let predicate = predicates[i].take_ownership(); + let Some(predicate) = push_predicate(operator, predicate, referenced_tables)? else { + predicates.remove(i); + continue; + }; + predicates[i] = predicate; + i += 1; } - Operator::Join { + if predicates.is_empty() { + *where_clause = None; + } + } + match operator { + SourceOperator::Join { left, right, predicates, outer, .. } => { - push_predicates(left, referenced_tables)?; - push_predicates(right, referenced_tables)?; + push_predicates(left, where_clause, referenced_tables)?; + push_predicates(right, where_clause, referenced_tables)?; if predicates.is_none() { return Ok(()); @@ -433,26 +334,9 @@ fn push_predicates( Ok(()) } - Operator::Aggregate { source, .. } => { - push_predicates(source, referenced_tables)?; - - Ok(()) - } - Operator::Limit { source, .. } => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Order { source, .. } => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Projection { source, .. } => { - push_predicates(source, referenced_tables)?; - Ok(()) - } - Operator::Scan { .. } => Ok(()), - Operator::Search { .. } => Ok(()), - Operator::Nothing => Ok(()), + SourceOperator::Scan { .. } => Ok(()), + SourceOperator::Search { .. } => Ok(()), + SourceOperator::Nothing => Ok(()), } } @@ -461,12 +345,12 @@ fn push_predicates( Returns Ok(None) if the predicate was pushed, otherwise returns itself as Ok(Some(predicate)) */ fn push_predicate( - operator: &mut Operator, + operator: &mut SourceOperator, predicate: ast::Expr, referenced_tables: &Vec, ) -> Result> { match operator { - Operator::Scan { + SourceOperator::Scan { predicates, table_reference, .. @@ -497,22 +381,8 @@ fn push_predicate( Ok(None) } - Operator::Search { .. } => Ok(Some(predicate)), - Operator::Filter { - source, - predicates: ps, - .. - } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - ps.push(push_result.unwrap()); - - Ok(None) - } - Operator::Join { + SourceOperator::Search { .. 
} => Ok(Some(predicate)), + SourceOperator::Join { left, right, predicates: join_on_preds, @@ -552,46 +422,13 @@ fn push_predicate( Ok(None) } - Operator::Aggregate { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Limit { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Order { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Projection { source, .. } => { - let push_result = push_predicate(source, predicate, referenced_tables)?; - if push_result.is_none() { - return Ok(None); - } - - Ok(Some(push_result.unwrap())) - } - Operator::Nothing => Ok(Some(predicate)), + SourceOperator::Nothing => Ok(Some(predicate)), } } -fn push_scan_direction(operator: &mut Operator, direction: &Direction) { +fn push_scan_direction(operator: &mut SourceOperator, direction: &Direction) { match operator { - Operator::Projection { source, .. } => push_scan_direction(source, direction), - Operator::Scan { iter_dir, .. } => { + SourceOperator::Scan { iter_dir, .. } => { if iter_dir.is_none() { match direction { Direction::Ascending => *iter_dir = Some(IterationDirection::Forwards), @@ -603,381 +440,6 @@ fn push_scan_direction(operator: &mut Operator, direction: &Direction) { } } -#[derive(Debug)] -pub struct ExpressionResultCache { - resultmap: HashMap, - keymap: HashMap>, -} - -#[derive(Debug)] -pub struct CachedResult { - pub register_idx: usize, - pub source_expr: ast::Expr, -} - -const OPERATOR_ID_MULTIPLIER: usize = 10000; - -/** - ExpressionResultCache is a cache for the results of expressions that are computed in the query plan, - or more precisely, the VM registers that hold the results of these expressions. - - Right now the cache is mainly used to avoid recomputing e.g. the result of an aggregation expression - e.g. SELECT t.a, SUM(t.b) FROM t GROUP BY t.a ORDER BY SUM(t.b) -*/ -impl ExpressionResultCache { - pub fn new() -> Self { - ExpressionResultCache { - resultmap: HashMap::new(), - keymap: HashMap::new(), - } - } - - /** - Store the result of an expression that is computed in the query plan. - The result is stored in a VM register. A copy of the expression AST node is - stored as well, so that parent operators can use it to compare their own expressions - with the one that was computed in a child operator. - - This is a weakness of our current reliance on a 3rd party AST library, as we can't - e.g. modify the AST to add identifiers to nodes or replace nodes with some kind of - reference to a register, etc. - */ - pub fn cache_result_register( - &mut self, - operator_id: usize, - result_column_idx: usize, - register_idx: usize, - expr: ast::Expr, - ) { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - self.resultmap.insert( - key, - CachedResult { - register_idx, - source_expr: expr, - }, - ); - } - - /** - Set a mapping from a parent operator to a child operator, so that the parent operator - can look up the register of a result that was computed in the child operator. - E.g. 
"Parent operator's result column 3 is computed in child operator 5, result column 2" - */ - pub fn set_precomputation_key( - &mut self, - operator_id: usize, - result_column_idx: usize, - child_operator_id: usize, - child_operator_result_column_idx_mask: usize, - ) { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - - let mut values = Vec::new(); - for i in 0..64 { - if (child_operator_result_column_idx_mask >> i) & 1 == 1 { - values.push(child_operator_id * OPERATOR_ID_MULTIPLIER + i); - } - } - self.keymap.insert(key, values); - } - - /** - Get the cache entries for a given operator and result column index. - There may be multiple cached entries, e.g. a binary operator's both - arms may have been cached. - */ - pub fn get_cached_result_registers( - &self, - operator_id: usize, - result_column_idx: usize, - ) -> Option> { - let key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx; - self.keymap.get(&key).and_then(|keys| { - let mut results = Vec::new(); - for key in keys { - if let Some(result) = self.resultmap.get(key) { - results.push(result); - } - } - if results.is_empty() { - None - } else { - Some(results) - } - }) - } -} - -type ResultColumnIndexBitmask = usize; - -/** - Find all result columns in an operator that match an expression, either fully or partially. - This is used to find the result columns that are computed in an operator and that are used - in a parent operator, so that the parent operator can look up the register that holds the result - of the child operator's expression. - - The result is returned as a bitmask due to performance neuroticism. A limitation of this is that - we can only handle 64 result columns per operator. -*/ -fn find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr: &ast::Expr, - operator: &Operator, -) -> ResultColumnIndexBitmask { - let exact_match = match operator { - Operator::Aggregate { - aggregates, - group_by, - .. - } => { - let mut idx = 0; - let mut mask = 0; - for agg in aggregates.iter() { - if agg.original_expr == *expr { - mask |= 1 << idx; - } - idx += 1; - } - - if let Some(group_by) = group_by { - for g in group_by.iter() { - if g == expr { - mask |= 1 << idx; - } - idx += 1 - } - } - - mask - } - Operator::Filter { .. } => 0, - Operator::Limit { .. } => 0, - Operator::Join { .. } => 0, - Operator::Order { .. } => 0, - Operator::Projection { expressions, .. } => { - let mut mask = 0; - for (idx, e) in expressions.iter().enumerate() { - match e { - ProjectionColumn::Column(c) => { - if c == expr { - mask |= 1 << idx; - } - } - ProjectionColumn::Star => {} - ProjectionColumn::TableStar(_) => {} - } - } - - mask - } - Operator::Scan { .. } => 0, - Operator::Search { .. 
} => 0, - Operator::Nothing => 0, - }; - - if exact_match != 0 { - return exact_match; - } - - match expr { - ast::Expr::Between { - lhs, - not: _, - start, - end, - } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(start, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(end, operator); - mask - } - ast::Expr::Binary(lhs, _op, rhs) => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(rhs, operator); - mask - } - ast::Expr::Case { - base, - when_then_pairs, - else_expr, - } => { - let mut mask = 0; - if let Some(base) = base { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(base, operator); - } - for (w, t) in when_then_pairs.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(w, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(t, operator); - } - if let Some(e) = else_expr { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(e, operator); - } - mask - } - ast::Expr::Cast { expr, type_name: _ } => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Collate(expr, _collation) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::DoublyQualified(_schema, _tbl, _ident) => 0, - ast::Expr::Exists(_) => 0, - ast::Expr::FunctionCall { - name: _, - distinctness: _, - args, - order_by: _, - filter_over: _, - } => { - let mut mask = 0; - if let Some(args) = args { - for a in args.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(a, operator); - } - } - mask - } - ast::Expr::FunctionCallStar { - name: _, - filter_over: _, - } => 0, - ast::Expr::Id(_) => unreachable!("Ids have been bound to Column references"), - ast::Expr::Column { .. 
} => 0, - ast::Expr::InList { lhs, not: _, rhs } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - if let Some(rhs) = rhs { - for r in rhs.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(r, operator); - } - } - mask - } - ast::Expr::InSelect { - lhs, - not: _, - rhs: _, - } => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - lhs, operator, - ) - } - ast::Expr::InTable { - lhs: _, - not: _, - rhs: _, - args: _, - } => 0, - ast::Expr::IsNull(expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Like { - lhs, - not: _, - op: _, - rhs, - escape: _, - } => { - let mut mask = 0; - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator); - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(rhs, operator); - mask - } - ast::Expr::Literal(_) => 0, - ast::Expr::Name(_) => 0, - ast::Expr::NotNull(expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Parenthesized(expr) => { - let mut mask = 0; - for e in expr.iter() { - mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(e, operator); - } - mask - } - ast::Expr::Qualified(_, _) => { - unreachable!("Qualified expressions have been bound to Column references") - } - ast::Expr::Raise(_, _) => 0, - ast::Expr::Subquery(_) => 0, - ast::Expr::Unary(_op, expr) => { - find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially( - expr, operator, - ) - } - ast::Expr::Variable(_) => 0, - } -} - -/** - * This function is used to find all the expressions that are shared between the parent operator and the child operators. - * If an expression is shared between the parent and child operators, then the parent operator should not recompute the expression. - * Instead, it should use the result of the expression that was computed by the child operator. -*/ -fn find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them( - operator: &Operator, - expr_result_cache: &mut ExpressionResultCache, -) { - match operator { - Operator::Aggregate { - source, - .. - } => { - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them( - source, expr_result_cache, - ) - } - Operator::Filter { .. } => unreachable!(), - Operator::Limit { source, .. } => { - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Join { .. } => {} - Operator::Order { source, key, .. } => { - for (idx, (expr, _)) in key.iter().enumerate() { - let result = find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(expr, source); - if result != 0 { - expr_result_cache.set_precomputation_key( - operator.id(), - idx, - source.id(), - result, - ); - } - } - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Projection { source, expressions, .. 
} => { - for (idx, expr) in expressions.iter().enumerate() { - if let ProjectionColumn::Column(expr) = expr { - let result = find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(expr, source); - if result != 0 { - expr_result_cache.set_precomputation_key( - operator.id(), - idx, - source.id(), - result, - ); - } - } - } - find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(source, expr_result_cache) - } - Operator::Scan { .. } => {} - Operator::Search { .. } => {} - Operator::Nothing => {} - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ConstantPredicate { AlwaysTrue, @@ -1286,8 +748,8 @@ impl TakeOwnership for ast::Expr { } } -impl TakeOwnership for Operator { +impl TakeOwnership for SourceOperator { fn take_ownership(&mut self) -> Self { - std::mem::replace(self, Operator::Nothing) + std::mem::replace(self, SourceOperator::Nothing) } } diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 8c1eff7be..43fcb4e96 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -12,16 +12,29 @@ use crate::{ Result, }; +#[derive(Debug)] +pub enum ResultSetColumn { + Scalar(ast::Expr), + Agg(Aggregate), + ComputedAgg(ast::Expr), +} + #[derive(Debug)] pub struct Plan { - pub root_operator: Operator, + pub source: SourceOperator, + pub result_columns: Vec, + pub where_clause: Option>, + pub group_by: Option>, + pub order_by: Option>, + pub aggregates: Option>, + pub limit: Option, pub referenced_tables: Vec, pub available_indexes: Vec>, } impl Display for Plan { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.root_operator) + write!(f, "{}", self.source) } } @@ -45,69 +58,17 @@ pub enum IterationDirection { TODO: perhaps 'step' shouldn't be in this struct, since it's an execution time concept, not a plan time concept. */ #[derive(Clone, Debug)] -pub enum Operator { - // Aggregate operator - // This operator is used to compute aggregate functions like SUM, AVG, COUNT, etc. - // It takes a source operator and a list of aggregate functions to compute. - // GROUP BY is not supported yet. - Aggregate { - id: usize, - source: Box, - aggregates: Vec, - group_by: Option>, - step: usize, - }, - // Filter operator - // This operator is used to filter rows from the source operator. - // It takes a source operator and a list of predicates to evaluate. - // Only rows for which all predicates evaluate to true are passed to the next operator. - // Generally filter operators will only exist in unoptimized plans, - // as the optimizer will try to push filters down to the lowest possible level, - // e.g. a table scan. - Filter { - id: usize, - source: Box, - predicates: Vec, - }, - // Limit operator - // This operator is used to limit the number of rows returned by the source operator. - Limit { - id: usize, - source: Box, - limit: usize, - step: usize, - }, +pub enum SourceOperator { // Join operator // This operator is used to join two source operators. // It takes a left and right source operator, a list of predicates to evaluate, // and a boolean indicating whether it is an outer join. Join { id: usize, - left: Box, - right: Box, + left: Box, + right: Box, predicates: Option>, outer: bool, - step: usize, - }, - // Order operator - // This operator is used to sort the rows returned by the source operator. 
- Order { - id: usize, - source: Box, - key: Vec<(ast::Expr, Direction)>, - step: usize, - }, - // Projection operator - // This operator is used to project columns from the source operator. - // It takes a source operator and a list of expressions to evaluate. - // e.g. SELECT foo, bar FROM t1 - // In this example, the expressions would be [foo, bar] - // and the source operator would be a Scan operator for table t1. - Projection { - id: usize, - source: Box, - expressions: Vec, - step: usize, }, // Scan operator // This operator is used to scan a table. @@ -122,7 +83,6 @@ pub enum Operator { id: usize, table_reference: BTreeTableReference, predicates: Option>, - step: usize, iter_dir: Option, }, // Search operator @@ -133,7 +93,6 @@ pub enum Operator { table_reference: BTreeTableReference, search: Search, predicates: Option>, - step: usize, }, // Nothing operator // This operator is used to represent an empty query. @@ -168,106 +127,30 @@ pub enum Search { }, } -#[derive(Clone, Debug)] -pub enum ProjectionColumn { - Column(ast::Expr), - Star, - TableStar(BTreeTableReference), -} - -impl ProjectionColumn { +impl SourceOperator { pub fn column_count(&self, referenced_tables: &[BTreeTableReference]) -> usize { match self { - ProjectionColumn::Column(_) => 1, - ProjectionColumn::Star => { - let mut count = 0; - for table_reference in referenced_tables { - count += table_reference.table.columns.len(); - } - count - } - ProjectionColumn::TableStar(table_reference) => table_reference.table.columns.len(), - } - } -} - -impl Operator { - pub fn column_count(&self, referenced_tables: &[BTreeTableReference]) -> usize { - match self { - Operator::Aggregate { - group_by, - aggregates, - .. - } => aggregates.len() + group_by.as_ref().map_or(0, |g| g.len()), - Operator::Filter { source, .. } => source.column_count(referenced_tables), - Operator::Limit { source, .. } => source.column_count(referenced_tables), - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { left.column_count(referenced_tables) + right.column_count(referenced_tables) } - Operator::Order { source, .. } => source.column_count(referenced_tables), - Operator::Projection { expressions, .. } => expressions - .iter() - .map(|e| e.column_count(referenced_tables)) - .sum(), - Operator::Scan { + SourceOperator::Scan { table_reference, .. } => table_reference.table.columns.len(), - Operator::Search { + SourceOperator::Search { table_reference, .. } => table_reference.table.columns.len(), - Operator::Nothing => 0, + SourceOperator::Nothing => 0, } } pub fn column_names(&self) -> Vec { match self { - Operator::Aggregate { - aggregates, - group_by, - .. - } => { - let mut names = vec![]; - for agg in aggregates.iter() { - names.push(agg.func.to_string().to_string()); - } - - if let Some(group_by) = group_by { - for expr in group_by.iter() { - match expr { - ast::Expr::Id(ident) => names.push(ident.0.clone()), - ast::Expr::Qualified(tbl, ident) => { - names.push(format!("{}.{}", tbl.0, ident.0)) - } - e => names.push(e.to_string()), - } - } - } - - names - } - Operator::Filter { source, .. } => source.column_names(), - Operator::Limit { source, .. } => source.column_names(), - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { let mut names = left.column_names(); names.extend(right.column_names()); names } - Operator::Order { source, .. } => source.column_names(), - Operator::Projection { expressions, .. 
} => expressions - .iter() - .map(|e| match e { - ProjectionColumn::Column(expr) => match expr { - ast::Expr::Id(ident) => ident.0.clone(), - ast::Expr::Qualified(tbl, ident) => format!("{}.{}", tbl.0, ident.0), - _ => "expr".to_string(), - }, - ProjectionColumn::Star => "*".to_string(), - ProjectionColumn::TableStar(table_reference) => { - format!("{}.{}", table_reference.table_identifier, "*") - } - }) - .collect(), - Operator::Scan { + SourceOperator::Scan { table_reference, .. } => table_reference .table @@ -275,7 +158,7 @@ impl Operator { .iter() .map(|c| c.name.clone()) .collect(), - Operator::Search { + SourceOperator::Search { table_reference, .. } => table_reference .table @@ -283,21 +166,16 @@ impl Operator { .iter() .map(|c| c.name.clone()) .collect(), - Operator::Nothing => vec![], + SourceOperator::Nothing => vec![], } } pub fn id(&self) -> usize { match self { - Operator::Aggregate { id, .. } => *id, - Operator::Filter { id, .. } => *id, - Operator::Limit { id, .. } => *id, - Operator::Join { id, .. } => *id, - Operator::Order { id, .. } => *id, - Operator::Projection { id, .. } => *id, - Operator::Scan { id, .. } => *id, - Operator::Search { id, .. } => *id, - Operator::Nothing => unreachable!(), + SourceOperator::Join { id, .. } => *id, + SourceOperator::Scan { id, .. } => *id, + SourceOperator::Search { id, .. } => *id, + SourceOperator::Nothing => unreachable!(), } } } @@ -337,10 +215,10 @@ impl Display for Aggregate { } // For EXPLAIN QUERY PLAN -impl Display for Operator { +impl Display for SourceOperator { fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt_operator( - operator: &Operator, + operator: &SourceOperator, f: &mut Formatter, level: usize, last: bool, @@ -356,34 +234,7 @@ impl Display for Operator { }; match operator { - Operator::Aggregate { - source, aggregates, .. - } => { - // e.g. Aggregate count(*), sum(x) - let aggregates_display_string = aggregates - .iter() - .map(|agg| agg.to_string()) - .collect::>() - .join(", "); - writeln!(f, "{}AGGREGATE {}", indent, aggregates_display_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Filter { - source, predicates, .. - } => { - let predicates_string = predicates - .iter() - .map(|p| p.to_string()) - .collect::>() - .join(" AND "); - writeln!(f, "{}FILTER {}", indent, predicates_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Limit { source, limit, .. } => { - writeln!(f, "{}TAKE {}", indent, limit)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Join { + SourceOperator::Join { left, right, predicates, @@ -408,35 +259,7 @@ impl Display for Operator { fmt_operator(left, f, level + 1, false)?; fmt_operator(right, f, level + 1, true) } - Operator::Order { source, key, .. } => { - let sort_keys_string = key - .iter() - .map(|(expr, dir)| format!("{} {}", expr, dir)) - .collect::>() - .join(", "); - writeln!(f, "{}SORT {}", indent, sort_keys_string)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Projection { - source, - expressions, - .. - } => { - let expressions = expressions - .iter() - .map(|expr| match expr { - ProjectionColumn::Column(c) => c.to_string(), - ProjectionColumn::Star => "*".to_string(), - ProjectionColumn::TableStar(table_reference) => { - format!("{}.{}", table_reference.table_identifier, "*") - } - }) - .collect::>() - .join(", "); - writeln!(f, "{}PROJECT {}", indent, expressions)?; - fmt_operator(source, f, level + 1, true) - } - Operator::Scan { + SourceOperator::Scan { table_reference, predicates: filter, .. 
@@ -464,7 +287,7 @@ impl Display for Operator { }?; Ok(()) } - Operator::Search { + SourceOperator::Search { table_reference, search, .. @@ -487,7 +310,7 @@ impl Display for Operator { } Ok(()) } - Operator::Nothing => Ok(()), + SourceOperator::Nothing => Ok(()), } } writeln!(f, "QUERY PLAN")?; @@ -505,35 +328,15 @@ impl Display for Operator { */ pub fn get_table_ref_bitmask_for_operator<'a>( tables: &'a Vec, - operator: &'a Operator, + operator: &'a SourceOperator, ) -> Result { let mut table_refs_mask = 0; match operator { - Operator::Aggregate { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Filter { - source, predicates, .. - } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - for predicate in predicates { - table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, predicate)?; - } - } - Operator::Limit { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Join { left, right, .. } => { + SourceOperator::Join { left, right, .. } => { table_refs_mask |= get_table_ref_bitmask_for_operator(tables, left)?; table_refs_mask |= get_table_ref_bitmask_for_operator(tables, right)?; } - Operator::Order { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Projection { source, .. } => { - table_refs_mask |= get_table_ref_bitmask_for_operator(tables, source)?; - } - Operator::Scan { + SourceOperator::Scan { table_reference, .. } => { table_refs_mask |= 1 @@ -542,7 +345,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .position(|t| Rc::ptr_eq(&t.table, &table_reference.table)) .unwrap(); } - Operator::Search { + SourceOperator::Search { table_reference, .. } => { table_refs_mask |= 1 @@ -551,7 +354,7 @@ pub fn get_table_ref_bitmask_for_operator<'a>( .position(|t| Rc::ptr_eq(&t.table, &table_reference.table)) .unwrap(); } - Operator::Nothing => {} + SourceOperator::Nothing => {} } Ok(table_refs_mask) } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 7f803e514..3e6b73630 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,4 +1,6 @@ -use super::plan::{Aggregate, BTreeTableReference, Direction, Operator, Plan, ProjectionColumn}; +use super::plan::{ + Aggregate, BTreeTableReference, Direction, Plan, ResultSetColumn, SourceOperator, +}; use crate::{function::Func, schema::Schema, util::normalize_ident, Result}; use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; @@ -66,6 +68,7 @@ fn bind_column_references( referenced_tables: &[BTreeTableReference], ) -> Result<()> { match expr { + ast::Expr::AggRef { .. } => unreachable!(), ast::Expr::Id(id) => { let mut match_result = None; for (tbl_idx, table) in referenced_tables.iter().enumerate() { @@ -237,146 +240,157 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result
<Plan>
{ - projection_expressions.push(ProjectionColumn::Star); - } - ast::ResultColumn::TableStar(name) => { - let name_normalized = normalize_ident(name.0.as_str()); - let referenced_table = referenced_tables - .iter() - .find(|t| t.table_identifier == name_normalized); - - if referenced_table.is_none() { - crate::bail_parse_error!("Table {} not found", name.0); + let mut aggregate_expressions = Vec::new(); + for column in columns.clone() { + match column { + ast::ResultColumn::Star => { + for table_reference in plan.referenced_tables.iter() { + for (idx, col) in table_reference.table.columns.iter().enumerate() { + plan.result_columns.push(ResultSetColumn::Scalar( + ast::Expr::Column { + database: None, // TODO: support different databases + table: table_reference.table_index, + column: idx, + is_primary_key: col.primary_key, + }, + )); } - let table_reference = referenced_table.unwrap(); - projection_expressions - .push(ProjectionColumn::TableStar(table_reference.clone())); } - ast::ResultColumn::Expr(mut expr, _) => { - bind_column_references(&mut expr, &referenced_tables)?; - projection_expressions.push(ProjectionColumn::Column(expr.clone())); - match expr.clone() { - ast::Expr::FunctionCall { - name, - distinctness: _, - args, - filter_over: _, - order_by: _, - } => { - let args_count = if let Some(args) = &args { - args.len() - } else { - 0 - }; - match Func::resolve_function( - normalize_ident(name.0.as_str()).as_str(), - args_count, - ) { - Ok(Func::Agg(f)) => { - aggregate_expressions.push(Aggregate { - func: f, - args: args.unwrap(), - original_expr: expr.clone(), - }); - } - Ok(_) => { - resolve_aggregates(&expr, &mut aggregate_expressions); - } - _ => {} - } - } - ast::Expr::FunctionCallStar { - name, - filter_over: _, - } => { - if let Ok(Func::Agg(f)) = Func::resolve_function( - normalize_ident(name.0.as_str()).as_str(), - 0, - ) { - aggregate_expressions.push(Aggregate { + } + ast::ResultColumn::TableStar(name) => { + let name_normalized = normalize_ident(name.0.as_str()); + let referenced_table = plan + .referenced_tables + .iter() + .find(|t| t.table_identifier == name_normalized); + + if referenced_table.is_none() { + crate::bail_parse_error!("Table {} not found", name.0); + } + let table_reference = referenced_table.unwrap(); + for (idx, col) in table_reference.table.columns.iter().enumerate() { + plan.result_columns + .push(ResultSetColumn::Scalar(ast::Expr::Column { + database: None, // TODO: support different databases + table: table_reference.table_index, + column: idx, + is_primary_key: col.primary_key, + })); + } + } + ast::ResultColumn::Expr(mut expr, _) => { + bind_column_references(&mut expr, &plan.referenced_tables)?; + match &expr { + ast::Expr::FunctionCall { + name, + distinctness: _, + args, + filter_over: _, + order_by: _, + } => { + let args_count = if let Some(args) = &args { + args.len() + } else { + 0 + }; + match Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + args_count, + ) { + Ok(Func::Agg(f)) => { + let agg = Aggregate { func: f, - args: vec![ast::Expr::Literal(ast::Literal::Numeric( - "1".to_string(), - ))], + args: args.as_ref().unwrap().clone(), original_expr: expr.clone(), - }); + }; + aggregate_expressions.push(agg.clone()); + plan.result_columns.push(ResultSetColumn::Agg(agg)); } + Ok(_) => { + resolve_aggregates(&expr, &mut aggregate_expressions); + } + _ => {} } - ast::Expr::Binary(lhs, _, rhs) => { - resolve_aggregates(&lhs, &mut aggregate_expressions); - resolve_aggregates(&rhs, &mut aggregate_expressions); + } + 
ast::Expr::FunctionCallStar { + name, + filter_over: _, + } => { + if let Ok(Func::Agg(f)) = Func::resolve_function( + normalize_ident(name.0.as_str()).as_str(), + 0, + ) { + let agg = Aggregate { + func: f, + args: vec![ast::Expr::Literal(ast::Literal::Numeric( + "1".to_string(), + ))], + original_expr: expr.clone(), + }; + aggregate_expressions.push(agg.clone()); + plan.result_columns.push(ResultSetColumn::Agg(agg)); + } else { + crate::bail_parse_error!( + "Invalid aggregate function: {}", + name.0 + ); } - _ => {} + } + ast::Expr::Binary(lhs, _, rhs) => { + resolve_aggregates(&lhs, &mut aggregate_expressions); + resolve_aggregates(&rhs, &mut aggregate_expressions); + plan.result_columns + .push(ResultSetColumn::Scalar(expr.clone())); + } + e => { + plan.result_columns.push(ResultSetColumn::Scalar(e.clone())); } } } } - if let Some(group_by) = group_by.as_mut() { - for expr in group_by.exprs.iter_mut() { - bind_column_references(expr, &referenced_tables)?; - } - if aggregate_expressions.is_empty() { - crate::bail_parse_error!( - "GROUP BY clause without aggregate functions is not allowed" - ); - } - for scalar in projection_expressions.iter() { - match scalar { - ProjectionColumn::Column(_) => {} - _ => { - crate::bail_parse_error!( - "Only column references are allowed in the SELECT clause when using GROUP BY" - ); - } - } - } - } - if !aggregate_expressions.is_empty() { - operator = Operator::Aggregate { - source: Box::new(operator), - aggregates: aggregate_expressions, - group_by: group_by.map(|g| g.exprs), // TODO: support HAVING - id: operator_id_counter.get_next_id(), - step: 0, - } - } - - if !projection_expressions.is_empty() { - operator = Operator::Projection { - source: Box::new(operator), - expressions: projection_expressions, - id: operator_id_counter.get_next_id(), - step: 0, - }; - } } + if let Some(group_by) = group_by.as_mut() { + for expr in group_by.exprs.iter_mut() { + bind_column_references(expr, &plan.referenced_tables)?; + } + if aggregate_expressions.is_empty() { + crate::bail_parse_error!( + "GROUP BY clause without aggregate functions is not allowed" + ); + } + } + + plan.group_by = group_by.map(|g| g.exprs); + plan.aggregates = if aggregate_expressions.is_empty() { + None + } else { + Some(aggregate_expressions) + }; // Parse the ORDER BY clause if let Some(order_by) = select.order_by { @@ -402,7 +416,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result
<Plan>
{ let l = n.parse()?; - if l == 0 { - Operator::Nothing - } else { - Operator::Limit { - source: Box::new(operator), - limit: l, - id: operator_id_counter.get_next_id(), - step: 0, - } - } + Some(l) } _ => todo!(), } } // Return the unoptimized query plan - Ok(Plan { - root_operator: operator, - referenced_tables, - available_indexes: schema.indexes.clone().into_values().flatten().collect(), - }) + Ok(plan) } _ => todo!(), } @@ -456,9 +452,9 @@ fn parse_from( schema: &Schema, from: Option, operator_id_counter: &mut OperatorIdCounter, -) -> Result<(Operator, Vec)> { +) -> Result<(SourceOperator, Vec)> { if from.as_ref().and_then(|f| f.select.as_ref()).is_none() { - return Ok((Operator::Nothing, vec![])); + return Ok((SourceOperator::Nothing, vec![])); } let from = from.unwrap(); @@ -484,11 +480,10 @@ fn parse_from( _ => todo!(), }; - let mut operator = Operator::Scan { + let mut operator = SourceOperator::Scan { table_reference: first_table.clone(), predicates: None, id: operator_id_counter.get_next_id(), - step: 0, iter_dir: None, }; @@ -498,13 +493,12 @@ fn parse_from( for join in from.joins.unwrap_or_default().into_iter() { let (right, outer, predicates) = parse_join(schema, join, operator_id_counter, &mut tables, table_index)?; - operator = Operator::Join { + operator = SourceOperator::Join { left: Box::new(operator), right: Box::new(right), predicates, outer, id: operator_id_counter.get_next_id(), - step: 0, }; table_index += 1; } @@ -518,7 +512,7 @@ fn parse_join( operator_id_counter: &mut OperatorIdCounter, tables: &mut Vec, table_index: usize, -) -> Result<(Operator, bool, Option>)> { +) -> Result<(SourceOperator, bool, Option>)> { let ast::JoinedSelectTable { operator, table, @@ -574,11 +568,10 @@ fn parse_join( } Ok(( - Operator::Scan { + SourceOperator::Scan { table_reference: table.clone(), predicates: None, id: operator_id_counter.get_next_id(), - step: 0, iter_dir: None, }, outer, diff --git a/core/translate/select.rs b/core/translate/select.rs index 2b946b0fd..0d16089eb 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -17,12 +17,7 @@ pub fn translate_select( connection: Weak, ) -> Result { let select_plan = prepare_select_plan(schema, select)?; - let (optimized_plan, expr_result_cache) = optimize_plan(select_plan)?; - println!("{:?}", expr_result_cache); - emit_program( - database_header, - optimized_plan, - expr_result_cache, - connection, - ) + let optimized_plan = optimize_plan(select_plan)?; + // println!("optimized_plan: {:?}", optimized_plan); + emit_program(database_header, optimized_plan, connection) } diff --git a/vendored/sqlite3-parser/src/parser/ast/fmt.rs b/vendored/sqlite3-parser/src/parser/ast/fmt.rs index 80f87eefb..6b0271919 100644 --- a/vendored/sqlite3-parser/src/parser/ast/fmt.rs +++ b/vendored/sqlite3-parser/src/parser/ast/fmt.rs @@ -638,6 +638,7 @@ impl ToTokens for Expr { } Self::Id(id) => id.to_tokens(s), Self::Column { .. } => Ok(()), + Self::AggRef { .. 
} => Ok(()), Self::InList { lhs, not, rhs } => { lhs.to_tokens(s)?; if *not { diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index 29ec84dd6..ac45b5170 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -338,6 +338,11 @@ pub enum Expr { /// is the column a primary key is_primary_key: bool, }, + /// AggRef is a reference to a computed aggregate + AggRef { + /// index of the aggregate in the aggregates vector parsed from the query + index: usize, + }, /// `IN` InList { /// expression From cc902ed25de34213287c385eb40ecbb91d2d7760 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 15:59:39 +0200 Subject: [PATCH 03/32] GROUP BY and ORDER BY mostly work --- core/schema.rs | 2 +- core/translate/emitter.rs | 2265 +++++------------ core/translate/expr.rs | 348 ++- core/translate/insert.rs | 2 +- core/translate/optimizer.rs | 11 +- core/translate/planner.rs | 12 +- vendored/sqlite3-parser/src/parser/ast/fmt.rs | 1 - vendored/sqlite3-parser/src/parser/ast/mod.rs | 9 +- 8 files changed, 872 insertions(+), 1778 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 7ebe249be..c60b8ff5f 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -90,7 +90,7 @@ impl Table { None => None, }, Table::Pseudo(table) => match table.columns.get(index) { - Some(column) => Some(&column.name), + Some(_) => None, None => None, }, } diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index fe23832db..b9bf5767a 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -6,7 +6,6 @@ use sqlite3_parser::ast; use crate::schema::{Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::expr::resolve_ident_pseudo_table; use crate::translate::plan::{IterationDirection, Search}; use crate::types::{OwnedRecord, OwnedValue}; use crate::vdbe::builder::ProgramBuilder; @@ -14,41 +13,12 @@ use crate::vdbe::{BranchOffset, Insn, Program}; use crate::{Connection, Result}; use super::expr::{ - translate_aggregation, translate_condition_expr, translate_expr, translate_table_columns, + translate_aggregation, translate_aggregation_groupby, translate_condition_expr, translate_expr, ConditionMetadata, }; use super::plan::{Aggregate, BTreeTableReference, Direction, Plan}; use super::plan::{ResultSetColumn, SourceOperator}; -/** - * The Emitter trait is used to emit bytecode instructions for a given operator in the query plan. 
- * - * - step: perform a single step of the operator, emitting bytecode instructions as needed, - and returning a result indicating whether the operator is ready to emit a result row -*/ -// pub trait Emitter { -// fn step( -// &mut self, -// pb: &mut ProgramBuilder, -// m: &mut Metadata, -// referenced_tables: &[BTreeTableReference], -// ) -> Result; -// fn result_columns( -// &self, -// program: &mut ProgramBuilder, -// referenced_tables: &[BTreeTableReference], -// metadata: &mut Metadata, -// cursor_override: Option<&SortCursorOverride>, -// ) -> Result; -// fn result_row( -// &mut self, -// program: &mut ProgramBuilder, -// referenced_tables: &[BTreeTableReference], -// metadata: &mut Metadata, -// cursor_override: Option<&SortCursorOverride>, -// ) -> Result<()>; -// } - #[derive(Debug)] pub struct LeftJoinMetadata { // integer register that holds a flag that is set to true if the current row has a match for the left join @@ -127,1500 +97,22 @@ pub struct Metadata { next_row_labels: HashMap, // labels for the Rewind instructions. scan_loop_body_labels: Vec, - // mapping between Aggregation operator id and the register that holds the start of the aggregation result - aggregation_start_registers: HashMap, - // mapping between Aggregation operator id and associated metadata (if the aggregation has a group by clause) - group_bys: HashMap, + // metadata for the group by operator + group_by_metadata: Option, // mapping between Order operator id and associated metadata sorts: HashMap, // mapping between Join operator id and associated metadata (for left joins only) left_joins: HashMap, - // register holding the start of the result set - result_set_register_start: usize, + // First register of the aggregation results + pub aggregation_start_register: Option, + // We need to emit result columns in the order they are present in the SELECT, but they may not be in the same order in the ORDER BY sorter. + // This vector holds the indexes of the result columns in the ORDER BY sorter. + pub result_column_indexes_in_orderby_sorter: HashMap, + // We might skip adding a SELECT result column into the ORDER BY sorter if it is an exact match in the ORDER BY keys. + // This vector holds the indexes of the result columns that we need to skip. + pub result_columns_to_skip_in_orderby_sorter: Option>, } -// /// Emitters return one of three possible results from the step() method: -// /// - Continue: the operator is not yet ready to emit a result row -// /// - ReadyToEmit: the operator is ready to emit a result row -// /// - Done: the operator has completed execution -// /// For example, a Scan operator will return Continue until it has opened a cursor, rewound it and applied any predicates. -// /// At that point, it will return ReadyToEmit. -// /// Finally, when the Scan operator has emitted a Next instruction, it will return Done. -// /// -// /// Parent operators are free to make decisions based on the result a child operator's step() method. -// /// -// /// When the root operator of a Plan returns ReadyToEmit, a ResultRow will always be emitted. -// /// When the root operator returns Done, the bytecode plan is complete. 
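// A minimal, self-contained sketch of the bookkeeping behind the
// `result_column_indexes_in_orderby_sorter` and
// `result_columns_to_skip_in_orderby_sorter` fields added to `Metadata` above,
// using plain strings as stand-ins for expressions. The function name and the
// assumption that ORDER BY keys occupy the leading sorter columns are
// illustrative, not the actual emitter code: a result column that exactly
// matches an ORDER BY key reuses that key's slot instead of being inserted
// into the sorter a second time.
use std::collections::HashMap;

fn map_result_columns_to_sorter(
    order_by_keys: &[String],
    result_columns: &[String],
) -> (HashMap<usize, usize>, Vec<usize>) {
    // result column index -> sorter column index
    let mut indexes = HashMap::new();
    // result columns that are not inserted separately because they match a key
    let mut skipped = Vec::new();
    let mut next_free_slot = order_by_keys.len();
    for (i, rc) in result_columns.iter().enumerate() {
        if let Some(pos) = order_by_keys.iter().position(|k| k == rc) {
            indexes.insert(i, pos);
            skipped.push(i);
        } else {
            indexes.insert(i, next_free_slot);
            next_free_slot += 1;
        }
    }
    (indexes, skipped)
}

fn main() {
    // e.g. SELECT first_name, age FROM users ORDER BY age
    let keys = vec!["age".to_string()];
    let cols = vec!["first_name".to_string(), "age".to_string()];
    let (indexes, skipped) = map_result_columns_to_sorter(&keys, &cols);
    assert_eq!(indexes[&1], 0); // `age` reuses the ORDER BY key slot
    assert_eq!(indexes[&0], 1); // `first_name` is appended after the keys
    assert_eq!(skipped, vec![1]);
}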
-// #[derive(Debug, PartialEq)] -// pub enum OpStepResult { -// Continue, -// ReadyToEmit, -// Done, -// } - -// impl Emitter for SourceOperator { -// fn step( -// &mut self, -// program: &mut ProgramBuilder, -// m: &mut Metadata, -// referenced_tables: &[BTreeTableReference], -// ) -> Result { -// let current_operator_column_count = self.column_count(referenced_tables); -// match self { -// SourceOperator::Scan { -// table_reference, -// id, -// step, -// predicates, -// iter_dir, -// } => { -// *step += 1; -// const SCAN_OPEN_READ: usize = 1; -// const SCAN_BODY: usize = 2; -// const SCAN_NEXT: usize = 3; -// let reverse = iter_dir -// .as_ref() -// .is_some_and(|iter_dir| *iter_dir == IterationDirection::Backwards); -// match *step { -// SCAN_OPEN_READ => { -// let cursor_id = program.alloc_cursor_id( -// Some(table_reference.table_identifier.clone()), -// Some(Table::BTree(table_reference.table.clone())), -// ); -// let root_page = table_reference.table.root_page; -// let next_row_label = program.allocate_label(); -// m.next_row_labels.insert(*id, next_row_label); -// program.emit_insn(Insn::OpenReadAsync { -// cursor_id, -// root_page, -// }); -// program.emit_insn(Insn::OpenReadAwait); - -// Ok(OpStepResult::Continue) -// } -// SCAN_BODY => { -// let cursor_id = -// program.resolve_cursor_id(&table_reference.table_identifier, None); -// if reverse { -// program.emit_insn(Insn::LastAsync { cursor_id }); -// } else { -// program.emit_insn(Insn::RewindAsync { cursor_id }); -// } -// let scan_loop_body_label = program.allocate_label(); -// let halt_label = m.termination_label_stack.last().unwrap(); -// program.emit_insn_with_label_dependency( -// if reverse { -// Insn::LastAwait { -// cursor_id, -// pc_if_empty: *halt_label, -// } -// } else { -// Insn::RewindAwait { -// cursor_id, -// pc_if_empty: *halt_label, -// } -// }, -// *halt_label, -// ); -// m.scan_loop_body_labels.push(scan_loop_body_label); -// program.defer_label_resolution( -// scan_loop_body_label, -// program.offset() as usize, -// ); - -// let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); -// if let Some(preds) = predicates { -// for expr in preds { -// let jump_target_when_true = program.allocate_label(); -// let condition_metadata = ConditionMetadata { -// jump_if_condition_is_true: false, -// jump_target_when_true, -// jump_target_when_false: *jump_label, -// }; -// translate_condition_expr( -// program, -// referenced_tables, -// expr, -// None, -// condition_metadata, -// m.result_set_register_start, -// )?; -// program.resolve_label(jump_target_when_true, program.offset()); -// } -// } - -// Ok(OpStepResult::ReadyToEmit) -// } -// SCAN_NEXT => { -// let cursor_id = -// program.resolve_cursor_id(&table_reference.table_identifier, None); -// program -// .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); -// if reverse { -// program.emit_insn(Insn::PrevAsync { cursor_id }); -// } else { -// program.emit_insn(Insn::NextAsync { cursor_id }); -// } -// let jump_label = m.scan_loop_body_labels.pop().unwrap(); - -// if reverse { -// program.emit_insn_with_label_dependency( -// Insn::PrevAwait { -// cursor_id, -// pc_if_next: jump_label, -// }, -// jump_label, -// ); -// } else { -// program.emit_insn_with_label_dependency( -// Insn::NextAwait { -// cursor_id, -// pc_if_next: jump_label, -// }, -// jump_label, -// ); -// } -// Ok(OpStepResult::Done) -// } -// _ => Ok(OpStepResult::Done), -// } -// } -// SourceOperator::Search { -// table_reference, -// search, -// predicates, -// step, 
-// id, -// .. -// } => { -// *step += 1; -// const SEARCH_OPEN_READ: usize = 1; -// const SEARCH_BODY: usize = 2; -// const SEARCH_NEXT: usize = 3; -// match *step { -// SEARCH_OPEN_READ => { -// let table_cursor_id = program.alloc_cursor_id( -// Some(table_reference.table_identifier.clone()), -// Some(Table::BTree(table_reference.table.clone())), -// ); - -// let next_row_label = program.allocate_label(); - -// if !matches!(search, Search::PrimaryKeyEq { .. }) { -// // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. -// m.next_row_labels.insert(*id, next_row_label); -// } - -// let scan_loop_body_label = program.allocate_label(); -// m.scan_loop_body_labels.push(scan_loop_body_label); -// program.emit_insn(Insn::OpenReadAsync { -// cursor_id: table_cursor_id, -// root_page: table_reference.table.root_page, -// }); -// program.emit_insn(Insn::OpenReadAwait); - -// if let Search::IndexSearch { index, .. } = search { -// let index_cursor_id = program.alloc_cursor_id( -// Some(index.name.clone()), -// Some(Table::Index(index.clone())), -// ); -// program.emit_insn(Insn::OpenReadAsync { -// cursor_id: index_cursor_id, -// root_page: index.root_page, -// }); -// program.emit_insn(Insn::OpenReadAwait); -// } -// Ok(OpStepResult::Continue) -// } -// SEARCH_BODY => { -// let table_cursor_id = -// program.resolve_cursor_id(&table_reference.table_identifier, None); - -// // Open the loop for the index search. -// // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. -// if !matches!(search, Search::PrimaryKeyEq { .. }) { -// let index_cursor_id = if let Search::IndexSearch { index, .. } = search -// { -// Some(program.resolve_cursor_id(&index.name, None)) -// } else { -// None -// }; -// let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); -// let cmp_reg = program.alloc_register(); -// let (cmp_expr, cmp_op) = match search { -// Search::IndexSearch { -// cmp_expr, cmp_op, .. -// } => (cmp_expr, cmp_op), -// Search::PrimaryKeySearch { cmp_expr, cmp_op } => (cmp_expr, cmp_op), -// Search::PrimaryKeyEq { .. 
} => unreachable!(), -// }; -// // TODO this only handles ascending indexes -// match cmp_op { -// ast::Operator::Equals -// | ast::Operator::Greater -// | ast::Operator::GreaterEquals => { -// translate_expr( -// program, -// Some(referenced_tables), -// cmp_expr, -// cmp_reg, -// None, -// m.result_set_register_start, -// )?; -// } -// ast::Operator::Less | ast::Operator::LessEquals => { -// program.emit_insn(Insn::Null { -// dest: cmp_reg, -// dest_end: None, -// }); -// } -// _ => unreachable!(), -// } -// program.emit_insn_with_label_dependency( -// match cmp_op { -// ast::Operator::Equals | ast::Operator::GreaterEquals => { -// Insn::SeekGE { -// is_index: index_cursor_id.is_some(), -// cursor_id: index_cursor_id.unwrap_or(table_cursor_id), -// start_reg: cmp_reg, -// num_regs: 1, -// target_pc: *m.termination_label_stack.last().unwrap(), -// } -// } -// ast::Operator::Greater -// | ast::Operator::Less -// | ast::Operator::LessEquals => Insn::SeekGT { -// is_index: index_cursor_id.is_some(), -// cursor_id: index_cursor_id.unwrap_or(table_cursor_id), -// start_reg: cmp_reg, -// num_regs: 1, -// target_pc: *m.termination_label_stack.last().unwrap(), -// }, -// _ => unreachable!(), -// }, -// *m.termination_label_stack.last().unwrap(), -// ); -// if *cmp_op == ast::Operator::Less -// || *cmp_op == ast::Operator::LessEquals -// { -// translate_expr( -// program, -// Some(referenced_tables), -// cmp_expr, -// cmp_reg, -// None, -// m.result_set_register_start, -// )?; -// } - -// program.defer_label_resolution( -// scan_loop_body_label, -// program.offset() as usize, -// ); -// // TODO: We are currently only handling ascending indexes. -// // For conditions like index_key > 10, we have already seeked to the first key greater than 10, and can just scan forward. -// // For conditions like index_key < 10, we are at the beginning of the index, and will scan forward and emit IdxGE(10) with a conditional jump to the end. -// // For conditions like index_key = 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward and emit IdxGT(10) with a conditional jump to the end. -// // For conditions like index_key >= 10, we have already seeked to the first key greater than or equal to 10, and can just scan forward. -// // For conditions like index_key <= 10, we are at the beginning of the index, and will scan forward and emit IdxGT(10) with a conditional jump to the end. -// // For conditions like index_key != 10, TODO. probably the optimal way is not to use an index at all. -// // -// // For primary key searches we emit RowId and then compare it to the seek value. 
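// A small sketch of the mapping spelled out in the comment above for an
// ascending index scan: which seek starts the loop and which index comparison
// terminates it. The enums and function are simplified stand-ins, not the real
// `Insn` variants; `!=` is left out because the comment notes it is probably
// better served without an index. (For rowid searches the same terminating
// checks are done with RowId plus Gt/Ge instead of IdxGT/IdxGE.)
#[derive(Debug, PartialEq)]
enum Seek {
    GE,        // SeekGE on the comparison value
    GT,        // SeekGT on the comparison value
    FromStart, // seek over NULL, i.e. start at the beginning of the index
}

#[derive(Debug, PartialEq)]
enum Terminate {
    IdxGT, // stop once the index key exceeds the comparison value
    IdxGE, // stop once the index key reaches the comparison value
    None,  // scan forward to the end of the index
}

fn seek_and_termination(cmp_op: &str) -> (Seek, Terminate) {
    match cmp_op {
        ">" => (Seek::GT, Terminate::None),
        ">=" => (Seek::GE, Terminate::None),
        "=" => (Seek::GE, Terminate::IdxGT),
        "<=" => (Seek::FromStart, Terminate::IdxGT),
        "<" => (Seek::FromStart, Terminate::IdxGE),
        other => unreachable!("unsupported comparison operator: {}", other),
    }
}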
- -// let abort_jump_target = *m -// .next_row_labels -// .get(id) -// .unwrap_or(m.termination_label_stack.last().unwrap()); -// match cmp_op { -// ast::Operator::Equals | ast::Operator::LessEquals => { -// if let Some(index_cursor_id) = index_cursor_id { -// program.emit_insn_with_label_dependency( -// Insn::IdxGT { -// cursor_id: index_cursor_id, -// start_reg: cmp_reg, -// num_regs: 1, -// target_pc: abort_jump_target, -// }, -// abort_jump_target, -// ); -// } else { -// let rowid_reg = program.alloc_register(); -// program.emit_insn(Insn::RowId { -// cursor_id: table_cursor_id, -// dest: rowid_reg, -// }); -// program.emit_insn_with_label_dependency( -// Insn::Gt { -// lhs: rowid_reg, -// rhs: cmp_reg, -// target_pc: abort_jump_target, -// }, -// abort_jump_target, -// ); -// } -// } -// ast::Operator::Less => { -// if let Some(index_cursor_id) = index_cursor_id { -// program.emit_insn_with_label_dependency( -// Insn::IdxGE { -// cursor_id: index_cursor_id, -// start_reg: cmp_reg, -// num_regs: 1, -// target_pc: abort_jump_target, -// }, -// abort_jump_target, -// ); -// } else { -// let rowid_reg = program.alloc_register(); -// program.emit_insn(Insn::RowId { -// cursor_id: table_cursor_id, -// dest: rowid_reg, -// }); -// program.emit_insn_with_label_dependency( -// Insn::Ge { -// lhs: rowid_reg, -// rhs: cmp_reg, -// target_pc: abort_jump_target, -// }, -// abort_jump_target, -// ); -// } -// } -// _ => {} -// } - -// if let Some(index_cursor_id) = index_cursor_id { -// program.emit_insn(Insn::DeferredSeek { -// index_cursor_id, -// table_cursor_id, -// }); -// } -// } - -// let jump_label = m -// .next_row_labels -// .get(id) -// .unwrap_or(m.termination_label_stack.last().unwrap()); - -// if let Search::PrimaryKeyEq { cmp_expr } = search { -// let src_reg = program.alloc_register(); -// translate_expr( -// program, -// Some(referenced_tables), -// cmp_expr, -// src_reg, -// None, -// m.result_set_register_start, -// )?; -// program.emit_insn_with_label_dependency( -// Insn::SeekRowid { -// cursor_id: table_cursor_id, -// src_reg, -// target_pc: *jump_label, -// }, -// *jump_label, -// ); -// } -// if let Some(predicates) = predicates { -// for predicate in predicates.iter() { -// let jump_target_when_true = program.allocate_label(); -// let condition_metadata = ConditionMetadata { -// jump_if_condition_is_true: false, -// jump_target_when_true, -// jump_target_when_false: *jump_label, -// }; -// translate_condition_expr( -// program, -// referenced_tables, -// predicate, -// None, -// condition_metadata, -// m.result_set_register_start, -// )?; -// program.resolve_label(jump_target_when_true, program.offset()); -// } -// } - -// Ok(OpStepResult::ReadyToEmit) -// } -// SEARCH_NEXT => { -// if matches!(search, Search::PrimaryKeyEq { .. }) { -// // Primary key equality search is handled with a SeekRowid instruction which does not loop, so there is no need to emit a NextAsync instruction. -// return Ok(OpStepResult::Done); -// } -// let cursor_id = match search { -// Search::IndexSearch { index, .. } => { -// program.resolve_cursor_id(&index.name, None) -// } -// Search::PrimaryKeySearch { .. } => { -// program.resolve_cursor_id(&table_reference.table_identifier, None) -// } -// Search::PrimaryKeyEq { .. 
} => unreachable!(), -// }; -// program -// .resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); -// program.emit_insn(Insn::NextAsync { cursor_id }); -// let jump_label = m.scan_loop_body_labels.pop().unwrap(); -// program.emit_insn_with_label_dependency( -// Insn::NextAwait { -// cursor_id, -// pc_if_next: jump_label, -// }, -// jump_label, -// ); -// Ok(OpStepResult::Done) -// } -// _ => Ok(OpStepResult::Done), -// } -// } -// SourceOperator::Join { -// left, -// right, -// outer, -// predicates, -// step, -// id, -// .. -// } => { -// *step += 1; -// const JOIN_INIT: usize = 1; -// const JOIN_DO_JOIN: usize = 2; -// const JOIN_END: usize = 3; -// match *step { -// JOIN_INIT => { -// if *outer { -// let lj_metadata = LeftJoinMetadata { -// match_flag_register: program.alloc_register(), -// set_match_flag_true_label: program.allocate_label(), -// check_match_flag_label: program.allocate_label(), -// on_match_jump_to_label: program.allocate_label(), -// }; -// m.left_joins.insert(*id, lj_metadata); -// } -// left.step(program, m, referenced_tables)?; -// right.step(program, m, referenced_tables)?; - -// Ok(OpStepResult::Continue) -// } -// JOIN_DO_JOIN => { -// left.step(program, m, referenced_tables)?; - -// let mut jump_target_when_false = *m -// .next_row_labels -// .get(&right.id()) -// .or(m.next_row_labels.get(&left.id())) -// .unwrap_or(m.termination_label_stack.last().unwrap()); - -// if *outer { -// let lj_meta = m.left_joins.get(id).unwrap(); -// program.emit_insn(Insn::Integer { -// value: 0, -// dest: lj_meta.match_flag_register, -// }); -// jump_target_when_false = lj_meta.check_match_flag_label; -// } -// m.next_row_labels.insert(right.id(), jump_target_when_false); - -// right.step(program, m, referenced_tables)?; - -// if let Some(predicates) = predicates { -// let jump_target_when_true = program.allocate_label(); -// let condition_metadata = ConditionMetadata { -// jump_if_condition_is_true: false, -// jump_target_when_true, -// jump_target_when_false, -// }; -// for predicate in predicates.iter() { -// translate_condition_expr( -// program, -// referenced_tables, -// predicate, -// None, -// condition_metadata, -// m.result_set_register_start, -// )?; -// } -// program.resolve_label(jump_target_when_true, program.offset()); -// } - -// if *outer { -// let lj_meta = m.left_joins.get(id).unwrap(); -// program.defer_label_resolution( -// lj_meta.set_match_flag_true_label, -// program.offset() as usize, -// ); -// program.emit_insn(Insn::Integer { -// value: 1, -// dest: lj_meta.match_flag_register, -// }); -// } - -// Ok(OpStepResult::ReadyToEmit) -// } -// JOIN_END => { -// right.step(program, m, referenced_tables)?; - -// if *outer { -// let lj_meta = m.left_joins.get(id).unwrap(); -// // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) -// program.resolve_label(lj_meta.check_match_flag_label, program.offset()); -// program.emit_insn_with_label_dependency( -// Insn::IfPos { -// reg: lj_meta.match_flag_register, -// target_pc: lj_meta.on_match_jump_to_label, -// decrement_by: 0, -// }, -// lj_meta.on_match_jump_to_label, -// ); -// // If not, we set the right table cursor's "pseudo null bit" on, which means any Insn::Column will return NULL -// let right_cursor_id = match right.as_ref() { -// SourceOperator::Scan { -// table_reference, .. -// } => program -// .resolve_cursor_id(&table_reference.table_identifier, None), -// SourceOperator::Search { -// table_reference, .. 
-// } => program -// .resolve_cursor_id(&table_reference.table_identifier, None), -// _ => unreachable!(), -// }; -// program.emit_insn(Insn::NullRow { -// cursor_id: right_cursor_id, -// }); -// // Jump to setting the left join match flag to 1 again, but this time the right table cursor will set everything to null -// program.emit_insn_with_label_dependency( -// Insn::Goto { -// target_pc: lj_meta.set_match_flag_true_label, -// }, -// lj_meta.set_match_flag_true_label, -// ); -// } -// let next_row_label = if *outer { -// m.left_joins.get(id).unwrap().on_match_jump_to_label -// } else { -// *m.next_row_labels.get(&right.id()).unwrap() -// }; -// // This points to the NextAsync instruction of the left table -// program.resolve_label(next_row_label, program.offset()); -// left.step(program, m, referenced_tables)?; - -// Ok(OpStepResult::Done) -// } -// _ => Ok(OpStepResult::Done), -// } -// } -// SourceOperator::Projection { -// id, -// source, -// expressions, -// aggregates, -// group_by, -// step, -// .. -// } => { -// *step += 1; - -// if !aggregates.is_empty() && group_by.is_none() { -// const PROJECTION_WAIT_UNTIL_SOURCE_READY: usize = 1; -// const PROJECTION_FINALIZE_SOURCE: usize = 2; -// match *step { -// PROJECTION_WAIT_UNTIL_SOURCE_READY => loop { -// match source.step(program, m, referenced_tables)? { -// OpStepResult::Continue => continue, -// OpStepResult::ReadyToEmit | OpStepResult::Done => { -// return Ok(OpStepResult::ReadyToEmit); -// } -// } -// }, -// PROJECTION_FINALIZE_SOURCE => { -// match source.step(program, m, referenced_tables)? { -// OpStepResult::Done => return Ok(OpStepResult::Done), -// _ => unreachable!(), -// } -// } -// _ => return Ok(OpStepResult::Done), -// } -// } - -// // Group by aggregation eg. SELECT a, b, sum(c) FROM t GROUP BY a, b -// if let Some(group_by) = group_by { -// const GROUP_BY_INIT: usize = 1; -// const GROUP_BY_INSERT_INTO_SORTER: usize = 2; -// const GROUP_BY_SORT_AND_COMPARE: usize = 3; -// const GROUP_BY_PREPARE_ROW: usize = 4; -// const GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE: usize = 5; -// match *step { -// GROUP_BY_INIT => { -// let agg_final_label = program.allocate_label(); -// m.termination_label_stack.push(agg_final_label); -// let num_aggs = aggregates.len(); - -// let sort_cursor = program.alloc_cursor_id(None, None); - -// let abort_flag_register = program.alloc_register(); -// let data_in_accumulator_indicator_register = program.alloc_register(); -// let group_exprs_comparison_register = -// program.alloc_registers(group_by.len()); -// let group_exprs_accumulator_register = -// program.alloc_registers(group_by.len()); -// let agg_exprs_start_reg = program.alloc_registers(num_aggs); -// m.aggregation_start_registers -// .insert(*id, agg_exprs_start_reg); -// let sorter_key_register = program.alloc_register(); - -// let subroutine_accumulator_clear_label = program.allocate_label(); -// let subroutine_accumulator_output_label = program.allocate_label(); -// let sorter_data_label = program.allocate_label(); -// let grouping_done_label = program.allocate_label(); - -// let mut order = Vec::new(); -// const ASCENDING: i64 = 0; -// for _ in group_by.iter() { -// order.push(OwnedValue::Integer(ASCENDING)); -// } -// program.emit_insn(Insn::SorterOpen { -// cursor_id: sort_cursor, -// columns: current_operator_column_count, -// order: OwnedRecord::new(order), -// }); - -// program.add_comment(program.offset(), "clear group by abort flag"); -// program.emit_insn(Insn::Integer { -// value: 0, -// dest: abort_flag_register, -// }); - 
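// Rough, illustrative picture of the registers allocated above for GROUP BY
// bookkeeping (a stand-in struct and allocator, not the real `GroupByMetadata`
// or `ProgramBuilder`): one block holds the previous sorter row's group key
// for comparison, one block holds the key of the group currently being
// accumulated, and each aggregate gets a register for its running state.
struct GroupByRegisters {
    abort_flag: usize,
    data_in_accumulator: usize,
    key_comparison_start: usize,  // group_by.len() registers, initialized to NULL
    key_accumulator_start: usize, // group_by.len() registers
    agg_state_start: usize,       // one register per aggregate
    sorter_record: usize,         // holds the packed sorter row
}

fn allocate(first_free_reg: usize, num_group_by: usize, num_aggs: usize) -> GroupByRegisters {
    let mut next = first_free_reg;
    let mut take = |n: usize| {
        let start = next;
        next += n;
        start
    };
    GroupByRegisters {
        abort_flag: take(1),
        data_in_accumulator: take(1),
        key_comparison_start: take(num_group_by),
        key_accumulator_start: take(num_group_by),
        agg_state_start: take(num_aggs),
        sorter_record: take(1),
    }
}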
-// program.add_comment( -// program.offset(), -// "initialize group by comparison registers to NULL", -// ); -// program.emit_insn(Insn::Null { -// dest: group_exprs_comparison_register, -// dest_end: if group_by.len() > 1 { -// Some(group_exprs_comparison_register + group_by.len() - 1) -// } else { -// None -// }, -// }); - -// program.add_comment( -// program.offset(), -// "go to clear accumulator subroutine", -// ); - -// let subroutine_accumulator_clear_return_offset_register = -// program.alloc_register(); -// program.emit_insn_with_label_dependency( -// Insn::Gosub { -// target_pc: subroutine_accumulator_clear_label, -// return_reg: subroutine_accumulator_clear_return_offset_register, -// }, -// subroutine_accumulator_clear_label, -// ); - -// m.group_bys.insert( -// *id, -// GroupByMetadata { -// sort_cursor, -// subroutine_accumulator_clear_label, -// subroutine_accumulator_clear_return_offset_register, -// subroutine_accumulator_output_label, -// subroutine_accumulator_output_return_offset_register: program -// .alloc_register(), -// accumulator_indicator_set_true_label: program.allocate_label(), -// sorter_data_label, -// grouping_done_label, -// abort_flag_register, -// data_in_accumulator_indicator_register, -// group_exprs_accumulator_register, -// group_exprs_comparison_register, -// sorter_key_register, -// }, -// ); - -// loop { -// match source.step(program, m, referenced_tables)? { -// OpStepResult::Continue => continue, -// OpStepResult::ReadyToEmit => { -// return Ok(OpStepResult::Continue); -// } -// OpStepResult::Done => { -// return Ok(OpStepResult::Done); -// } -// } -// } -// } -// GROUP_BY_INSERT_INTO_SORTER => { -// let sort_keys_count = group_by.len(); -// let start_reg = program.alloc_registers(current_operator_column_count); -// for (i, expr) in group_by.iter().enumerate() { -// let key_reg = start_reg + i; -// translate_expr( -// program, -// Some(referenced_tables), -// expr, -// key_reg, -// None, -// m.result_set_register_start, -// )?; -// } -// for (i, agg) in aggregates.iter().enumerate() { -// // TODO it's a hack to assume aggregate functions have exactly one argument. -// // Counterpoint e.g. GROUP_CONCAT(expr, separator). -// // -// // Here we are collecting scalars for the group by sorter, which will include -// // both the group by expressions and the aggregate arguments. -// // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` -// // the sorter will have two scalars: u.first_name and u.age. -// // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. -// // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. -// // -// // This is why we take the first argument of each aggregate function currently. -// // It's mostly an artifact of the current architecture being a bit poor; we should recognize -// // which scalars are dependencies of aggregate functions and explicitly collect those. 
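// A self-contained sketch, in plain Rust rather than VDBE instructions, of the
// sort-then-aggregate strategy the comment above describes for
// `SELECT u.first_name, SUM(u.age) FROM users GROUP BY u.first_name`: the
// sorter receives (group key, aggregate argument) pairs, the rows are sorted
// by the key, and a finished group is emitted every time the key changes.
fn group_by_sum(rows: &[(String, i64)]) -> Vec<(String, i64)> {
    // "SorterInsert" + "SorterSort": collect the rows and sort them by the key.
    let mut sorted: Vec<(String, i64)> = rows.to_vec();
    sorted.sort_by(|a, b| a.0.cmp(&b.0));

    let mut out: Vec<(String, i64)> = Vec::new();
    // Accumulator for the group currently being built (key, running sum).
    let mut current: Option<(String, i64)> = None;
    for (key, value) in sorted {
        let same_group = current.as_ref().map_or(false, |(k, _)| *k == key);
        if same_group {
            // Same key as the previous row: just step the aggregate.
            if let Some((_, sum)) = current.as_mut() {
                *sum += value;
            }
        } else {
            // Key changed: output the finished group and start a new one.
            if let Some(done) = current.take() {
                out.push(done);
            }
            current = Some((key, value));
        }
    }
    // Flush the last open group, if any.
    if let Some(done) = current {
        out.push(done);
    }
    out
}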
-// let expr = &agg.args[0]; -// let agg_reg = start_reg + sort_keys_count + i; -// translate_expr( -// program, -// Some(referenced_tables), -// expr, -// agg_reg, -// None, -// m.result_set_register_start, -// )?; -// } - -// let group_by_metadata = m.group_bys.get(id).unwrap(); - -// program.emit_insn(Insn::MakeRecord { -// start_reg, -// count: current_operator_column_count, -// dest_reg: group_by_metadata.sorter_key_register, -// }); - -// let group_by_metadata = m.group_bys.get(id).unwrap(); -// program.emit_insn(Insn::SorterInsert { -// cursor_id: group_by_metadata.sort_cursor, -// record_reg: group_by_metadata.sorter_key_register, -// }); - -// return Ok(OpStepResult::Continue); -// } -// #[allow(clippy::never_loop)] -// GROUP_BY_SORT_AND_COMPARE => { -// loop { -// match source.step(program, m, referenced_tables)? { -// OpStepResult::Done => { -// break; -// } -// _ => unreachable!(), -// } -// } - -// let group_by_metadata = m.group_bys.get_mut(id).unwrap(); - -// let GroupByMetadata { -// group_exprs_comparison_register: comparison_register, -// subroutine_accumulator_output_return_offset_register, -// subroutine_accumulator_output_label, -// subroutine_accumulator_clear_return_offset_register, -// subroutine_accumulator_clear_label, -// data_in_accumulator_indicator_register, -// accumulator_indicator_set_true_label, -// group_exprs_accumulator_register: group_exprs_start_register, -// abort_flag_register, -// sorter_key_register, -// .. -// } = *group_by_metadata; -// let halt_label = *m.termination_label_stack.first().unwrap(); - -// let mut column_names = -// Vec::with_capacity(current_operator_column_count); -// for expr in group_by -// .iter() -// .chain(aggregates.iter().map(|agg| &agg.args[0])) -// // FIXME: just blindly taking the first arg is a hack -// { -// // Sorter column names for group by are now just determined by stringifying the expression, since the group by -// // columns and aggregations can be practically anything. 
-// // FIXME: either come up with something more robust, or make this something like expr.to_canonical_string() so that we can handle -// // things like `count(1)` and `COUNT(1)` the same way -// column_names.push(expr.to_string()); -// } -// let pseudo_columns = column_names -// .iter() -// .map(|name| Column { -// name: name.clone(), -// primary_key: false, -// ty: crate::schema::Type::Null, -// }) -// .collect::>(); - -// let pseudo_table = Rc::new(PseudoTable { -// columns: pseudo_columns, -// }); - -// let pseudo_cursor = program -// .alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); - -// program.emit_insn(Insn::OpenPseudo { -// cursor_id: pseudo_cursor, -// content_reg: sorter_key_register, -// num_fields: current_operator_column_count, -// }); - -// let group_by_metadata = m.group_bys.get(id).unwrap(); -// program.emit_insn_with_label_dependency( -// Insn::SorterSort { -// cursor_id: group_by_metadata.sort_cursor, -// pc_if_empty: group_by_metadata.grouping_done_label, -// }, -// group_by_metadata.grouping_done_label, -// ); - -// program.defer_label_resolution( -// group_by_metadata.sorter_data_label, -// program.offset() as usize, -// ); -// program.emit_insn(Insn::SorterData { -// cursor_id: group_by_metadata.sort_cursor, -// dest_reg: group_by_metadata.sorter_key_register, -// pseudo_cursor, -// }); - -// let groups_start_reg = program.alloc_registers(group_by.len()); -// for (i, expr) in group_by.iter().enumerate() { -// let sorter_column_index = -// resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; -// let group_reg = groups_start_reg + i; -// program.emit_insn(Insn::Column { -// cursor_id: pseudo_cursor, -// column: sorter_column_index, -// dest: group_reg, -// }); -// } - -// program.emit_insn(Insn::Compare { -// start_reg_a: comparison_register, -// start_reg_b: groups_start_reg, -// count: group_by.len(), -// }); - -// let agg_step_label = program.allocate_label(); - -// program.add_comment( -// program.offset(), -// "start new group if comparison is not equal", -// ); -// program.emit_insn_with_label_dependency( -// Insn::Jump { -// target_pc_lt: program.offset() + 1, -// target_pc_eq: agg_step_label, -// target_pc_gt: program.offset() + 1, -// }, -// agg_step_label, -// ); - -// program.emit_insn(Insn::Move { -// source_reg: groups_start_reg, -// dest_reg: comparison_register, -// count: group_by.len(), -// }); - -// program.add_comment( -// program.offset(), -// "check if ended group had data, and output if so", -// ); -// program.emit_insn_with_label_dependency( -// Insn::Gosub { -// target_pc: subroutine_accumulator_output_label, -// return_reg: -// subroutine_accumulator_output_return_offset_register, -// }, -// subroutine_accumulator_output_label, -// ); - -// program.add_comment(program.offset(), "check abort flag"); -// program.emit_insn_with_label_dependency( -// Insn::IfPos { -// reg: abort_flag_register, -// target_pc: halt_label, -// decrement_by: 0, -// }, -// m.termination_label_stack[0], -// ); - -// program -// .add_comment(program.offset(), "goto clear accumulator subroutine"); -// program.emit_insn_with_label_dependency( -// Insn::Gosub { -// target_pc: subroutine_accumulator_clear_label, -// return_reg: subroutine_accumulator_clear_return_offset_register, -// }, -// subroutine_accumulator_clear_label, -// ); - -// program.resolve_label(agg_step_label, program.offset()); -// let start_reg = m.aggregation_start_registers.get(id).unwrap(); -// for (i, agg) in aggregates.iter().enumerate() { -// let agg_result_reg = start_reg + 
i; -// translate_aggregation( -// program, -// referenced_tables, -// agg, -// agg_result_reg, -// Some(pseudo_cursor), -// )?; -// } - -// program.add_comment( -// program.offset(), -// "don't emit group columns if continuing existing group", -// ); -// program.emit_insn_with_label_dependency( -// Insn::If { -// target_pc: accumulator_indicator_set_true_label, -// reg: data_in_accumulator_indicator_register, -// null_reg: 0, // unused in this case -// }, -// accumulator_indicator_set_true_label, -// ); - -// for (i, expr) in group_by.iter().enumerate() { -// let key_reg = group_exprs_start_register + i; -// let sorter_column_index = -// resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; -// program.emit_insn(Insn::Column { -// cursor_id: pseudo_cursor, -// column: sorter_column_index, -// dest: key_reg, -// }); -// } - -// program.resolve_label( -// accumulator_indicator_set_true_label, -// program.offset(), -// ); -// program.add_comment(program.offset(), "indicate data in accumulator"); -// program.emit_insn(Insn::Integer { -// value: 1, -// dest: data_in_accumulator_indicator_register, -// }); - -// return Ok(OpStepResult::Continue); -// } -// GROUP_BY_PREPARE_ROW => { -// let group_by_metadata = m.group_bys.get(id).unwrap(); -// program.emit_insn_with_label_dependency( -// Insn::SorterNext { -// cursor_id: group_by_metadata.sort_cursor, -// pc_if_next: group_by_metadata.sorter_data_label, -// }, -// group_by_metadata.sorter_data_label, -// ); - -// program.resolve_label( -// group_by_metadata.grouping_done_label, -// program.offset(), -// ); - -// program.add_comment(program.offset(), "emit row for final group"); -// program.emit_insn_with_label_dependency( -// Insn::Gosub { -// target_pc: group_by_metadata -// .subroutine_accumulator_output_label, -// return_reg: group_by_metadata -// .subroutine_accumulator_output_return_offset_register, -// }, -// group_by_metadata.subroutine_accumulator_output_label, -// ); - -// program.add_comment(program.offset(), "group by finished"); -// let termination_label = -// m.termination_label_stack[m.termination_label_stack.len() - 2]; -// program.emit_insn_with_label_dependency( -// Insn::Goto { -// target_pc: termination_label, -// }, -// termination_label, -// ); -// program.emit_insn(Insn::Integer { -// value: 1, -// dest: group_by_metadata.abort_flag_register, -// }); -// program.emit_insn(Insn::Return { -// return_reg: group_by_metadata -// .subroutine_accumulator_output_return_offset_register, -// }); - -// program.resolve_label( -// group_by_metadata.subroutine_accumulator_output_label, -// program.offset(), -// ); - -// program.add_comment( -// program.offset(), -// "output group by row subroutine start", -// ); -// let termination_label = *m.termination_label_stack.last().unwrap(); -// program.emit_insn_with_label_dependency( -// Insn::IfPos { -// reg: group_by_metadata.data_in_accumulator_indicator_register, -// target_pc: termination_label, -// decrement_by: 0, -// }, -// termination_label, -// ); -// program.emit_insn(Insn::Return { -// return_reg: group_by_metadata -// .subroutine_accumulator_output_return_offset_register, -// }); - -// return Ok(OpStepResult::ReadyToEmit); -// } -// GROUP_BY_CLEAR_ACCUMULATOR_SUBROUTINE => { -// let group_by_metadata = m.group_bys.get(id).unwrap(); -// program.emit_insn(Insn::Return { -// return_reg: group_by_metadata -// .subroutine_accumulator_output_return_offset_register, -// }); - -// program.add_comment( -// program.offset(), -// "clear accumulator subroutine start", -// ); -// 
program.resolve_label( -// group_by_metadata.subroutine_accumulator_clear_label, -// program.offset(), -// ); -// let start_reg = group_by_metadata.group_exprs_accumulator_register; -// program.emit_insn(Insn::Null { -// dest: start_reg, -// dest_end: Some(start_reg + group_by.len() + aggregates.len() - 1), -// }); - -// program.emit_insn(Insn::Integer { -// value: 0, -// dest: group_by_metadata.data_in_accumulator_indicator_register, -// }); -// program.emit_insn(Insn::Return { -// return_reg: group_by_metadata -// .subroutine_accumulator_clear_return_offset_register, -// }); -// } -// _ => { -// return Ok(OpStepResult::Done); -// } -// } -// } - -// // Non-grouped aggregation e.g. SELECT COUNT(*) FROM t - -// const AGGREGATE_INIT: usize = 1; -// const AGGREGATE_WAIT_UNTIL_SOURCE_READY: usize = 2; -// match *step { -// AGGREGATE_INIT => { -// let agg_final_label = program.allocate_label(); -// m.termination_label_stack.push(agg_final_label); -// let num_aggs = aggregates.len(); -// let start_reg = program.alloc_registers(num_aggs); -// m.aggregation_start_registers.insert(*id, start_reg); - -// Ok(OpStepResult::Continue) -// } -// AGGREGATE_WAIT_UNTIL_SOURCE_READY => loop { -// match source.step(program, m, referenced_tables)? { -// OpStepResult::Continue => {} -// OpStepResult::ReadyToEmit => { -// let start_reg = m.aggregation_start_registers.get(id).unwrap(); -// for (i, agg) in aggregates.iter().enumerate() { -// let agg_result_reg = start_reg + i; -// translate_aggregation( -// program, -// referenced_tables, -// agg, -// agg_result_reg, -// None, -// )?; -// } -// } -// OpStepResult::Done => { -// return Ok(OpStepResult::ReadyToEmit); -// } -// } -// }, -// _ => Ok(OpStepResult::Done), -// } -// } -// SourceOperator::Filter { .. } => unreachable!("predicates have been pushed down"), -// SourceOperator::Limit { source, step, .. } => { -// *step += 1; -// loop { -// match source.step(program, m, referenced_tables)? { -// OpStepResult::Continue => continue, -// OpStepResult::ReadyToEmit => { -// return Ok(OpStepResult::ReadyToEmit); -// } -// OpStepResult::Done => return Ok(OpStepResult::Done), -// } -// } -// } -// SourceOperator::Order { -// id, -// source, -// key, -// step, -// } => { -// *step += 1; -// const ORDER_INIT: usize = 1; -// const ORDER_INSERT_INTO_SORTER: usize = 2; -// const ORDER_SORT_AND_OPEN_LOOP: usize = 3; -// const ORDER_NEXT: usize = 4; -// match *step { -// ORDER_INIT => { -// m.termination_label_stack.push(program.allocate_label()); -// let sort_cursor = program.alloc_cursor_id(None, None); -// m.sorts.insert( -// *id, -// SortMetadata { -// sort_cursor, -// pseudo_table_cursor: usize::MAX, // will be set later -// sorter_data_register: program.alloc_register(), -// sorter_data_label: program.allocate_label(), -// done_label: program.allocate_label(), -// }, -// ); -// let mut order = Vec::new(); -// for (_, direction) in key.iter() { -// order.push(OwnedValue::Integer(*direction as i64)); -// } -// program.emit_insn(Insn::SorterOpen { -// cursor_id: sort_cursor, -// columns: key.len(), -// order: OwnedRecord::new(order), -// }); - -// loop { -// match source.step(program, m, referenced_tables)? 
{ -// OpStepResult::Continue => continue, -// OpStepResult::ReadyToEmit => { -// return Ok(OpStepResult::Continue); -// } -// OpStepResult::Done => { -// return Ok(OpStepResult::Done); -// } -// } -// } -// } -// ORDER_INSERT_INTO_SORTER => { -// let sort_keys_count = key.len(); -// let source_cols_count = source.column_count(referenced_tables); -// let start_reg = program.alloc_registers(sort_keys_count); -// source.result_columns(program, referenced_tables, m, None)?; - -// for (i, (expr, _)) in key.iter().enumerate() { -// let key_reg = start_reg + i; -// translate_expr( -// program, -// Some(referenced_tables), -// expr, -// key_reg, -// None, -// m.result_set_register_start, -// )?; -// } - -// let sort_metadata = m.sorts.get_mut(id).unwrap(); -// program.emit_insn(Insn::MakeRecord { -// start_reg, -// count: sort_keys_count + source_cols_count, -// dest_reg: sort_metadata.sorter_data_register, -// }); - -// program.emit_insn(Insn::SorterInsert { -// cursor_id: sort_metadata.sort_cursor, -// record_reg: sort_metadata.sorter_data_register, -// }); - -// Ok(OpStepResult::Continue) -// } -// #[allow(clippy::never_loop)] -// ORDER_SORT_AND_OPEN_LOOP => { -// loop { -// match source.step(program, m, referenced_tables)? { -// OpStepResult::Done => { -// break; -// } -// _ => unreachable!(), -// } -// } -// program.resolve_label( -// m.termination_label_stack.pop().unwrap(), -// program.offset(), -// ); -// let column_names = source.column_names(); -// let mut pseudo_columns = vec![]; -// for (i, _) in key.iter().enumerate() { -// pseudo_columns.push(Column { -// name: format!("sort_key_{}", i), -// primary_key: false, -// ty: crate::schema::Type::Null, -// }); -// } -// for name in column_names { -// pseudo_columns.push(Column { -// name: name.clone(), -// primary_key: false, -// ty: crate::schema::Type::Null, -// }); -// } - -// let num_fields = pseudo_columns.len(); - -// let pseudo_cursor = program.alloc_cursor_id( -// None, -// Some(Table::Pseudo(Rc::new(PseudoTable { -// columns: pseudo_columns, -// }))), -// ); -// let sort_metadata = m.sorts.get(id).unwrap(); - -// program.emit_insn(Insn::OpenPseudo { -// cursor_id: pseudo_cursor, -// content_reg: sort_metadata.sorter_data_register, -// num_fields, -// }); - -// program.emit_insn_with_label_dependency( -// Insn::SorterSort { -// cursor_id: sort_metadata.sort_cursor, -// pc_if_empty: sort_metadata.done_label, -// }, -// sort_metadata.done_label, -// ); - -// program.defer_label_resolution( -// sort_metadata.sorter_data_label, -// program.offset() as usize, -// ); -// program.emit_insn(Insn::SorterData { -// cursor_id: sort_metadata.sort_cursor, -// dest_reg: sort_metadata.sorter_data_register, -// pseudo_cursor, -// }); - -// let sort_metadata = m.sorts.get_mut(id).unwrap(); - -// sort_metadata.pseudo_table_cursor = pseudo_cursor; - -// Ok(OpStepResult::ReadyToEmit) -// } -// ORDER_NEXT => { -// let sort_metadata = m.sorts.get(id).unwrap(); -// program.emit_insn_with_label_dependency( -// Insn::SorterNext { -// cursor_id: sort_metadata.sort_cursor, -// pc_if_next: sort_metadata.sorter_data_label, -// }, -// sort_metadata.sorter_data_label, -// ); - -// program.resolve_label(sort_metadata.done_label, program.offset()); - -// Ok(OpStepResult::Done) -// } -// _ => unreachable!(), -// } -// } -// SourceOperator::Nothing => Ok(OpStepResult::Done), -// } -// } -// fn result_columns( -// &self, -// program: &mut ProgramBuilder, -// referenced_tables: &[BTreeTableReference], -// m: &mut Metadata, -// cursor_override: 
Option<&SortCursorOverride>, -// ) -> Result { -// let col_count = self.column_count(referenced_tables); -// match self { -// SourceOperator::Scan { -// table_reference, .. -// } => { -// let start_reg = program.alloc_registers(col_count); -// let table = cursor_override -// .map(|c| c.pseudo_table.clone()) -// .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); -// let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { -// program.resolve_cursor_id(&table_reference.table_identifier, None) -// }); -// let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); -// translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); - -// Ok(start_reg) -// } -// SourceOperator::Search { -// table_reference, .. -// } => { -// let start_reg = program.alloc_registers(col_count); -// let table = cursor_override -// .map(|c| c.pseudo_table.clone()) -// .unwrap_or_else(|| Table::BTree(table_reference.table.clone())); -// let cursor_id = cursor_override.map(|c| c.cursor_id).unwrap_or_else(|| { -// program.resolve_cursor_id(&table_reference.table_identifier, None) -// }); -// let start_column_offset = cursor_override.map(|c| c.sort_key_len).unwrap_or(0); -// translate_table_columns(program, cursor_id, &table, start_column_offset, start_reg); - -// Ok(start_reg) -// } -// SourceOperator::Join { left, right, .. } => { -// let left_start_reg = -// left.result_columns(program, referenced_tables, m, cursor_override)?; -// right.result_columns(program, referenced_tables, m, cursor_override)?; - -// Ok(left_start_reg) -// } -// SourceOperator::Projection { -// id, -// expressions, -// aggregates, -// group_by, -// .. -// } => { -// if aggregates.is_empty() && group_by.is_none() { -// let expr_count = expressions.len(); -// let start_reg = program.alloc_registers(expr_count); -// let mut cur_reg = start_reg; -// m.result_set_register_start = start_reg; -// for expr in expressions { -// translate_expr( -// program, -// Some(referenced_tables), -// expr, -// cur_reg, -// cursor_override.map(|c| c.cursor_id), -// m.result_set_register_start, -// )?; -// cur_reg += 1; -// } - -// return Ok(start_reg); -// } -// let agg_start_reg = m.aggregation_start_registers.get(id).unwrap(); -// program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); -// for (i, agg) in aggregates.iter().enumerate() { -// let agg_result_reg = *agg_start_reg + i; -// program.emit_insn(Insn::AggFinal { -// register: agg_result_reg, -// func: agg.func.clone(), -// }); -// } - -// if let Some(group_by) = group_by { -// let output_row_start_reg = -// program.alloc_registers(aggregates.len() + group_by.len()); -// let group_by_metadata = m.group_bys.get(id).unwrap(); -// program.emit_insn(Insn::Copy { -// src_reg: group_by_metadata.group_exprs_accumulator_register, -// dst_reg: output_row_start_reg, -// amount: group_by.len() - 1, -// }); -// program.emit_insn(Insn::Copy { -// src_reg: *agg_start_reg, -// dst_reg: output_row_start_reg + group_by.len(), -// amount: aggregates.len() - 1, -// }); - -// Ok(output_row_start_reg) -// } else { -// Ok(*agg_start_reg) -// } -// } -// SourceOperator::Filter { .. } => unreachable!("predicates have been pushed down"), -// SourceOperator::Limit { .. } => { -// unimplemented!() -// } -// SourceOperator::Order { id, key, .. 
} => { -// let cursor_id = m.sorts.get(id).unwrap().pseudo_table_cursor; -// let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); -// let start_column_offset = key.len(); -// let column_count = pseudo_table.columns().len() - start_column_offset; -// let start_reg = program.alloc_registers(column_count); -// translate_table_columns( -// program, -// cursor_id, -// &pseudo_table, -// start_column_offset, -// start_reg, -// ); - -// Ok(start_reg) -// } -// SourceOperator::Projection { -// expressions, id, .. -// } => { -// let expr_count = expressions.len(); -// let start_reg = program.alloc_registers(expr_count); -// let mut cur_reg = start_reg; -// m.result_set_register_start = start_reg; -// for expr in expressions { -// translate_expr( -// program, -// Some(referenced_tables), -// expr, -// cur_reg, -// cursor_override.map(|c| c.cursor_id), -// m.result_set_register_start, -// )?; -// cur_reg += 1; -// } - -// Ok(start_reg) -// } -// SourceOperator::Nothing => unimplemented!(), -// } -// } -// fn result_row( -// &mut self, -// program: &mut ProgramBuilder, -// referenced_tables: &[BTreeTableReference], -// m: &mut Metadata, -// cursor_override: Option<&SortCursorOverride>, -// ) -> Result<()> { -// match self { -// SourceOperator::Limit { source, limit, .. } => { -// source.result_row(program, referenced_tables, m, cursor_override)?; -// let limit_reg = program.alloc_register(); -// program.emit_insn(Insn::Integer { -// value: *limit as i64, -// dest: limit_reg, -// }); -// program.mark_last_insn_constant(); -// let jump_label = m.termination_label_stack.first().unwrap(); -// program.emit_insn_with_label_dependency( -// Insn::DecrJumpZero { -// reg: limit_reg, -// target_pc: *jump_label, -// }, -// *jump_label, -// ); - -// Ok(()) -// } -// operator => { -// let start_reg = -// operator.result_columns(program, referenced_tables, m, cursor_override)?; -// program.emit_insn(Insn::ResultRow { -// start_reg, -// count: operator.column_count(referenced_tables), -// }); -// Ok(()) -// } -// } -// } -// } - fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); @@ -1637,13 +129,14 @@ fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> let metadata = Metadata { termination_label_stack: vec![halt_label], - aggregation_start_registers: HashMap::new(), - group_bys: HashMap::new(), + group_by_metadata: None, left_joins: HashMap::new(), next_row_labels: HashMap::new(), scan_loop_body_labels: vec![], sorts: HashMap::new(), - result_set_register_start: 0, + aggregation_start_register: None, + result_column_indexes_in_orderby_sorter: HashMap::new(), + result_columns_to_skip_in_orderby_sorter: None, }; Ok((program, metadata, init_label, start_offset)) @@ -1655,10 +148,8 @@ fn epilogue( init_label: BranchOffset, start_offset: BranchOffset, ) -> Result<()> { - program.resolve_label( - metadata.termination_label_stack.pop().unwrap(), - program.offset(), - ); + let halt_label = metadata.termination_label_stack.pop().unwrap(); + program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt { err_code: 0, description: String::new(), @@ -1684,8 +175,6 @@ pub fn emit_program( ) -> Result { let (mut program, mut metadata, init_label, start_offset) = prologue()?; - let mut order_by_necessary = plan.order_by.is_some(); - // OPEN CURSORS ETC if let Some(ref mut order_by) = plan.order_by { init_order_by(&mut program, order_by, &mut metadata)?; 
@@ -1716,13 +205,23 @@ pub fn emit_program( &plan.referenced_tables, )?; + let mut order_by_necessary = plan.order_by.is_some(); + // IF GROUP BY, SORT BY GROUPS AND DO AGGREGATION if let Some(ref mut group_by) = plan.group_by { - sort_group_by(&mut program, group_by, &mut metadata)?; - finalize_group_by(&mut program, group_by, &mut metadata)?; + group_by_emit( + &mut program, + &plan.result_columns, + group_by, + plan.order_by.as_ref(), + &plan.aggregates.as_ref().unwrap(), + plan.limit.clone(), + &plan.referenced_tables, + &mut metadata, + )?; } else if let Some(ref mut aggregates) = plan.aggregates { // Example: SELECT sum(x), count(*) FROM t; - finalize_agg_without_group_by(&mut program, aggregates, &mut metadata)?; + agg_without_group_by_emit(&mut program, aggregates, &mut metadata)?; // If we have an aggregate without a group by, we don't need an order by because currently // there can only be a single row result in those cases. order_by_necessary = false; @@ -1797,8 +296,6 @@ fn init_group_by( let group_exprs_comparison_register = program.alloc_registers(group_by.len()); let group_exprs_accumulator_register = program.alloc_registers(group_by.len()); let agg_exprs_start_reg = program.alloc_registers(num_aggs); - m.aggregation_start_registers - .insert(GROUP_BY_ID, agg_exprs_start_reg); let sorter_key_register = program.alloc_register(); let subroutine_accumulator_clear_label = program.allocate_label(); @@ -1847,24 +344,23 @@ fn init_group_by( subroutine_accumulator_clear_label, ); - m.group_bys.insert( - GROUP_BY_ID, - GroupByMetadata { - sort_cursor, - subroutine_accumulator_clear_label, - subroutine_accumulator_clear_return_offset_register, - subroutine_accumulator_output_label, - subroutine_accumulator_output_return_offset_register: program.alloc_register(), - accumulator_indicator_set_true_label: program.allocate_label(), - sorter_data_label, - grouping_done_label, - abort_flag_register, - data_in_accumulator_indicator_register, - group_exprs_accumulator_register, - group_exprs_comparison_register, - sorter_key_register, - }, - ); + m.aggregation_start_register = Some(agg_exprs_start_reg); + + m.group_by_metadata = Some(GroupByMetadata { + sort_cursor, + subroutine_accumulator_clear_label, + subroutine_accumulator_clear_return_offset_register, + subroutine_accumulator_output_label, + subroutine_accumulator_output_return_offset_register: program.alloc_register(), + accumulator_indicator_set_true_label: program.allocate_label(), + sorter_data_label, + grouping_done_label, + abort_flag_register, + data_in_accumulator_indicator_register, + group_exprs_accumulator_register, + group_exprs_comparison_register, + sorter_key_register, + }); Ok(()) } @@ -2017,7 +513,7 @@ fn open_loop( predicate, None, condition_metadata, - m.result_set_register_start, + None, )?; } program.resolve_label(jump_target_when_true, program.offset()); @@ -2089,7 +585,7 @@ fn open_loop( expr, None, condition_metadata, - m.result_set_register_start, + None, )?; program.resolve_label(jump_target_when_true, program.offset()); } @@ -2135,7 +631,7 @@ fn open_loop( cmp_expr, cmp_reg, None, - m.result_set_register_start, + None, )?; } ast::Operator::Less | ast::Operator::LessEquals => { @@ -2175,7 +671,7 @@ fn open_loop( cmp_expr, cmp_reg, None, - m.result_set_register_start, + None, )?; } @@ -2273,7 +769,7 @@ fn open_loop( cmp_expr, src_reg, None, - m.result_set_register_start, + None, )?; program.emit_insn_with_label_dependency( Insn::SeekRowid { @@ -2298,7 +794,7 @@ fn open_loop( predicate, None, condition_metadata, - 
m.result_set_register_start, + None, )?; program.resolve_label(jump_target_when_true, program.offset()); } @@ -2395,46 +891,31 @@ fn inner_loop_source_emit( } => { // TODO: DOESNT WORK YET let sort_keys_count = group_by.len(); - let column_count = sort_keys_count + aggregates.len(); + let aggregate_arguments_count = + aggregates.iter().map(|agg| agg.args.len()).sum::(); + let column_count = sort_keys_count + aggregate_arguments_count; let start_reg = program.alloc_registers(column_count); - for (i, expr) in group_by.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - None, - m.result_set_register_start, - )?; + let mut cur_reg = start_reg; + for expr in group_by.iter() { + let key_reg = cur_reg; + cur_reg += 1; + translate_expr(program, Some(referenced_tables), expr, key_reg, None, None)?; } - for (i, agg) in aggregates.iter().enumerate() { - // TODO it's a hack to assume aggregate functions have exactly one argument. - // Counterpoint e.g. GROUP_CONCAT(expr, separator). - // + for agg in aggregates.iter() { // Here we are collecting scalars for the group by sorter, which will include // both the group by expressions and the aggregate arguments. // e.g. in `select u.first_name, sum(u.age) from users group by u.first_name` // the sorter will have two scalars: u.first_name and u.age. // these are then sorted by u.first_name, and for each u.first_name, we sum the u.age. - // the actual aggregation is done later in GROUP_BY_SORT_AND_COMPARE below. - // - // This is why we take the first argument of each aggregate function currently. - // It's mostly an artifact of the current architecture being a bit poor; we should recognize - // which scalars are dependencies of aggregate functions and explicitly collect those. - let expr = &agg.args[0]; - let agg_reg = start_reg + sort_keys_count + i; - translate_expr( - program, - Some(referenced_tables), - expr, - agg_reg, - None, - m.result_set_register_start, - )?; + // the actual aggregation is done later. + for expr in agg.args.iter() { + let agg_reg = cur_reg; + cur_reg += 1; + translate_expr(program, Some(referenced_tables), expr, agg_reg, None, None)?; + } } - let group_by_metadata = m.group_bys.get(&GROUP_BY_ID).unwrap(); + let group_by_metadata = m.group_by_metadata.as_ref().unwrap(); program.emit_insn(Insn::MakeRecord { start_reg, @@ -2442,7 +923,6 @@ fn inner_loop_source_emit( dest_reg: group_by_metadata.sorter_key_register, }); - let group_by_metadata = m.group_bys.get(&GROUP_BY_ID).unwrap(); program.emit_insn(Insn::SorterInsert { cursor_id: group_by_metadata.sort_cursor, record_reg: group_by_metadata.sorter_key_register, @@ -2451,42 +931,90 @@ fn inner_loop_source_emit( Ok(()) } InnerLoopEmitTarget::OrderBySorter { order_by } => { - // TODO: DOESNT WORK YET - let sort_keys_count = order_by.len(); - let source_cols_count = result_columns.len(); - let start_reg = program.alloc_registers(sort_keys_count + source_cols_count); + // We need to handle the case where we are emitting to sorter. + // In that case the first columns should be the sort key columns, and the rest is the result columns of the select. + // In case any of the sort keys are exactly equal to a result column, we need to skip emitting that result column. + // We need to do this before rewriting the result columns to registers because we need to know which columns to skip. 
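+ // For example (illustrative): `SELECT a, b FROM t ORDER BY b` produces sorter rows laid out as [b, a];
+ // the sort key b doubles as a result column, so it is not emitted a second time and only a is appended after the key.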
+ // Moreover, we need to keep track what index in the ORDER BY sorter the result columns have, because the result columns + // should be emitted in the SELECT clause order, not the ORDER BY clause order. + let mut result_columns_to_skip: Option> = None; + for (i, rc) in result_columns.iter().enumerate() { + match rc { + ResultSetColumn::Scalar(expr) => { + let found = order_by.iter().enumerate().find(|(_, (e, _))| e == expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); + } + m.result_column_indexes_in_orderby_sorter.insert(i, j); + } + } + ResultSetColumn::Agg(agg) => { + let found = order_by + .iter() + .enumerate() + .find(|(_, (expr, _))| expr == &agg.original_expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); + } + m.result_column_indexes_in_orderby_sorter.insert(i, j); + } + } + ResultSetColumn::ComputedAgg(_) => { + unreachable!( + "ComputedAgg should have been rewritten to a normal agg before emit" + ); + } + } + } + let order_by_len = order_by.len(); + let result_columns_to_skip_len = result_columns_to_skip + .as_ref() + .map(|v| v.len()) + .unwrap_or(0); + let orderby_sorter_column_count = + order_by_len + result_columns.len() - result_columns_to_skip_len; + let start_reg = program.alloc_registers(orderby_sorter_column_count); for (i, (expr, _)) in order_by.iter().enumerate() { let key_reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - None, - m.result_set_register_start, - )?; + translate_expr(program, Some(referenced_tables), expr, key_reg, None, None)?; } - for (i, expr) in result_columns.iter().enumerate() { - match expr { + let mut cur_reg = start_reg + order_by_len; + let mut cur_idx_in_orderby_sorter = order_by_len; + for (i, rc) in result_columns.iter().enumerate() { + if let Some(ref v) = result_columns_to_skip { + if v.contains(&i) { + continue; + } + } + match rc { ResultSetColumn::Scalar(expr) => { - let reg = start_reg + sort_keys_count + i; translate_expr( program, Some(referenced_tables), expr, - reg, + cur_reg, + None, None, - m.result_set_register_start, )?; } - other => todo!("{:?}", other), + other => unreachable!("{:?}", other), } + m.result_column_indexes_in_orderby_sorter + .insert(i, cur_idx_in_orderby_sorter); + cur_idx_in_orderby_sorter += 1; + cur_reg += 1; } let sort_metadata = m.sorts.get_mut(&ORDER_BY_ID).unwrap(); program.emit_insn(Insn::MakeRecord { start_reg, - count: sort_keys_count + source_cols_count, + count: orderby_sorter_column_count, dest_reg: sort_metadata.sorter_data_register, }); @@ -2503,8 +1031,7 @@ fn inner_loop_source_emit( m.termination_label_stack.push(agg_final_label); let num_aggs = aggregates.len(); let start_reg = program.alloc_registers(result_columns.len()); - m.aggregation_start_registers - .insert(AGG_WITHOUT_GROUP_BY_ID, start_reg); + m.aggregation_start_register = Some(start_reg); for (i, agg) in aggregates.iter().enumerate() { let reg = start_reg + i; translate_aggregation(program, referenced_tables, agg, reg, None)?; @@ -2513,14 +1040,7 @@ fn inner_loop_source_emit( match expr { ResultSetColumn::Scalar(expr) => { let reg = start_reg + num_aggs + i; - translate_expr( - program, - Some(referenced_tables), - expr, - reg, - None, - m.result_set_register_start, - )?; + translate_expr(program, Some(referenced_tables), expr, reg, None, None)?; } ResultSetColumn::Agg(_) => 
{ /* do nothing, aggregates are computed above */ } other => unreachable!("Unexpected non-scalar result column: {:?}", other), @@ -2535,16 +1055,12 @@ fn inner_loop_source_emit( match expr { ResultSetColumn::Scalar(expr) => { let reg = start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - reg, - None, - m.result_set_register_start, - )?; + translate_expr(program, Some(referenced_tables), expr, reg, None, None)?; } - other => unreachable!("Unexpected non-scalar result column: {:?}", other), + other => unreachable!( + "Unexpected non-scalar result column in inner loop: {:?}", + other + ), } } program.emit_insn(Insn::ResultRow { @@ -2703,44 +1219,461 @@ fn close_loop( Ok(()) } - SourceOperator::Nothing => { - unreachable!() - } + SourceOperator::Nothing => Ok(()), } } -fn sort_group_by( +fn group_by_emit( program: &mut ProgramBuilder, + result_columns: &Vec, group_by: &Vec, + order_by: Option<&Vec<(ast::Expr, Direction)>>, + aggregates: &Vec, + limit: Option, + referenced_tables: &[BTreeTableReference], m: &mut Metadata, ) -> Result<()> { - todo!() + let group_by_metadata = m.group_by_metadata.as_mut().unwrap(); + + let GroupByMetadata { + group_exprs_comparison_register: comparison_register, + subroutine_accumulator_output_return_offset_register, + subroutine_accumulator_output_label, + subroutine_accumulator_clear_return_offset_register, + subroutine_accumulator_clear_label, + data_in_accumulator_indicator_register, + accumulator_indicator_set_true_label, + group_exprs_accumulator_register: group_exprs_start_register, + abort_flag_register, + sorter_key_register, + .. + } = *group_by_metadata; + let halt_label = *m.termination_label_stack.first().unwrap(); + + // all group by columns and all arguments of agg functions are in the sorter. 
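+ // For example (illustrative): `SELECT a, sum(b), avg(c) FROM t GROUP BY a` produces sorter rows laid out as [a, b, c];
+ // the group by column comes first, followed by one column per aggregate argument, in aggregate order.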
+ // the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same) + let sorter_column_count = + group_by.len() + aggregates.iter().map(|agg| agg.args.len()).sum::(); + // sorter column names do not matter + let pseudo_columns = (0..sorter_column_count) + .map(|i| Column { + name: i.to_string(), + primary_key: false, + ty: crate::schema::Type::Null, + }) + .collect::>(); + + // A pseudo table is a "fake" table to which we read one row at a time from the sorter + let pseudo_table = Rc::new(PseudoTable { + columns: pseudo_columns, + }); + + let pseudo_cursor = program.alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); + + program.emit_insn(Insn::OpenPseudo { + cursor_id: pseudo_cursor, + content_reg: sorter_key_register, + num_fields: sorter_column_count, + }); + + // Sort the sorter based on the group by columns + program.emit_insn_with_label_dependency( + Insn::SorterSort { + cursor_id: group_by_metadata.sort_cursor, + pc_if_empty: group_by_metadata.grouping_done_label, + }, + group_by_metadata.grouping_done_label, + ); + + program.defer_label_resolution( + group_by_metadata.sorter_data_label, + program.offset() as usize, + ); + // Read a row from the sorted data in the sorter into the pseudo cursor + program.emit_insn(Insn::SorterData { + cursor_id: group_by_metadata.sort_cursor, + dest_reg: group_by_metadata.sorter_key_register, + pseudo_cursor, + }); + + // Read the group by columns from the pseudo cursor + let groups_start_reg = program.alloc_registers(group_by.len()); + for (i, expr) in group_by.iter().enumerate() { + let sorter_column_index = i; + let group_reg = groups_start_reg + i; + program.emit_insn(Insn::Column { + cursor_id: pseudo_cursor, + column: sorter_column_index, + dest: group_reg, + }); + } + + // Compare the group by columns to the previous group by columns to see if we are at a new group or not + program.emit_insn(Insn::Compare { + start_reg_a: comparison_register, + start_reg_b: groups_start_reg, + count: group_by.len(), + }); + + let agg_step_label = program.allocate_label(); + + program.add_comment( + program.offset(), + "start new group if comparison is not equal", + ); + // If we are at a new group, continue. If we are at the same group, jump to the aggregation step (i.e. 
accumulate more values into the aggregations) + program.emit_insn_with_label_dependency( + Insn::Jump { + target_pc_lt: program.offset() + 1, + target_pc_eq: agg_step_label, + target_pc_gt: program.offset() + 1, + }, + agg_step_label, + ); + + // New group, move current group by columns into the comparison register + program.emit_insn(Insn::Move { + source_reg: groups_start_reg, + dest_reg: comparison_register, + count: group_by.len(), + }); + + program.add_comment( + program.offset(), + "check if ended group had data, and output if so", + ); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: subroutine_accumulator_output_label, + return_reg: subroutine_accumulator_output_return_offset_register, + }, + subroutine_accumulator_output_label, + ); + + program.add_comment(program.offset(), "check abort flag"); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: abort_flag_register, + target_pc: halt_label, + decrement_by: 0, + }, + m.termination_label_stack[0], + ); + + program.add_comment(program.offset(), "goto clear accumulator subroutine"); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: subroutine_accumulator_clear_label, + return_reg: subroutine_accumulator_clear_return_offset_register, + }, + subroutine_accumulator_clear_label, + ); + + // Accumulate the values into the aggregations + program.resolve_label(agg_step_label, program.offset()); + let start_reg = m.aggregation_start_register.unwrap(); + let mut cursor_index = group_by.len(); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = start_reg + i; + translate_aggregation_groupby( + program, + referenced_tables, + pseudo_cursor, + cursor_index, + agg, + agg_result_reg, + )?; + cursor_index += agg.args.len(); + } + + // We only emit the group by columns if we are going to start a new group (i.e. 
the prev group will not accumulate any more values into the aggregations) + program.add_comment( + program.offset(), + "don't emit group columns if continuing existing group", + ); + program.emit_insn_with_label_dependency( + Insn::If { + target_pc: accumulator_indicator_set_true_label, + reg: data_in_accumulator_indicator_register, + null_reg: 0, // unused in this case + }, + accumulator_indicator_set_true_label, + ); + + // Read the group by columns for a finished group + for (i, expr) in group_by.iter().enumerate() { + let key_reg = group_exprs_start_register + i; + let sorter_column_index = i; + program.emit_insn(Insn::Column { + cursor_id: pseudo_cursor, + column: sorter_column_index, + dest: key_reg, + }); + } + + program.resolve_label(accumulator_indicator_set_true_label, program.offset()); + program.add_comment(program.offset(), "indicate data in accumulator"); + program.emit_insn(Insn::Integer { + value: 1, + dest: data_in_accumulator_indicator_register, + }); + + program.emit_insn_with_label_dependency( + Insn::SorterNext { + cursor_id: group_by_metadata.sort_cursor, + pc_if_next: group_by_metadata.sorter_data_label, + }, + group_by_metadata.sorter_data_label, + ); + + program.resolve_label(group_by_metadata.grouping_done_label, program.offset()); + + program.add_comment(program.offset(), "emit row for final group"); + program.emit_insn_with_label_dependency( + Insn::Gosub { + target_pc: group_by_metadata.subroutine_accumulator_output_label, + return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + }, + group_by_metadata.subroutine_accumulator_output_label, + ); + + program.add_comment(program.offset(), "group by finished"); + let termination_label = m.termination_label_stack[m.termination_label_stack.len() - 2]; + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: termination_label, + }, + termination_label, + ); + program.emit_insn(Insn::Integer { + value: 1, + dest: group_by_metadata.abort_flag_register, + }); + program.emit_insn(Insn::Return { + return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + }); + + program.resolve_label( + group_by_metadata.subroutine_accumulator_output_label, + program.offset(), + ); + + program.add_comment(program.offset(), "output group by row subroutine start"); + let termination_label = *m.termination_label_stack.last().unwrap(); + program.emit_insn_with_label_dependency( + Insn::IfPos { + reg: group_by_metadata.data_in_accumulator_indicator_register, + target_pc: termination_label, + decrement_by: 0, + }, + termination_label, + ); + program.emit_insn(Insn::Return { + return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + }); + + let agg_start_reg = m.aggregation_start_register.unwrap(); + program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); + for (i, agg) in aggregates.iter().enumerate() { + let agg_result_reg = agg_start_reg + i; + program.emit_insn(Insn::AggFinal { + register: agg_result_reg, + func: agg.func.clone(), + }); + } + + // TODO handle result column expressions like LENGTH(SUM(x)) + // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) + // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) + // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the + // result column expression matches a) a group by column or b) an aggregation 
result. + let mut precomputed_exprs_to_register = Vec::with_capacity(aggregates.len() + group_by.len()); + for (i, expr) in group_by.iter().enumerate() { + precomputed_exprs_to_register.push((expr, group_exprs_start_register + i)); + } + for (i, agg) in aggregates.iter().enumerate() { + precomputed_exprs_to_register.push((&agg.original_expr, agg_start_reg + i)); + } + + // We need to handle the case where we are emitting to sorter. + // In that case the first columns should be the sort key columns, and the rest is the result columns of the select. + // In case any of the sort keys are exactly equal to a result column, we need to skip emitting that result column. + // We need to do this before rewriting the result columns to registers because we need to know which columns to skip. + // Moreover, we need to keep track what index in the ORDER BY sorter the result columns have, because the result columns + // should be emitted in the SELECT clause order, not the ORDER BY clause order. + let mut result_columns_to_skip: Option> = None; + if let Some(order_by) = order_by { + for (i, rc) in result_columns.iter().enumerate() { + match rc { + ResultSetColumn::Scalar(expr) => { + let found = order_by.iter().enumerate().find(|(_, (e, _))| e == expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); + } + m.result_column_indexes_in_orderby_sorter.insert(i, j); + } + } + ResultSetColumn::Agg(agg) => { + let found = order_by + .iter() + .enumerate() + .find(|(_, (expr, _))| expr == &agg.original_expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); + } + m.result_column_indexes_in_orderby_sorter.insert(i, j); + } + } + ResultSetColumn::ComputedAgg(_) => { + unreachable!( + "ComputedAgg should have been rewritten to a normal agg before emit" + ); + } + } + } + } + let order_by_len = order_by.as_ref().map(|v| v.len()).unwrap_or(0); + let result_columns_to_skip_len = result_columns_to_skip + .as_ref() + .map(|v| v.len()) + .unwrap_or(0); + let output_row_start_reg = + program.alloc_registers(result_columns.len() + order_by_len - result_columns_to_skip_len); + let mut cur_reg = output_row_start_reg; + if let Some(order_by) = order_by { + for (expr, _) in order_by.iter() { + translate_expr( + program, + Some(referenced_tables), + expr, + cur_reg, + None, + Some(&precomputed_exprs_to_register), + )?; + cur_reg += 1; + } + } + let mut res_col_idx_in_orderby_sorter = order_by_len; + for (i, rc) in result_columns.iter().enumerate() { + if let Some(ref v) = result_columns_to_skip { + if v.contains(&i) { + continue; + } + } + match rc { + ResultSetColumn::Scalar(expr) => { + translate_expr( + program, + Some(referenced_tables), + expr, + cur_reg, + None, + Some(&precomputed_exprs_to_register), + )?; + } + ResultSetColumn::Agg(agg) => { + let found = aggregates.iter().enumerate().find(|(_, a)| **a == *agg); + if let Some((i, _)) = found { + program.emit_insn(Insn::Copy { + src_reg: agg_start_reg + i, + dst_reg: cur_reg, + amount: 0, + }); + } else { + unreachable!("agg {:?} not found", agg); + } + } + ResultSetColumn::ComputedAgg(agg) => { + unreachable!( + "ComputedAgg should have been rewritten to a normal agg before emit: {:?}", + agg + ); + } + } + m.result_column_indexes_in_orderby_sorter + .insert(i, res_col_idx_in_orderby_sorter); + res_col_idx_in_orderby_sorter += 1; + cur_reg += 1; + } + + match order_by { 
+ None => { + if let Some(limit) = limit { + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + program.emit_insn(Insn::ResultRow { + start_reg: output_row_start_reg, + count: aggregates.len() + group_by.len(), + }); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *m.termination_label_stack.last().unwrap(), + }, + *m.termination_label_stack.last().unwrap(), + ); + } + } + Some(_) => { + program.emit_insn(Insn::MakeRecord { + start_reg: output_row_start_reg, + count: aggregates.len() + group_by.len(), + dest_reg: group_by_metadata.sorter_key_register, + }); + + program.emit_insn(Insn::SorterInsert { + cursor_id: m.sorts.get(&ORDER_BY_ID).unwrap().sort_cursor, + record_reg: group_by_metadata.sorter_key_register, + }); + } + } + + program.emit_insn(Insn::Return { + return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, + }); + + program.add_comment(program.offset(), "clear accumulator subroutine start"); + program.resolve_label( + group_by_metadata.subroutine_accumulator_clear_label, + program.offset(), + ); + let start_reg = group_by_metadata.group_exprs_accumulator_register; + program.emit_insn(Insn::Null { + dest: start_reg, + dest_end: Some(start_reg + group_by.len() + aggregates.len() - 1), + }); + + program.emit_insn(Insn::Integer { + value: 0, + dest: group_by_metadata.data_in_accumulator_indicator_register, + }); + program.emit_insn(Insn::Return { + return_reg: group_by_metadata.subroutine_accumulator_clear_return_offset_register, + }); + + m.result_columns_to_skip_in_orderby_sorter = result_columns_to_skip; + + Ok(()) } -fn finalize_group_by( - program: &mut ProgramBuilder, - group_by: &Vec, - m: &mut Metadata, -) -> Result<()> { - todo!() -} - -enum FinalizeGroupByEmitTarget { - OrderBySorter(usize), - ResultRow, -} - -fn finalize_agg_without_group_by( +fn agg_without_group_by_emit( program: &mut ProgramBuilder, aggregates: &Vec, m: &mut Metadata, ) -> Result<()> { - let agg_start_reg = m - .aggregation_start_registers - .get(&AGG_WITHOUT_GROUP_BY_ID) - .unwrap(); + let agg_start_reg = m.aggregation_start_register.unwrap(); for (i, agg) in aggregates.iter().enumerate() { - let agg_result_reg = *agg_start_reg + i; + let agg_result_reg = agg_start_reg + i; program.emit_insn(Insn::AggFinal { register: agg_result_reg, func: agg.func.clone(), @@ -2748,7 +1681,7 @@ fn finalize_agg_without_group_by( } let output_reg = program.alloc_registers(aggregates.len()); program.emit_insn(Insn::Copy { - src_reg: *agg_start_reg, + src_reg: agg_start_reg, dst_reg: output_reg, amount: aggregates.len() - 1, }); @@ -2778,7 +1711,12 @@ fn sort_order_by( ty: crate::schema::Type::Null, }); } - for expr in result_columns.iter() { + for (i, expr) in result_columns.iter().enumerate() { + if let Some(ref v) = m.result_columns_to_skip_in_orderby_sorter { + if v.contains(&i) { + continue; + } + } pseudo_columns.push(Column { name: match expr { ResultSetColumn::Scalar(expr) => expr.to_string(), @@ -2790,7 +1728,11 @@ fn sort_order_by( }); } - let num_fields = pseudo_columns.len(); + let num_columns_in_sorter = order_by.len() + result_columns.len() + - m.result_columns_to_skip_in_orderby_sorter + .as_ref() + .map(|v| v.len()) + .unwrap_or(0); let pseudo_cursor = program.alloc_cursor_id( None, @@ -2803,7 +1745,7 @@ fn sort_order_by( program.emit_insn(Insn::OpenPseudo { cursor_id: pseudo_cursor, content_reg: 
sort_metadata.sorter_data_register, - num_fields, + num_fields: num_columns_in_sorter, }); program.emit_insn_with_label_dependency( @@ -2823,25 +1765,20 @@ fn sort_order_by( let sort_metadata = m.sorts.get_mut(&ORDER_BY_ID).unwrap(); - sort_metadata.pseudo_table_cursor = pseudo_cursor; - // EMIT COLUMNS FROM SORTER AND EMIT ROW let cursor_id = pseudo_cursor; - let pseudo_table = program.resolve_cursor_to_table(cursor_id).unwrap(); - let start_column_offset = order_by.len(); - let column_count = pseudo_table.columns().len() - start_column_offset; - let start_reg = program.alloc_registers(column_count); - for i in 0..column_count { + let start_reg = program.alloc_registers(result_columns.len()); + for i in 0..result_columns.len() { let reg = start_reg + i; program.emit_insn(Insn::Column { cursor_id, - column: start_column_offset + i, + column: m.result_column_indexes_in_orderby_sorter[&i], dest: reg, }); } program.emit_insn(Insn::ResultRow { start_reg, - count: column_count, + count: result_columns.len(), }); if let Some(limit) = limit { diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 01dcae16c..7f74ef509 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -23,7 +23,7 @@ pub fn translate_condition_expr( expr: &ast::Expr, cursor_hint: Option, condition_metadata: ConditionMetadata, - result_set_register_start: usize, + precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, ) -> Result<()> { match expr { ast::Expr::Between { .. } => todo!(), @@ -39,7 +39,7 @@ pub fn translate_condition_expr( jump_if_condition_is_true: false, ..condition_metadata }, - result_set_register_start, + precomputed_exprs_to_registers, ); let _ = translate_condition_expr( program, @@ -47,7 +47,7 @@ pub fn translate_condition_expr( rhs, cursor_hint, condition_metadata, - result_set_register_start, + precomputed_exprs_to_registers, ); } ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { @@ -63,7 +63,7 @@ pub fn translate_condition_expr( jump_target_when_false, ..condition_metadata }, - result_set_register_start, + precomputed_exprs_to_registers, ); program.resolve_label(jump_target_when_false, program.offset()); let _ = translate_condition_expr( @@ -72,7 +72,7 @@ pub fn translate_condition_expr( rhs, cursor_hint, condition_metadata, - result_set_register_start, + precomputed_exprs_to_registers, ); } ast::Expr::Binary(lhs, op, rhs) => { @@ -83,7 +83,7 @@ pub fn translate_condition_expr( lhs, lhs_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, ); if let ast::Expr::Literal(_) = lhs.as_ref() { program.mark_last_insn_constant() @@ -95,7 +95,7 @@ pub fn translate_condition_expr( rhs, rhs_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, ); if let ast::Expr::Literal(_) = rhs.as_ref() { program.mark_last_insn_constant() @@ -344,7 +344,7 @@ pub fn translate_condition_expr( lhs, lhs_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; let rhs = rhs.as_ref().unwrap(); @@ -374,7 +374,7 @@ pub fn translate_condition_expr( expr, rhs_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. 
if !last_condition { @@ -418,7 +418,7 @@ pub fn translate_condition_expr( expr, rhs_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; program.emit_insn_with_label_dependency( Insn::Eq { @@ -464,7 +464,7 @@ pub fn translate_condition_expr( lhs, column_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = lhs.as_ref() { program.mark_last_insn_constant(); @@ -475,7 +475,7 @@ pub fn translate_condition_expr( rhs, pattern_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = rhs.as_ref() { program.mark_last_insn_constant(); @@ -549,7 +549,7 @@ pub fn translate_condition_expr( expr, cursor_hint, condition_metadata, - result_set_register_start, + precomputed_exprs_to_registers, ); } } @@ -564,27 +564,40 @@ pub fn translate_expr( expr: &ast::Expr, target_register: usize, cursor_hint: Option, - result_set_register_start: usize, + precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, ) -> Result { + if let Some(precomputed_exprs_to_registers) = precomputed_exprs_to_registers { + for (precomputed_expr, reg) in precomputed_exprs_to_registers.iter() { + if expr == *precomputed_expr { + program.emit_insn(Insn::Copy { + src_reg: *reg, + dst_reg: target_register, + amount: 0, + }); + return Ok(target_register); + } + } + } match expr { - ast::Expr::AggRef { index } => todo!(), ast::Expr::Between { .. } => todo!(), ast::Expr::Binary(e1, op, e2) => { - let e1_reg = translate_expr( + let e1_reg = program.alloc_register(); + translate_expr( program, referenced_tables, e1, - target_register, + e1_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; - let e2_reg = translate_expr( + let e2_reg = program.alloc_register(); + translate_expr( program, referenced_tables, e2, - target_register, + e2_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; match op { @@ -708,7 +721,7 @@ pub fn translate_expr( expr, reg_expr, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; let reg_type = program.alloc_register(); program.emit_insn(Insn::String8 { @@ -781,7 +794,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -808,7 +821,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; } @@ -846,7 +859,7 @@ pub fn translate_expr( arg, target_register, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; if index < args.len() - 1 { program.emit_insn_with_label_dependency( @@ -882,7 +895,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; } program.emit_insn(Insn::Function { @@ -915,7 +928,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; } program.emit_insn(Insn::Function { @@ -952,7 +965,7 @@ pub fn translate_expr( &args[0], temp_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::NotNull { reg: temp_reg, @@ -965,7 +978,7 @@ pub fn translate_expr( &args[1], temp_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Copy { src_reg: temp_reg, @@ -998,7 +1011,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - 
result_set_register_start, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1046,7 +1059,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1083,7 +1096,7 @@ pub fn translate_expr( arg, target_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; } } @@ -1121,7 +1134,7 @@ pub fn translate_expr( &args[0], str_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; translate_expr( program, @@ -1129,7 +1142,7 @@ pub fn translate_expr( &args[1], start_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; if args.len() == 3 { translate_expr( @@ -1138,7 +1151,7 @@ pub fn translate_expr( &args[2], length_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; } @@ -1168,7 +1181,7 @@ pub fn translate_expr( &args[0], regs, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1192,7 +1205,7 @@ pub fn translate_expr( &args[0], arg_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; start_reg = arg_reg; } @@ -1217,7 +1230,7 @@ pub fn translate_expr( arg, target_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; } } @@ -1257,7 +1270,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant(); @@ -1290,7 +1303,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1324,7 +1337,7 @@ pub fn translate_expr( arg, reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { program.mark_last_insn_constant() @@ -1362,7 +1375,7 @@ pub fn translate_expr( &args[0], first_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; let second_reg = program.alloc_register(); translate_expr( @@ -1371,7 +1384,7 @@ pub fn translate_expr( &args[1], second_reg, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { constant_mask: 0, @@ -1412,7 +1425,7 @@ pub fn translate_expr( database: _, table, column, - is_primary_key, + is_rowid_alias: is_primary_key, } => { let tbl_ref = referenced_tables.as_ref().unwrap().get(*table).unwrap(); let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier, cursor_hint); @@ -1503,7 +1516,7 @@ pub fn translate_expr( &exprs[0], target_register, cursor_hint, - result_set_register_start, + precomputed_exprs_to_registers, )?; } else { // Parenthesized expressions with multiple arguments are reserved for special cases @@ -1557,19 +1570,6 @@ fn wrap_eval_jump_expr( program.preassign_label_to_next_insn(if_true_label); } -pub fn resolve_ident_pseudo_table(ident: &String, pseudo_table: &PseudoTable) -> Result { - let res = pseudo_table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident); - if res.is_some() { - let (idx, _) = res.unwrap(); - return Ok(idx); - } - crate::bail_parse_error!("column with name {} not found", ident.as_str()); -} - pub fn maybe_apply_affinity(col_type: Type, target_register: 
usize, program: &mut ProgramBuilder) { if col_type == crate::schema::Type::Real { program.emit_insn(Insn::RealAffinity { @@ -1578,35 +1578,6 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu } } -pub fn translate_table_columns( - program: &mut ProgramBuilder, - cursor_id: usize, - table: &Table, - start_column_offset: usize, - start_reg: usize, -) -> usize { - let mut cur_reg = start_reg; - for i in start_column_offset..table.columns().len() { - let is_rowid = table.column_is_rowid_alias(table.get_column_at(i)); - let col_type = &table.get_column_at(i).ty; - if is_rowid { - program.emit_insn(Insn::RowId { - cursor_id, - dest: cur_reg, - }); - } else { - program.emit_insn(Insn::Column { - cursor_id, - column: i, - dest: cur_reg, - }); - } - maybe_apply_affinity(*col_type, cur_reg, program); - cur_reg += 1; - } - cur_reg -} - pub fn translate_aggregation( program: &mut ProgramBuilder, referenced_tables: &[BTreeTableReference], @@ -1627,7 +1598,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - 0, + None, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1649,7 +1620,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - 0, + None, ); expr_reg }; @@ -1692,7 +1663,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - 0, + None, )?; translate_expr( program, @@ -1700,7 +1671,7 @@ pub fn translate_aggregation( &delimiter_expr, delimiter_reg, cursor_hint, - 0, + None, )?; program.emit_insn(Insn::AggStep { @@ -1724,7 +1695,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - 0, + None, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1746,7 +1717,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - 0, + None, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1783,7 +1754,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - 0, + None, )?; translate_expr( program, @@ -1791,7 +1762,7 @@ pub fn translate_aggregation( &delimiter_expr, delimiter_reg, cursor_hint, - 0, + None, )?; program.emit_insn(Insn::AggStep { @@ -1815,7 +1786,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - 0, + None, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1837,7 +1808,7 @@ pub fn translate_aggregation( expr, expr_reg, cursor_hint, - 0, + None, )?; program.emit_insn(Insn::AggStep { acc_reg: target_register, @@ -1850,3 +1821,186 @@ pub fn translate_aggregation( }; Ok(dest) } + +pub fn translate_aggregation_groupby( + program: &mut ProgramBuilder, + referenced_tables: &[BTreeTableReference], + group_by_sorter_cursor_id: usize, + cursor_index: usize, + agg: &Aggregate, + target_register: usize, +) -> Result { + let emit_column = |program: &mut ProgramBuilder, expr_reg: usize| { + program.emit_insn(Insn::Column { + cursor_id: group_by_sorter_cursor_id, + column: cursor_index, + dest: expr_reg, + }); + }; + let dest = match agg.func { + AggFunc::Avg => { + if agg.args.len() != 1 { + crate::bail_parse_error!("avg bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Avg, + }); + target_register + } + AggFunc::Count => { + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Count, + }); + target_register + } + AggFunc::GroupConcat => 
{ + if agg.args.len() != 1 && agg.args.len() != 2 { + crate::bail_parse_error!("group_concat bad number of arguments"); + } + + let expr_reg = program.alloc_register(); + let delimiter_reg = program.alloc_register(); + + let delimiter_expr: ast::Expr; + + if agg.args.len() == 2 { + match &agg.args[1] { + ast::Expr::Column { .. } => { + delimiter_expr = agg.args[1].clone(); + } + ast::Expr::Literal(ast::Literal::String(s)) => { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + } + _ => crate::bail_parse_error!("Incorrect delimiter parameter"), + }; + } else { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(String::from("\",\""))); + } + + emit_column(program, expr_reg); + translate_expr( + program, + Some(referenced_tables), + &delimiter_expr, + delimiter_reg, + None, + None, + )?; + + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: delimiter_reg, + func: AggFunc::GroupConcat, + }); + + target_register + } + AggFunc::Max => { + if agg.args.len() != 1 { + crate::bail_parse_error!("max bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Max, + }); + target_register + } + AggFunc::Min => { + if agg.args.len() != 1 { + crate::bail_parse_error!("min bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Min, + }); + target_register + } + AggFunc::StringAgg => { + if agg.args.len() != 2 { + crate::bail_parse_error!("string_agg bad number of arguments"); + } + + let expr_reg = program.alloc_register(); + let delimiter_reg = program.alloc_register(); + + let delimiter_expr: ast::Expr; + + match &agg.args[1] { + ast::Expr::Column { .. 
} => { + delimiter_expr = agg.args[1].clone(); + } + ast::Expr::Literal(ast::Literal::String(s)) => { + delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + } + _ => crate::bail_parse_error!("Incorrect delimiter parameter"), + }; + + emit_column(program, expr_reg); + translate_expr( + program, + Some(referenced_tables), + &delimiter_expr, + delimiter_reg, + None, + None, + )?; + + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: delimiter_reg, + func: AggFunc::StringAgg, + }); + + target_register + } + AggFunc::Sum => { + if agg.args.len() != 1 { + crate::bail_parse_error!("sum bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Sum, + }); + target_register + } + AggFunc::Total => { + if agg.args.len() != 1 { + crate::bail_parse_error!("total bad number of arguments"); + } + let expr_reg = program.alloc_register(); + emit_column(program, expr_reg); + program.emit_insn(Insn::AggStep { + acc_reg: target_register, + col: expr_reg, + delimiter: 0, + func: AggFunc::Total, + }); + target_register + } + }; + Ok(dest) +} diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 8a5760516..ea890e994 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -98,7 +98,7 @@ pub fn translate_insert( expr, column_registers_start + col, None, - 0, + None, )?; } program.emit_insn(Insn::Yield { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index cd08f619f..4635a933a 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -25,7 +25,14 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result { { return Ok(Plan { source: SourceOperator::Nothing, - ..select_plan + aggregates: None, + result_columns: vec![], + where_clause: None, + group_by: None, + order_by: None, + limit: None, + referenced_tables: select_plan.referenced_tables, + available_indexes: select_plan.available_indexes, }); } use_indexes( @@ -478,7 +485,7 @@ impl Optimizable for ast::Expr { ast::Expr::Column { table, column, - is_primary_key, + is_rowid_alias: is_primary_key, .. } => *is_primary_key && *table == table_index, _ => false, diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 3e6b73630..8641d3198 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -68,7 +68,6 @@ fn bind_column_references( referenced_tables: &[BTreeTableReference], ) -> Result<()> { match expr { - ast::Expr::AggRef { .. } => unreachable!(), ast::Expr::Id(id) => { let mut match_result = None; for (tbl_idx, table) in referenced_tables.iter().enumerate() { @@ -93,7 +92,7 @@ fn bind_column_references( database: None, // TODO: support different databases table: tbl_idx, column: col_idx, - is_primary_key, + is_rowid_alias: is_primary_key, }; Ok(()) } @@ -122,7 +121,7 @@ fn bind_column_references( database: None, // TODO: support different databases table: tbl_idx, column: col_idx.unwrap(), - is_primary_key: col.primary_key, + is_rowid_alias: col.primary_key, }; Ok(()) } @@ -276,7 +275,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ resolve_aggregates(&expr, &mut aggregate_expressions); + // TODO: can be compound aggregate + plan.result_columns + .push(ResultSetColumn::Scalar(expr.clone())); } _ => {} } diff --git a/vendored/sqlite3-parser/src/parser/ast/fmt.rs b/vendored/sqlite3-parser/src/parser/ast/fmt.rs index 6b0271919..80f87eefb 100644 --- a/vendored/sqlite3-parser/src/parser/ast/fmt.rs +++ b/vendored/sqlite3-parser/src/parser/ast/fmt.rs @@ -638,7 +638,6 @@ impl ToTokens for Expr { } Self::Id(id) => id.to_tokens(s), Self::Column { .. } => Ok(()), - Self::AggRef { .. } => Ok(()), Self::InList { lhs, not, rhs } => { lhs.to_tokens(s)?; if *not { diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs index ac45b5170..690f5e71c 100644 --- a/vendored/sqlite3-parser/src/parser/ast/mod.rs +++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs @@ -335,13 +335,8 @@ pub enum Expr { table: usize, /// the z in `x.y.z`. index of the column in the table. column: usize, - /// is the column a primary key - is_primary_key: bool, - }, - /// AggRef is a reference to a computed aggregate - AggRef { - /// index of the aggregate in the aggregates vector parsed from the query - index: usize, + /// is the column a rowid alias + is_rowid_alias: bool, }, /// `IN` InList { From 9a557516b849c71d14923d5656668fe380a9292a Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 16:26:41 +0200 Subject: [PATCH 04/32] Fixes for expressions with aggregate arguments + limit 0 --- core/translate/emitter.rs | 144 +++++++++++++++++++++++--------------- core/translate/expr.rs | 2 +- core/translate/plan.rs | 6 +- core/translate/planner.rs | 42 +++++++---- 4 files changed, 120 insertions(+), 74 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index b9bf5767a..a2eb575a0 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -35,8 +35,6 @@ pub struct LeftJoinMetadata { pub struct SortMetadata { // cursor id for the Sorter table where the sorted rows are stored pub sort_cursor: usize, - // cursor id for the Pseudo table where rows are temporarily inserted from the Sorter table - pub pseudo_table_cursor: usize, // label where the SorterData instruction is emitted; SorterNext will jump here if there is more data to read pub sorter_data_label: BranchOffset, // label for the instruction immediately following SorterNext; SorterSort will jump here in case there is no data @@ -77,13 +75,6 @@ pub struct GroupByMetadata { pub group_exprs_comparison_register: usize, } -#[derive(Debug)] -pub struct SortCursorOverride { - pub cursor_id: usize, - pub pseudo_table: Table, - pub sort_key_len: usize, -} - /// The Metadata struct holds various information and labels used during bytecode generation. /// It is used for maintaining state and control flow during the bytecode /// generation process. 
@@ -175,6 +166,14 @@ pub fn emit_program( ) -> Result { let (mut program, mut metadata, init_label, start_offset) = prologue()?; + // Trivial exit on LIMIT 0 + if let Some(limit) = plan.limit { + if limit == 0 { + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + return Ok(program.build(database_header, connection)); + } + } + // OPEN CURSORS ETC if let Some(ref mut order_by) = plan.order_by { init_order_by(&mut program, order_by, &mut metadata)?; @@ -221,7 +220,13 @@ pub fn emit_program( )?; } else if let Some(ref mut aggregates) = plan.aggregates { // Example: SELECT sum(x), count(*) FROM t; - agg_without_group_by_emit(&mut program, aggregates, &mut metadata)?; + agg_without_group_by_emit( + &mut program, + &plan.referenced_tables, + &plan.result_columns, + aggregates, + &mut metadata, + )?; // If we have an aggregate without a group by, we don't need an order by because currently // there can only be a single row result in those cases. order_by_necessary = false; @@ -247,8 +252,6 @@ pub fn emit_program( } const ORDER_BY_ID: usize = 0; -const GROUP_BY_ID: usize = 1; -const AGG_WITHOUT_GROUP_BY_ID: usize = 2; fn init_order_by( program: &mut ProgramBuilder, @@ -261,7 +264,6 @@ fn init_order_by( ORDER_BY_ID, SortMetadata { sort_cursor, - pseudo_table_cursor: usize::MAX, // will be set later sorter_data_register: program.alloc_register(), sorter_data_label: program.allocate_label(), done_label: program.allocate_label(), @@ -827,7 +829,6 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada if let Some(group_by) = &plan.group_by { return inner_loop_source_emit( program, - &plan.source, &plan.result_columns, &plan.aggregates, m, @@ -843,7 +844,6 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada if plan.aggregates.is_some() { return inner_loop_source_emit( program, - &plan.source, &plan.result_columns, &plan.aggregates, m, @@ -855,7 +855,6 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada if let Some(order_by) = &plan.order_by { return inner_loop_source_emit( program, - &plan.source, &plan.result_columns, &plan.aggregates, m, @@ -866,7 +865,6 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada // if we have neither, we emit a ResultRow. In that case, if we have a Limit, we handle that with DecrJumpZero. 
return inner_loop_source_emit( program, - &plan.source, &plan.result_columns, &plan.aggregates, m, @@ -877,7 +875,6 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada fn inner_loop_source_emit( program: &mut ProgramBuilder, - source: &SourceOperator, result_columns: &Vec, aggregates: &Option>, m: &mut Metadata, @@ -940,7 +937,11 @@ fn inner_loop_source_emit( let mut result_columns_to_skip: Option> = None; for (i, rc) in result_columns.iter().enumerate() { match rc { - ResultSetColumn::Scalar(expr) => { + ResultSetColumn::Expr { + expr, + contains_aggregates, + } => { + assert!(!*contains_aggregates); let found = order_by.iter().enumerate().find(|(_, (e, _))| e == expr); if let Some((j, _)) = found { if let Some(ref mut v) = result_columns_to_skip { @@ -965,11 +966,6 @@ fn inner_loop_source_emit( m.result_column_indexes_in_orderby_sorter.insert(i, j); } } - ResultSetColumn::ComputedAgg(_) => { - unreachable!( - "ComputedAgg should have been rewritten to a normal agg before emit" - ); - } } } let order_by_len = order_by.len(); @@ -993,7 +989,11 @@ fn inner_loop_source_emit( } } match rc { - ResultSetColumn::Scalar(expr) => { + ResultSetColumn::Expr { + expr, + contains_aggregates, + } => { + assert!(!*contains_aggregates); translate_expr( program, Some(referenced_tables), @@ -1038,12 +1038,19 @@ fn inner_loop_source_emit( } for (i, expr) in result_columns.iter().enumerate() { match expr { - ResultSetColumn::Scalar(expr) => { + ResultSetColumn::Expr { + expr, + contains_aggregates, + } => { + if *contains_aggregates { + // Do nothing, aggregates will be computed above and this full result expression will be + // computed later + continue; + } let reg = start_reg + num_aggs + i; translate_expr(program, Some(referenced_tables), expr, reg, None, None)?; } ResultSetColumn::Agg(_) => { /* do nothing, aggregates are computed above */ } - other => unreachable!("Unexpected non-scalar result column: {:?}", other), } } Ok(()) @@ -1053,7 +1060,11 @@ fn inner_loop_source_emit( let start_reg = program.alloc_registers(result_columns.len()); for (i, expr) in result_columns.iter().enumerate() { match expr { - ResultSetColumn::Scalar(expr) => { + ResultSetColumn::Expr { + expr, + contains_aggregates, + } => { + assert!(!*contains_aggregates); let reg = start_reg + i; translate_expr(program, Some(referenced_tables), expr, reg, None, None)?; } @@ -1298,7 +1309,7 @@ fn group_by_emit( // Read the group by columns from the pseudo cursor let groups_start_reg = program.alloc_registers(group_by.len()); - for (i, expr) in group_by.iter().enumerate() { + for i in 0..group_by.len() { let sorter_column_index = i; let group_reg = groups_start_reg + i; program.emit_insn(Insn::Column { @@ -1401,7 +1412,7 @@ fn group_by_emit( ); // Read the group by columns for a finished group - for (i, expr) in group_by.iter().enumerate() { + for i in 0..group_by.len() { let key_reg = group_exprs_start_register + i; let sorter_column_index = i; program.emit_insn(Insn::Column { @@ -1482,7 +1493,6 @@ fn group_by_emit( }); } - // TODO handle result column expressions like LENGTH(SUM(x)) // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the @@ -1505,7 +1515,7 @@ fn group_by_emit( if let Some(order_by) = order_by { for (i, rc) in 
result_columns.iter().enumerate() { match rc { - ResultSetColumn::Scalar(expr) => { + ResultSetColumn::Expr { expr, .. } => { let found = order_by.iter().enumerate().find(|(_, (e, _))| e == expr); if let Some((j, _)) = found { if let Some(ref mut v) = result_columns_to_skip { @@ -1530,11 +1540,6 @@ fn group_by_emit( m.result_column_indexes_in_orderby_sorter.insert(i, j); } } - ResultSetColumn::ComputedAgg(_) => { - unreachable!( - "ComputedAgg should have been rewritten to a normal agg before emit" - ); - } } } } @@ -1543,8 +1548,8 @@ fn group_by_emit( .as_ref() .map(|v| v.len()) .unwrap_or(0); - let output_row_start_reg = - program.alloc_registers(result_columns.len() + order_by_len - result_columns_to_skip_len); + let output_column_count = result_columns.len() + order_by_len - result_columns_to_skip_len; + let output_row_start_reg = program.alloc_registers(output_column_count); let mut cur_reg = output_row_start_reg; if let Some(order_by) = order_by { for (expr, _) in order_by.iter() { @@ -1567,7 +1572,7 @@ fn group_by_emit( } } match rc { - ResultSetColumn::Scalar(expr) => { + ResultSetColumn::Expr { expr, .. } => { translate_expr( program, Some(referenced_tables), @@ -1589,12 +1594,6 @@ fn group_by_emit( unreachable!("agg {:?} not found", agg); } } - ResultSetColumn::ComputedAgg(agg) => { - unreachable!( - "ComputedAgg should have been rewritten to a normal agg before emit: {:?}", - agg - ); - } } m.result_column_indexes_in_orderby_sorter .insert(i, res_col_idx_in_orderby_sorter); @@ -1613,7 +1612,7 @@ fn group_by_emit( program.mark_last_insn_constant(); program.emit_insn(Insn::ResultRow { start_reg: output_row_start_reg, - count: aggregates.len() + group_by.len(), + count: output_column_count, }); program.emit_insn_with_label_dependency( Insn::DecrJumpZero { @@ -1627,7 +1626,7 @@ fn group_by_emit( Some(_) => { program.emit_insn(Insn::MakeRecord { start_reg: output_row_start_reg, - count: aggregates.len() + group_by.len(), + count: output_column_count, dest_reg: group_by_metadata.sorter_key_register, }); @@ -1668,6 +1667,8 @@ fn group_by_emit( fn agg_without_group_by_emit( program: &mut ProgramBuilder, + referenced_tables: &Vec, + result_columns: &Vec, aggregates: &Vec, m: &mut Metadata, ) -> Result<()> { @@ -1679,16 +1680,46 @@ fn agg_without_group_by_emit( func: agg.func.clone(), }); } - let output_reg = program.alloc_registers(aggregates.len()); - program.emit_insn(Insn::Copy { - src_reg: agg_start_reg, - dst_reg: output_reg, - amount: aggregates.len() - 1, - }); + // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) + // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) + // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the + // result column expression matches a) a group by column or b) an aggregation result. + let mut precomputed_exprs_to_register = Vec::with_capacity(aggregates.len()); + for (i, agg) in aggregates.iter().enumerate() { + precomputed_exprs_to_register.push((&agg.original_expr, agg_start_reg + i)); + } + + let output_reg = program.alloc_registers(result_columns.len()); + for (i, rc) in result_columns.iter().enumerate() { + match rc { + ResultSetColumn::Expr { expr, .. 
} => { + translate_expr( + program, + Some(referenced_tables), + expr, + output_reg + i, + None, + Some(&precomputed_exprs_to_register), + )?; + } + ResultSetColumn::Agg(agg) => { + let found = aggregates.iter().enumerate().find(|(_, a)| **a == *agg); + if let Some((i, _)) = found { + program.emit_insn(Insn::Copy { + src_reg: agg_start_reg + i, + dst_reg: output_reg + i, + amount: 0, + }); + } else { + unreachable!("agg {:?} not found", agg); + } + } + } + } // This always emits a ResultRow because currently it can only be used for a single row result program.emit_insn(Insn::ResultRow { start_reg: output_reg, - count: aggregates.len(), + count: result_columns.len(), }); Ok(()) @@ -1719,9 +1750,8 @@ fn sort_order_by( } pseudo_columns.push(Column { name: match expr { - ResultSetColumn::Scalar(expr) => expr.to_string(), + ResultSetColumn::Expr { expr, .. } => expr.to_string(), ResultSetColumn::Agg(agg) => agg.to_string(), - _ => unreachable!(), }, primary_key: false, ty: crate::schema::Type::Null, diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 7f74ef509..a2f373e82 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -3,7 +3,7 @@ use sqlite3_parser::ast::{self, UnaryOperator}; #[cfg(feature = "json")] use crate::function::JsonFunc; use crate::function::{AggFunc, Func, FuncCtx, ScalarFunc}; -use crate::schema::{PseudoTable, Table, Type}; +use crate::schema::Type; use crate::util::normalize_ident; use crate::vdbe::{builder::ProgramBuilder, BranchOffset, Insn}; use crate::Result; diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 43fcb4e96..ffcff1832 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -14,9 +14,11 @@ use crate::{ #[derive(Debug)] pub enum ResultSetColumn { - Scalar(ast::Expr), + Expr { + expr: ast::Expr, + contains_aggregates: bool, + }, Agg(Aggregate), - ComputedAgg(ast::Expr), } #[derive(Debug)] diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 8641d3198..02bf9cdcd 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -239,8 +239,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ for table_reference in plan.referenced_tables.iter() { for (idx, col) in table_reference.table.columns.iter().enumerate() { - plan.result_columns.push(ResultSetColumn::Scalar( - ast::Expr::Column { + plan.result_columns.push(ResultSetColumn::Expr { + expr: ast::Expr::Column { database: None, // TODO: support different databases table: table_reference.table_index, column: idx, is_rowid_alias: col.primary_key, }, - )); + contains_aggregates: false, + }); } } } @@ -293,13 +293,15 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ @@ -331,10 +333,14 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ + let cur_agg_count = aggregate_expressions.len(); resolve_aggregates(&expr, &mut aggregate_expressions); - // TODO: can be compound aggregate - plan.result_columns - .push(ResultSetColumn::Scalar(expr.clone())); + let contains_aggregates = + cur_agg_count != aggregate_expressions.len(); + plan.result_columns.push(ResultSetColumn::Expr { + expr: expr.clone(), + contains_aggregates, + }); } _ => {} } @@ -364,13 +370,21 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ + let cur_agg_count = aggregate_expressions.len(); resolve_aggregates(&lhs, &mut aggregate_expressions); resolve_aggregates(&rhs, &mut aggregate_expressions); - plan.result_columns - .push(ResultSetColumn::Scalar(expr.clone())); + let contains_aggregates = + cur_agg_count != aggregate_expressions.len(); + plan.result_columns.push(ResultSetColumn::Expr { + expr: expr.clone(), + contains_aggregates, + }); } e => { - plan.result_columns.push(ResultSetColumn::Scalar(e.clone())); + plan.result_columns.push(ResultSetColumn::Expr { + expr: e.clone(), + contains_aggregates: false, + }); } } } From 7ecc2525072eca001aa1df6ceecfa958ee1483e3 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 17:46:54 +0200 Subject: [PATCH 05/32] fix rest of the failing tests --- core/translate/emitter.rs | 15 ++++++++++++- core/translate/optimizer.rs | 16 +------------- core/translate/plan.rs | 42 ------------------------------------- core/translate/select.rs | 1 - core/vdbe/builder.rs | 4 ---- 5 files changed, 15 insertions(+), 63 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index a2eb575a0..25d247cc4 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -16,6 +16,7 @@ use super::expr::{ translate_aggregation, translate_aggregation_groupby, translate_condition_expr, translate_expr, ConditionMetadata, }; +use super::optimizer::Optimizable; use super::plan::{Aggregate, BTreeTableReference, Direction, Plan}; use super::plan::{ResultSetColumn, SourceOperator}; @@ -825,6 +826,19 @@ pub enum InnerLoopEmitTarget<'a> { } fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metadata) -> Result<()> { + if let Some(wc) = &plan.where_clause { + for predicate in wc.iter() { + if predicate.is_always_false()? { + return Ok(()); + } else if predicate.is_always_true()? { + // do nothing + } else { + unreachable!( + "all WHERE clause terms that are not trivially true or false should have been pushed down to the source" + ); + } + } + } // if we have a group by, we emit a record into the group by sorter. if let Some(group_by) = &plan.group_by { return inner_loop_source_emit( @@ -886,7 +900,6 @@ fn inner_loop_source_emit( group_by, aggregates, } => { - // TODO: DOESNT WORK YET let sort_keys_count = group_by.len(); let aggregate_arguments_count = aggregates.iter().map(|agg| agg.args.len()).sum::(); diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 4635a933a..2772751e2 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -20,21 +20,7 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result { &mut select_plan.where_clause, &select_plan.referenced_tables, )?; - if eliminate_constants(&mut select_plan.source)? 
- == ConstantConditionEliminationResult::ImpossibleCondition - { - return Ok(Plan { - source: SourceOperator::Nothing, - aggregates: None, - result_columns: vec![], - where_clause: None, - group_by: None, - order_by: None, - limit: None, - referenced_tables: select_plan.referenced_tables, - available_indexes: select_plan.available_indexes, - }); - } + eliminate_constants(&mut select_plan.source)?; use_indexes( &mut select_plan.source, &select_plan.referenced_tables, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index ffcff1832..6ab599372 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -130,48 +130,6 @@ pub enum Search { } impl SourceOperator { - pub fn column_count(&self, referenced_tables: &[BTreeTableReference]) -> usize { - match self { - SourceOperator::Join { left, right, .. } => { - left.column_count(referenced_tables) + right.column_count(referenced_tables) - } - SourceOperator::Scan { - table_reference, .. - } => table_reference.table.columns.len(), - SourceOperator::Search { - table_reference, .. - } => table_reference.table.columns.len(), - SourceOperator::Nothing => 0, - } - } - - pub fn column_names(&self) -> Vec { - match self { - SourceOperator::Join { left, right, .. } => { - let mut names = left.column_names(); - names.extend(right.column_names()); - names - } - SourceOperator::Scan { - table_reference, .. - } => table_reference - .table - .columns - .iter() - .map(|c| c.name.clone()) - .collect(), - SourceOperator::Search { - table_reference, .. - } => table_reference - .table - .columns - .iter() - .map(|c| c.name.clone()) - .collect(), - SourceOperator::Nothing => vec![], - } - } - pub fn id(&self) -> usize { match self { SourceOperator::Join { id, .. } => *id, diff --git a/core/translate/select.rs b/core/translate/select.rs index 0d16089eb..6d846ded8 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -18,6 +18,5 @@ pub fn translate_select( ) -> Result { let select_plan = prepare_select_plan(schema, select)?; let optimized_plan = optimize_plan(select_plan)?; - // println!("optimized_plan: {:?}", optimized_plan); emit_program(database_header, optimized_plan, connection) } diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 0115c13e2..bbb7f75a8 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -361,10 +361,6 @@ impl ProgramBuilder { .unwrap() } - pub fn resolve_cursor_to_table(&self, cursor_id: CursorID) -> Option { - self.cursor_ref[cursor_id].1.clone() - } - pub fn resolve_deferred_labels(&mut self) { for i in 0..self.deferred_label_resolutions.len() { let (label, insn_reference) = self.deferred_label_resolutions[i]; From d2f84edd2e906439346b5e8259ad4567deea8cca Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 17:51:26 +0200 Subject: [PATCH 06/32] fix accidentally removing push_scan_direction() --- core/translate/optimizer.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 2772751e2..4f87c8010 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -90,12 +90,13 @@ fn eliminate_unnecessary_orderby( return Ok(()); } - let (key, _) = o.first_mut().unwrap(); + let (key, direction) = o.first_mut().unwrap(); let already_ordered = _operator_is_already_ordered_by(operator, key, referenced_tables, available_indexes)?; if already_ordered { + push_scan_direction(operator, direction); *order_by = None; } From 97ba4a788e6991fcfafeb9ab9c4bb7ce6b0b796b Mon Sep 17 00:00:00 2001 From: 
jussisaurio Date: Sat, 23 Nov 2024 17:54:38 +0200 Subject: [PATCH 07/32] remove sorts hashmap - only one sortmetadata struct is needed --- core/translate/emitter.rs | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 25d247cc4..a1d9c7ce7 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -92,7 +92,7 @@ pub struct Metadata { // metadata for the group by operator group_by_metadata: Option, // mapping between Order operator id and associated metadata - sorts: HashMap, + sort_metadata: Option, // mapping between Join operator id and associated metadata (for left joins only) left_joins: HashMap, // First register of the aggregation results @@ -125,7 +125,7 @@ fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> left_joins: HashMap::new(), next_row_labels: HashMap::new(), scan_loop_body_labels: vec![], - sorts: HashMap::new(), + sort_metadata: None, aggregation_start_register: None, result_column_indexes_in_orderby_sorter: HashMap::new(), result_columns_to_skip_in_orderby_sorter: None, @@ -261,15 +261,12 @@ fn init_order_by( ) -> Result<()> { m.termination_label_stack.push(program.allocate_label()); let sort_cursor = program.alloc_cursor_id(None, None); - m.sorts.insert( - ORDER_BY_ID, - SortMetadata { - sort_cursor, - sorter_data_register: program.alloc_register(), - sorter_data_label: program.allocate_label(), - done_label: program.allocate_label(), - }, - ); + m.sort_metadata = Some(SortMetadata { + sort_cursor, + sorter_data_register: program.alloc_register(), + sorter_data_label: program.allocate_label(), + done_label: program.allocate_label(), + }); let mut order = Vec::new(); for (_, direction) in order_by.iter() { order.push(OwnedValue::Integer(*direction as i64)); @@ -1024,7 +1021,7 @@ fn inner_loop_source_emit( cur_reg += 1; } - let sort_metadata = m.sorts.get_mut(&ORDER_BY_ID).unwrap(); + let sort_metadata = m.sort_metadata.as_mut().unwrap(); program.emit_insn(Insn::MakeRecord { start_reg, count: orderby_sorter_column_count, @@ -1644,7 +1641,7 @@ fn group_by_emit( }); program.emit_insn(Insn::SorterInsert { - cursor_id: m.sorts.get(&ORDER_BY_ID).unwrap().sort_cursor, + cursor_id: m.sort_metadata.as_ref().unwrap().sort_cursor, record_reg: group_by_metadata.sorter_key_register, }); } @@ -1783,7 +1780,7 @@ fn sort_order_by( columns: pseudo_columns, }))), ); - let sort_metadata = m.sorts.get(&ORDER_BY_ID).unwrap(); + let sort_metadata = m.sort_metadata.as_mut().unwrap(); program.emit_insn(Insn::OpenPseudo { cursor_id: pseudo_cursor, @@ -1806,7 +1803,7 @@ fn sort_order_by( pseudo_cursor, }); - let sort_metadata = m.sorts.get_mut(&ORDER_BY_ID).unwrap(); + let sort_metadata = m.sort_metadata.as_mut().unwrap(); // EMIT COLUMNS FROM SORTER AND EMIT ROW let cursor_id = pseudo_cursor; From 120601f7324adcc80fb3a98e12af28d3beef5404 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 17:56:18 +0200 Subject: [PATCH 08/32] fix metadata comments --- core/translate/emitter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index a1d9c7ce7..37f14f73e 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -87,11 +87,11 @@ pub struct Metadata { // for example, in a join with two nested scans, the inner loop will jump to its Next instruction when the join condition is false; // in a join with a scan and a seek, the seek will jump to the scan's Next 
instruction when the join condition is false. next_row_labels: HashMap, - // labels for the Rewind instructions. + // labels for the instructions beginning the inner loop of a scan operator. scan_loop_body_labels: Vec, // metadata for the group by operator group_by_metadata: Option, - // mapping between Order operator id and associated metadata + // metadata for the order by operator sort_metadata: Option, // mapping between Join operator id and associated metadata (for left joins only) left_joins: HashMap, From 52beeabd45b57c45fc2d7755b559af0b45927c01 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 17:58:34 +0200 Subject: [PATCH 09/32] tweaks --- core/translate/emitter.rs | 7 +++---- core/translate/optimizer.rs | 19 +++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 37f14f73e..34bf15b31 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -207,7 +207,8 @@ pub fn emit_program( let mut order_by_necessary = plan.order_by.is_some(); - // IF GROUP BY, SORT BY GROUPS AND DO AGGREGATION + // IF GROUP BY, SORT BY GROUPS AND DO AGGREGATION ETC + // EITHER EMITS RESULTROWS DIRECTLY OR INSERTS INTO ORDER BY SORTER if let Some(ref mut group_by) = plan.group_by { group_by_emit( &mut program, @@ -233,7 +234,7 @@ pub fn emit_program( order_by_necessary = false; } - // IF ORDER BY, SORT BY ORDER BY + // EMIT RESULT ROWS FROM THE ORDER BY SORTER if let Some(ref mut order_by) = plan.order_by { if order_by_necessary { sort_order_by( @@ -252,8 +253,6 @@ pub fn emit_program( Ok(program.build(database_header, connection)) } -const ORDER_BY_ID: usize = 0; - fn init_order_by( program: &mut ProgramBuilder, order_by: &Vec<(ast::Expr, Direction)>, diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 4f87c8010..307df356b 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -44,15 +44,15 @@ fn _operator_is_already_ordered_by( match operator { SourceOperator::Scan { table_reference, .. - } => Ok(key.is_primary_key_of(table_reference.table_index)), + } => Ok(key.is_rowid_alias_of(table_reference.table_index)), SourceOperator::Search { table_reference, search, .. } => match search { - Search::PrimaryKeyEq { .. } => Ok(key.is_primary_key_of(table_reference.table_index)), + Search::PrimaryKeyEq { .. } => Ok(key.is_rowid_alias_of(table_reference.table_index)), Search::PrimaryKeySearch { .. } => { - Ok(key.is_primary_key_of(table_reference.table_index)) + Ok(key.is_rowid_alias_of(table_reference.table_index)) } Search::IndexSearch { index, .. } => { let index_idx = key.check_index_scan( @@ -457,7 +457,7 @@ pub trait Optimizable { .check_constant()? .map_or(false, |c| c == ConstantPredicate::AlwaysFalse)) } - fn is_primary_key_of(&self, table_index: usize) -> bool; + fn is_rowid_alias_of(&self, table_index: usize) -> bool; fn check_index_scan( &mut self, table_index: usize, @@ -467,14 +467,13 @@ pub trait Optimizable { } impl Optimizable for ast::Expr { - fn is_primary_key_of(&self, table_index: usize) -> bool { + fn is_rowid_alias_of(&self, table_index: usize) -> bool { match self { ast::Expr::Column { table, - column, - is_rowid_alias: is_primary_key, + is_rowid_alias, .. 
- } => *is_primary_key && *table == table_index, + } => *is_rowid_alias && *table == table_index, _ => false, } } @@ -652,7 +651,7 @@ pub fn try_extract_index_search_expression( ) -> Result> { match expr { ast::Expr::Binary(mut lhs, operator, mut rhs) => { - if lhs.is_primary_key_of(table_index) { + if lhs.is_rowid_alias_of(table_index) { match operator { ast::Operator::Equals => { return Ok(Either::Right(Search::PrimaryKeyEq { cmp_expr: *rhs })); @@ -670,7 +669,7 @@ pub fn try_extract_index_search_expression( } } - if rhs.is_primary_key_of(table_index) { + if rhs.is_rowid_alias_of(table_index) { match operator { ast::Operator::Equals => { return Ok(Either::Right(Search::PrimaryKeyEq { cmp_expr: *lhs })); From 4f3da982c08c4bcbc9d4083f75dbde159a342216 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 18:06:17 +0200 Subject: [PATCH 10/32] extract function emit_result_row() --- core/translate/emitter.rs | 105 ++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 61 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 34bf15b31..2fba800af 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1083,26 +1083,12 @@ fn inner_loop_source_emit( ), } } - program.emit_insn(Insn::ResultRow { + emit_result_row( + program, start_reg, - count: result_columns.len(), - }); - if let Some(limit) = limit { - let jump_label = m.termination_label_stack.last().unwrap(); - let limit_reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: limit as i64, - dest: limit_reg, - }); - program.mark_last_insn_constant(); - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_reg, - target_pc: *jump_label, - }, - *jump_label, - ); - } + result_columns.len(), + limit.map(|l| (l, *m.termination_label_stack.last().unwrap())), + ); Ok(()) } @@ -1612,25 +1598,12 @@ fn group_by_emit( match order_by { None => { - if let Some(limit) = limit { - let limit_reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: limit as i64, - dest: limit_reg, - }); - program.mark_last_insn_constant(); - program.emit_insn(Insn::ResultRow { - start_reg: output_row_start_reg, - count: output_column_count, - }); - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_reg, - target_pc: *m.termination_label_stack.last().unwrap(), - }, - *m.termination_label_stack.last().unwrap(), - ); - } + emit_result_row( + program, + output_row_start_reg, + output_column_count, + limit.map(|l| (l, *m.termination_label_stack.last().unwrap())), + ); } Some(_) => { program.emit_insn(Insn::MakeRecord { @@ -1726,10 +1699,7 @@ fn agg_without_group_by_emit( } } // This always emits a ResultRow because currently it can only be used for a single row result - program.emit_insn(Insn::ResultRow { - start_reg: output_reg, - count: result_columns.len(), - }); + emit_result_row(program, output_reg, result_columns.len(), None); Ok(()) } @@ -1815,26 +1785,12 @@ fn sort_order_by( dest: reg, }); } - program.emit_insn(Insn::ResultRow { + emit_result_row( + program, start_reg, - count: result_columns.len(), - }); - - if let Some(limit) = limit { - let limit_reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: limit as i64, - dest: limit_reg, - }); - program.mark_last_insn_constant(); - program.emit_insn_with_label_dependency( - Insn::DecrJumpZero { - reg: limit_reg, - target_pc: sort_metadata.done_label, - }, - sort_metadata.done_label, - ); - } + result_columns.len(), + limit.map(|l| (l, 
sort_metadata.done_label)), + ); program.emit_insn_with_label_dependency( Insn::SorterNext { @@ -1848,3 +1804,30 @@ fn sort_order_by( Ok(()) } + +fn emit_result_row( + program: &mut ProgramBuilder, + start_reg: usize, + column_count: usize, + limit: Option<(usize, BranchOffset)>, +) { + program.emit_insn(Insn::ResultRow { + start_reg, + count: column_count, + }); + if let Some((limit, jump_label_on_limit_reached)) = limit { + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: jump_label_on_limit_reached, + }, + jump_label_on_limit_reached, + ); + } +} From 1c37d8b24b59440b25f5cca5df8e3b275a2c5e7c Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 18:09:40 +0200 Subject: [PATCH 11/32] extract function sorter_insert() --- core/translate/emitter.rs | 65 ++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 2fba800af..ad14c29f1 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -923,16 +923,13 @@ fn inner_loop_source_emit( let group_by_metadata = m.group_by_metadata.as_ref().unwrap(); - program.emit_insn(Insn::MakeRecord { + sorter_insert( + program, start_reg, - count: column_count, - dest_reg: group_by_metadata.sorter_key_register, - }); - - program.emit_insn(Insn::SorterInsert { - cursor_id: group_by_metadata.sort_cursor, - record_reg: group_by_metadata.sorter_key_register, - }); + column_count, + group_by_metadata.sort_cursor, + group_by_metadata.sorter_key_register, + ); Ok(()) } @@ -1021,16 +1018,13 @@ fn inner_loop_source_emit( } let sort_metadata = m.sort_metadata.as_mut().unwrap(); - program.emit_insn(Insn::MakeRecord { + sorter_insert( + program, start_reg, - count: orderby_sorter_column_count, - dest_reg: sort_metadata.sorter_data_register, - }); - - program.emit_insn(Insn::SorterInsert { - cursor_id: sort_metadata.sort_cursor, - record_reg: sort_metadata.sorter_data_register, - }); + orderby_sorter_column_count, + sort_metadata.sort_cursor, + sort_metadata.sorter_data_register, + ); Ok(()) } @@ -1606,16 +1600,13 @@ fn group_by_emit( ); } Some(_) => { - program.emit_insn(Insn::MakeRecord { - start_reg: output_row_start_reg, - count: output_column_count, - dest_reg: group_by_metadata.sorter_key_register, - }); - - program.emit_insn(Insn::SorterInsert { - cursor_id: m.sort_metadata.as_ref().unwrap().sort_cursor, - record_reg: group_by_metadata.sorter_key_register, - }); + sorter_insert( + program, + output_row_start_reg, + output_column_count, + m.sort_metadata.as_ref().unwrap().sort_cursor, + group_by_metadata.sorter_key_register, + ); } } @@ -1831,3 +1822,21 @@ fn emit_result_row( ); } } + +fn sorter_insert( + program: &mut ProgramBuilder, + start_reg: usize, + column_count: usize, + cursor_id: usize, + record_reg: usize, +) { + program.emit_insn(Insn::MakeRecord { + start_reg, + count: column_count, + dest_reg: record_reg, + }); + program.emit_insn(Insn::SorterInsert { + cursor_id, + record_reg, + }); +} From 0510e150d3df4d41efba14543fd307aa158f20b1 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 18:11:28 +0200 Subject: [PATCH 12/32] fix comment --- core/translate/emitter.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 
ad14c29f1..f6160710f 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1653,8 +1653,7 @@ fn agg_without_group_by_emit( func: agg.func.clone(), }); } - // we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1) - // and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) + // we now have the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1) // we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the // result column expression matches a) a group by column or b) an aggregation result. let mut precomputed_exprs_to_register = Vec::with_capacity(aggregates.len()); From ebce78bcd9d671e7cd7d57b03bad23b7f26ee73b Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 18:12:10 +0200 Subject: [PATCH 13/32] rename --- core/translate/emitter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index f6160710f..a4cdba633 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -237,7 +237,7 @@ pub fn emit_program( // EMIT RESULT ROWS FROM THE ORDER BY SORTER if let Some(ref mut order_by) = plan.order_by { if order_by_necessary { - sort_order_by( + order_by_emit( &mut program, order_by, &plan.result_columns, @@ -1694,7 +1694,7 @@ fn agg_without_group_by_emit( Ok(()) } -fn sort_order_by( +fn order_by_emit( program: &mut ProgramBuilder, order_by: &Vec<(ast::Expr, Direction)>, result_columns: &Vec, From fc33c7048146cf3ebe8c94faf64f63976215b4a9 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 18:53:31 +0200 Subject: [PATCH 14/32] remove many unnecessary fields from SortMetadata and GroupByMetadata --- core/translate/emitter.rs | 50 +++++++++++++-------------------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index a4cdba633..185bd3952 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -36,10 +36,6 @@ pub struct LeftJoinMetadata { pub struct SortMetadata { // cursor id for the Sorter table where the sorted rows are stored pub sort_cursor: usize, - // label where the SorterData instruction is emitted; SorterNext will jump here if there is more data to read - pub sorter_data_label: BranchOffset, - // label for the instruction immediately following SorterNext; SorterSort will jump here in case there is no data - pub done_label: BranchOffset, // register where the sorter data is inserted and later retrieved from pub sorter_data_register: usize, } @@ -58,12 +54,8 @@ pub struct GroupByMetadata { pub subroutine_accumulator_output_return_offset_register: usize, // Label for the instruction that sets the accumulator indicator to true (indicating data exists in the accumulator for the current group) pub accumulator_indicator_set_true_label: BranchOffset, - // Label for the instruction where SorterData is emitted (used for fetching sorted data) - pub sorter_data_label: BranchOffset, // Register holding the key used for sorting in the Sorter pub sorter_key_register: usize, - // Label for the instruction signaling the completion of grouping operations - pub grouping_done_label: BranchOffset, // Register holding a flag to abort the grouping process if necessary pub abort_flag_register: usize, // Register holding a boolean indicating whether there's data in the accumulator (used for aggregation) @@ -263,8 +255,6 @@ 
fn init_order_by( m.sort_metadata = Some(SortMetadata { sort_cursor, sorter_data_register: program.alloc_register(), - sorter_data_label: program.allocate_label(), - done_label: program.allocate_label(), }); let mut order = Vec::new(); for (_, direction) in order_by.iter() { @@ -299,8 +289,6 @@ fn init_group_by( let subroutine_accumulator_clear_label = program.allocate_label(); let subroutine_accumulator_output_label = program.allocate_label(); - let sorter_data_label = program.allocate_label(); - let grouping_done_label = program.allocate_label(); let mut order = Vec::new(); const ASCENDING: i64 = 0; @@ -352,8 +340,6 @@ fn init_group_by( subroutine_accumulator_output_label, subroutine_accumulator_output_return_offset_register: program.alloc_register(), accumulator_indicator_set_true_label: program.allocate_label(), - sorter_data_label, - grouping_done_label, abort_flag_register, data_in_accumulator_indicator_register, group_exprs_accumulator_register, @@ -1233,6 +1219,8 @@ fn group_by_emit( referenced_tables: &[BTreeTableReference], m: &mut Metadata, ) -> Result<()> { + let sorter_data_label = program.allocate_label(); + let grouping_done_label = program.allocate_label(); let group_by_metadata = m.group_by_metadata.as_mut().unwrap(); let GroupByMetadata { @@ -1280,15 +1268,12 @@ fn group_by_emit( program.emit_insn_with_label_dependency( Insn::SorterSort { cursor_id: group_by_metadata.sort_cursor, - pc_if_empty: group_by_metadata.grouping_done_label, + pc_if_empty: grouping_done_label, }, - group_by_metadata.grouping_done_label, + grouping_done_label, ); - program.defer_label_resolution( - group_by_metadata.sorter_data_label, - program.offset() as usize, - ); + program.defer_label_resolution(sorter_data_label, program.offset() as usize); // Read a row from the sorted data in the sorter into the pseudo cursor program.emit_insn(Insn::SorterData { cursor_id: group_by_metadata.sort_cursor, @@ -1421,12 +1406,12 @@ fn group_by_emit( program.emit_insn_with_label_dependency( Insn::SorterNext { cursor_id: group_by_metadata.sort_cursor, - pc_if_next: group_by_metadata.sorter_data_label, + pc_if_next: sorter_data_label, }, - group_by_metadata.sorter_data_label, + sorter_data_label, ); - program.resolve_label(group_by_metadata.grouping_done_label, program.offset()); + program.resolve_label(grouping_done_label, program.offset()); program.add_comment(program.offset(), "emit row for final group"); program.emit_insn_with_label_dependency( @@ -1701,7 +1686,8 @@ fn order_by_emit( limit: Option, m: &mut Metadata, ) -> Result<()> { - // TODO: DOESNT WORK YET + let sorter_data_label = program.allocate_label(); + let sorting_done_label = program.allocate_label(); program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); let mut pseudo_columns = vec![]; for (i, _) in order_by.iter().enumerate() { @@ -1750,20 +1736,18 @@ fn order_by_emit( program.emit_insn_with_label_dependency( Insn::SorterSort { cursor_id: sort_metadata.sort_cursor, - pc_if_empty: sort_metadata.done_label, + pc_if_empty: sorting_done_label, }, - sort_metadata.done_label, + sorting_done_label, ); - program.defer_label_resolution(sort_metadata.sorter_data_label, program.offset() as usize); + program.defer_label_resolution(sorter_data_label, program.offset() as usize); program.emit_insn(Insn::SorterData { cursor_id: sort_metadata.sort_cursor, dest_reg: sort_metadata.sorter_data_register, pseudo_cursor, }); - let sort_metadata = m.sort_metadata.as_mut().unwrap(); - // EMIT COLUMNS FROM SORTER AND EMIT ROW let cursor_id = 
pseudo_cursor; let start_reg = program.alloc_registers(result_columns.len()); @@ -1779,18 +1763,18 @@ fn order_by_emit( program, start_reg, result_columns.len(), - limit.map(|l| (l, sort_metadata.done_label)), + limit.map(|l| (l, sorting_done_label)), ); program.emit_insn_with_label_dependency( Insn::SorterNext { cursor_id: sort_metadata.sort_cursor, - pc_if_next: sort_metadata.sorter_data_label, + pc_if_next: sorter_data_label, }, - sort_metadata.sorter_data_label, + sorter_data_label, ); - program.resolve_label(sort_metadata.done_label, program.offset()); + program.resolve_label(sorting_done_label, program.offset()); Ok(()) } From cfb7e79601014fd7a194865021756e1e1c0f6d39 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 19:10:31 +0200 Subject: [PATCH 15/32] Function doc comments --- core/translate/emitter.rs | 75 ++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 185bd3952..11b0110ae 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1,3 +1,6 @@ +// This module contains code for emitting bytecode instructions for SQL query execution. +// It handles translating high-level SQL operations into low-level bytecode that can be executed by the virtual machine. + use std::cell::RefCell; use std::collections::HashMap; use std::rc::{Rc, Weak}; @@ -20,6 +23,7 @@ use super::optimizer::Optimizable; use super::plan::{Aggregate, BTreeTableReference, Direction, Plan}; use super::plan::{ResultSetColumn, SourceOperator}; +// Metadata for handling LEFT JOIN operations #[derive(Debug)] pub struct LeftJoinMetadata { // integer register that holds a flag that is set to true if the current row has a match for the left join @@ -32,6 +36,7 @@ pub struct LeftJoinMetadata { pub on_match_jump_to_label: BranchOffset, } +// Metadata for handling ORDER BY operations #[derive(Debug)] pub struct SortMetadata { // cursor id for the Sorter table where the sorted rows are stored @@ -40,6 +45,7 @@ pub struct SortMetadata { pub sorter_data_register: usize, } +// Metadata for handling GROUP BY operations #[derive(Debug)] pub struct GroupByMetadata { // Cursor ID for the Sorter table where the grouped rows are stored @@ -97,6 +103,7 @@ pub struct Metadata { pub result_columns_to_skip_in_orderby_sorter: Option>, } +/// Initialize the program with basic setup and return initial metadata and labels fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); @@ -126,6 +133,9 @@ fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> Ok((program, metadata, init_label, start_offset)) } +/// Clean up and finalize the program, resolving any remaining labels +/// Note that although these are the final instructions, typically an SQLite +/// query will jump to the Transaction instruction via init_label. 
fn epilogue( program: &mut ProgramBuilder, metadata: &mut Metadata, @@ -152,6 +162,8 @@ fn epilogue( Ok(()) } +/// Main entry point for emitting bytecode for a SQL query +/// Takes a query plan and generates the corresponding bytecode program pub fn emit_program( database_header: Rc>, mut plan: Plan, @@ -167,7 +179,7 @@ pub fn emit_program( } } - // OPEN CURSORS ETC + // Initialize cursors and other resources needed for query execution if let Some(ref mut order_by) = plan.order_by { init_order_by(&mut program, order_by, &mut metadata)?; } @@ -178,7 +190,7 @@ pub fn emit_program( } init_source(&mut program, &plan.source, &mut metadata)?; - // REWIND CURSORS, EMIT CONDITIONS + // Set up main query execution loop open_loop( &mut program, &mut plan.source, @@ -186,10 +198,10 @@ pub fn emit_program( &mut metadata, )?; - // EMIT COLUMNS AND OTHER EXPRS IN INNER LOOP + // Process result columns and expressions in the inner loop inner_loop_emit(&mut program, &mut plan, &mut metadata)?; - // CLOSE LOOP + // Clean up and close the main execution loop close_loop( &mut program, &mut plan.source, @@ -199,8 +211,7 @@ pub fn emit_program( let mut order_by_necessary = plan.order_by.is_some(); - // IF GROUP BY, SORT BY GROUPS AND DO AGGREGATION ETC - // EITHER EMITS RESULTROWS DIRECTLY OR INSERTS INTO ORDER BY SORTER + // Handle GROUP BY and aggregation processing if let Some(ref mut group_by) = plan.group_by { group_by_emit( &mut program, @@ -213,7 +224,7 @@ pub fn emit_program( &mut metadata, )?; } else if let Some(ref mut aggregates) = plan.aggregates { - // Example: SELECT sum(x), count(*) FROM t; + // Handle aggregation without GROUP BY agg_without_group_by_emit( &mut program, &plan.referenced_tables, @@ -221,12 +232,11 @@ pub fn emit_program( aggregates, &mut metadata, )?; - // If we have an aggregate without a group by, we don't need an order by because currently - // there can only be a single row result in those cases. + // Single row result for aggregates without GROUP BY, so ORDER BY not needed order_by_necessary = false; } - // EMIT RESULT ROWS FROM THE ORDER BY SORTER + // Process ORDER BY results if needed if let Some(ref mut order_by) = plan.order_by { if order_by_necessary { order_by_emit( @@ -239,12 +249,13 @@ pub fn emit_program( } } - // EPILOGUE + // Finalize program epilogue(&mut program, &mut metadata, init_label, start_offset)?; Ok(program.build(database_header, connection)) } +/// Initialize resources needed for ORDER BY processing fn init_order_by( program: &mut ProgramBuilder, order_by: &Vec<(ast::Expr, Direction)>, @@ -268,6 +279,7 @@ fn init_order_by( Ok(()) } +/// Initialize resources needed for GROUP BY processing fn init_group_by( program: &mut ProgramBuilder, group_by: &Vec, @@ -349,15 +361,7 @@ fn init_group_by( Ok(()) } -// fn init_agg_without_group_by( -// program: &mut ProgramBuilder, -// aggregates: &Vec, -// m: &mut Metadata, -// ) -> Result<()> { - -// Ok(()) -// } - +/// Initialize resources needed for the source operators (tables, joins, etc) fn init_source( program: &mut ProgramBuilder, source: &SourceOperator, @@ -449,6 +453,9 @@ fn init_source( } } +/// Set up the main query execution loop +/// For example in the case of a nested table scan, this means emitting the RewindAsync instruction +/// for all tables involved, outermost first. fn open_loop( program: &mut ProgramBuilder, source: &mut SourceOperator, @@ -793,6 +800,12 @@ fn open_loop( } } +/// SQLite (and so Limbo) processes joins as a nested loop. 
+/// The inner loop may emit rows to various destinations depending on the query: +/// - a GROUP BY sorter (grouping is done by sorting based on the GROUP BY keys and aggregating while the GROUP BY keys match) +/// - an ORDER BY sorter (when there is no GROUP BY, but there is an ORDER BY) +/// - an AggStep (the columns are collected for aggregation, which is finished later) +/// - a ResultRow (there is none of the above, so the loop emits a result row directly) pub enum InnerLoopEmitTarget<'a> { GroupBySorter { group_by: &'a Vec, @@ -801,12 +814,14 @@ pub enum InnerLoopEmitTarget<'a> { OrderBySorter { order_by: &'a Vec<(ast::Expr, Direction)>, }, + AggStep, ResultRow { limit: Option, }, - AggStep, } +/// Emits the bytecode for the inner loop of a query. +/// At this point the cursors for all tables have been opened and rewound. fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metadata) -> Result<()> { if let Some(wc) = &plan.where_clause { for predicate in wc.iter() { @@ -869,6 +884,9 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada ); } +/// This is a helper function for inner_loop_emit, +/// which does a different thing depending on the emit target. +/// See the InnerLoopEmitTarget enum for more details. fn inner_loop_source_emit( program: &mut ProgramBuilder, result_columns: &Vec, @@ -1075,6 +1093,9 @@ fn inner_loop_source_emit( } } +/// Closes the loop for a given source operator. +/// For example in the case of a nested table scan, this means emitting the NextAsync instruction +/// for all tables involved, innermost first. fn close_loop( program: &mut ProgramBuilder, source: &SourceOperator, @@ -1209,6 +1230,9 @@ fn close_loop( } } +/// Emits the bytecode for processing a GROUP BY clause. +/// This is called when the main query execution loop has finished processing, +/// and we now have data in the GROUP BY sorter. fn group_by_emit( program: &mut ProgramBuilder, result_columns: &Vec, @@ -1623,6 +1647,9 @@ fn group_by_emit( Ok(()) } +/// Emits the bytecode for processing an aggregate without a GROUP BY clause. +/// This is called when the main query execution loop has finished processing, +/// and we can now materialize the aggregate results. fn agg_without_group_by_emit( program: &mut ProgramBuilder, referenced_tables: &Vec, @@ -1679,6 +1706,9 @@ fn agg_without_group_by_emit( Ok(()) } +/// Emits the bytecode for processing an ORDER BY clause. +/// This is called when the main query execution loop has finished processing, +/// and we can now emit rows from the ORDER BY sorter. fn order_by_emit( program: &mut ProgramBuilder, order_by: &Vec<(ast::Expr, Direction)>, @@ -1779,6 +1809,7 @@ fn order_by_emit( Ok(()) } +/// Emits the bytecode for emitting a result row. fn emit_result_row( program: &mut ProgramBuilder, start_reg: usize, @@ -1806,6 +1837,8 @@ fn emit_result_row( } } +/// Emits the bytecode for inserting a row into a sorter. +/// This can be either a GROUP BY sorter or an ORDER BY sorter. 
fn sorter_insert( program: &mut ProgramBuilder, start_reg: usize, From 008be10cfde6d556da8f2a7c26e92c99ef99cfa7 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 19:44:11 +0200 Subject: [PATCH 16/32] Add TODO about expression equality comparisons --- core/translate/emitter.rs | 6 ++++++ core/translate/expr.rs | 3 +++ 2 files changed, 9 insertions(+) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 11b0110ae..cf6526fdf 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -963,6 +963,9 @@ fn inner_loop_source_emit( } } ResultSetColumn::Agg(agg) => { + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) let found = order_by .iter() .enumerate() @@ -1525,6 +1528,9 @@ fn group_by_emit( } } ResultSetColumn::Agg(agg) => { + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) let found = order_by .iter() .enumerate() diff --git a/core/translate/expr.rs b/core/translate/expr.rs index a2f373e82..5ce5b5537 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -568,6 +568,9 @@ pub fn translate_expr( ) -> Result { if let Some(precomputed_exprs_to_registers) = precomputed_exprs_to_registers { for (precomputed_expr, reg) in precomputed_exprs_to_registers.iter() { + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) if expr == *precomputed_expr { program.emit_insn(Insn::Copy { src_reg: *reg, From 885b6ecd7623cd11a62cebe03e9c85ae28f816fe Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sat, 23 Nov 2024 20:00:58 +0200 Subject: [PATCH 17/32] Remove 'cursor_hint': it is never needed --- core/translate/emitter.rs | 71 ++++++----------------- core/translate/expr.rs | 118 +++----------------------------------- core/translate/insert.rs | 1 - core/vdbe/builder.rs | 9 +-- 4 files changed, 28 insertions(+), 171 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index cf6526fdf..13fe7de07 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -503,7 +503,6 @@ fn open_loop( program, referenced_tables, predicate, - None, condition_metadata, None, )?; @@ -531,7 +530,7 @@ fn open_loop( predicates, iter_dir, } => { - let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier, None); + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); if iter_dir .as_ref() .is_some_and(|dir| *dir == IterationDirection::Backwards) @@ -575,7 +574,6 @@ fn open_loop( program, referenced_tables, expr, - None, condition_metadata, None, )?; @@ -592,14 +590,13 @@ fn open_loop( predicates, .. } => { - let table_cursor_id = - program.resolve_cursor_id(&table_reference.table_identifier, None); + let table_cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); // Open the loop for the index search. // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. if !matches!(search, Search::PrimaryKeyEq { .. }) { let index_cursor_id = if let Search::IndexSearch { index, .. 
} = search { - Some(program.resolve_cursor_id(&index.name, None)) + Some(program.resolve_cursor_id(&index.name)) } else { None }; @@ -617,14 +614,7 @@ fn open_loop( ast::Operator::Equals | ast::Operator::Greater | ast::Operator::GreaterEquals => { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - None, - None, - )?; + translate_expr(program, Some(referenced_tables), cmp_expr, cmp_reg, None)?; } ast::Operator::Less | ast::Operator::LessEquals => { program.emit_insn(Insn::Null { @@ -657,14 +647,7 @@ fn open_loop( *m.termination_label_stack.last().unwrap(), ); if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - cmp_reg, - None, - None, - )?; + translate_expr(program, Some(referenced_tables), cmp_expr, cmp_reg, None)?; } program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); @@ -755,14 +738,7 @@ fn open_loop( if let Search::PrimaryKeyEq { cmp_expr } = search { let src_reg = program.alloc_register(); - translate_expr( - program, - Some(referenced_tables), - cmp_expr, - src_reg, - None, - None, - )?; + translate_expr(program, Some(referenced_tables), cmp_expr, src_reg, None)?; program.emit_insn_with_label_dependency( Insn::SeekRowid { cursor_id: table_cursor_id, @@ -784,7 +760,6 @@ fn open_loop( program, referenced_tables, predicate, - None, condition_metadata, None, )?; @@ -909,7 +884,7 @@ fn inner_loop_source_emit( for expr in group_by.iter() { let key_reg = cur_reg; cur_reg += 1; - translate_expr(program, Some(referenced_tables), expr, key_reg, None, None)?; + translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; } for agg in aggregates.iter() { // Here we are collecting scalars for the group by sorter, which will include @@ -921,7 +896,7 @@ fn inner_loop_source_emit( for expr in agg.args.iter() { let agg_reg = cur_reg; cur_reg += 1; - translate_expr(program, Some(referenced_tables), expr, agg_reg, None, None)?; + translate_expr(program, Some(referenced_tables), expr, agg_reg, None)?; } } @@ -991,7 +966,7 @@ fn inner_loop_source_emit( let start_reg = program.alloc_registers(orderby_sorter_column_count); for (i, (expr, _)) in order_by.iter().enumerate() { let key_reg = start_reg + i; - translate_expr(program, Some(referenced_tables), expr, key_reg, None, None)?; + translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; } let mut cur_reg = start_reg + order_by_len; let mut cur_idx_in_orderby_sorter = order_by_len; @@ -1007,14 +982,7 @@ fn inner_loop_source_emit( contains_aggregates, } => { assert!(!*contains_aggregates); - translate_expr( - program, - Some(referenced_tables), - expr, - cur_reg, - None, - None, - )?; + translate_expr(program, Some(referenced_tables), expr, cur_reg, None)?; } other => unreachable!("{:?}", other), } @@ -1044,7 +1012,7 @@ fn inner_loop_source_emit( m.aggregation_start_register = Some(start_reg); for (i, agg) in aggregates.iter().enumerate() { let reg = start_reg + i; - translate_aggregation(program, referenced_tables, agg, reg, None)?; + translate_aggregation(program, referenced_tables, agg, reg)?; } for (i, expr) in result_columns.iter().enumerate() { match expr { @@ -1058,7 +1026,7 @@ fn inner_loop_source_emit( continue; } let reg = start_reg + num_aggs + i; - translate_expr(program, Some(referenced_tables), expr, reg, None, None)?; + translate_expr(program, Some(referenced_tables), expr, reg, None)?; } ResultSetColumn::Agg(_) => { /* do nothing, aggregates are computed above */ } 
} @@ -1076,7 +1044,7 @@ fn inner_loop_source_emit( } => { assert!(!*contains_aggregates); let reg = start_reg + i; - translate_expr(program, Some(referenced_tables), expr, reg, None, None)?; + translate_expr(program, Some(referenced_tables), expr, reg, None)?; } other => unreachable!( "Unexpected non-scalar result column in inner loop: {:?}", @@ -1131,10 +1099,10 @@ fn close_loop( let right_cursor_id = match right.as_ref() { SourceOperator::Scan { table_reference, .. - } => program.resolve_cursor_id(&table_reference.table_identifier, None), + } => program.resolve_cursor_id(&table_reference.table_identifier), SourceOperator::Search { table_reference, .. - } => program.resolve_cursor_id(&table_reference.table_identifier, None), + } => program.resolve_cursor_id(&table_reference.table_identifier), _ => unreachable!(), }; program.emit_insn(Insn::NullRow { @@ -1165,7 +1133,7 @@ fn close_loop( iter_dir, .. } => { - let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier, None); + let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); if iter_dir .as_ref() @@ -1210,9 +1178,9 @@ fn close_loop( return Ok(()); } let cursor_id = match search { - Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name, None), + Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), Search::PrimaryKeySearch { .. } => { - program.resolve_cursor_id(&table_reference.table_identifier, None) + program.resolve_cursor_id(&table_reference.table_identifier) } Search::PrimaryKeyEq { .. } => unreachable!(), }; @@ -1562,7 +1530,6 @@ fn group_by_emit( Some(referenced_tables), expr, cur_reg, - None, Some(&precomputed_exprs_to_register), )?; cur_reg += 1; @@ -1582,7 +1549,6 @@ fn group_by_emit( Some(referenced_tables), expr, cur_reg, - None, Some(&precomputed_exprs_to_register), )?; } @@ -1688,7 +1654,6 @@ fn agg_without_group_by_emit( Some(referenced_tables), expr, output_reg + i, - None, Some(&precomputed_exprs_to_register), )?; } diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 5ce5b5537..6c0b4437d 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -21,7 +21,6 @@ pub fn translate_condition_expr( program: &mut ProgramBuilder, referenced_tables: &[BTreeTableReference], expr: &ast::Expr, - cursor_hint: Option, condition_metadata: ConditionMetadata, precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, ) -> Result<()> { @@ -34,7 +33,6 @@ pub fn translate_condition_expr( program, referenced_tables, lhs, - cursor_hint, ConditionMetadata { jump_if_condition_is_true: false, ..condition_metadata @@ -45,7 +43,6 @@ pub fn translate_condition_expr( program, referenced_tables, rhs, - cursor_hint, condition_metadata, precomputed_exprs_to_registers, ); @@ -56,7 +53,6 @@ pub fn translate_condition_expr( program, referenced_tables, lhs, - cursor_hint, ConditionMetadata { // If the first condition is true, we don't need to evaluate the second condition. 
jump_if_condition_is_true: true, @@ -70,7 +66,6 @@ pub fn translate_condition_expr( program, referenced_tables, rhs, - cursor_hint, condition_metadata, precomputed_exprs_to_registers, ); @@ -82,7 +77,6 @@ pub fn translate_condition_expr( Some(referenced_tables), lhs, lhs_reg, - cursor_hint, precomputed_exprs_to_registers, ); if let ast::Expr::Literal(_) = lhs.as_ref() { @@ -94,7 +88,6 @@ pub fn translate_condition_expr( Some(referenced_tables), rhs, rhs_reg, - cursor_hint, precomputed_exprs_to_registers, ); if let ast::Expr::Literal(_) = rhs.as_ref() { @@ -343,7 +336,6 @@ pub fn translate_condition_expr( Some(referenced_tables), lhs, lhs_reg, - cursor_hint, precomputed_exprs_to_registers, )?; @@ -373,7 +365,6 @@ pub fn translate_condition_expr( Some(referenced_tables), expr, rhs_reg, - cursor_hint, precomputed_exprs_to_registers, )?; // If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true. @@ -417,7 +408,6 @@ pub fn translate_condition_expr( Some(referenced_tables), expr, rhs_reg, - cursor_hint, precomputed_exprs_to_registers, )?; program.emit_insn_with_label_dependency( @@ -463,7 +453,6 @@ pub fn translate_condition_expr( Some(referenced_tables), lhs, column_reg, - cursor_hint, precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = lhs.as_ref() { @@ -474,7 +463,6 @@ pub fn translate_condition_expr( Some(referenced_tables), rhs, pattern_reg, - cursor_hint, precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = rhs.as_ref() { @@ -547,7 +535,6 @@ pub fn translate_condition_expr( program, referenced_tables, expr, - cursor_hint, condition_metadata, precomputed_exprs_to_registers, ); @@ -563,7 +550,6 @@ pub fn translate_expr( referenced_tables: Option<&[BTreeTableReference]>, expr: &ast::Expr, target_register: usize, - cursor_hint: Option, precomputed_exprs_to_registers: Option<&Vec<(&ast::Expr, usize)>>, ) -> Result { if let Some(precomputed_exprs_to_registers) = precomputed_exprs_to_registers { @@ -590,7 +576,6 @@ pub fn translate_expr( referenced_tables, e1, e1_reg, - cursor_hint, precomputed_exprs_to_registers, )?; let e2_reg = program.alloc_register(); @@ -599,7 +584,6 @@ pub fn translate_expr( referenced_tables, e2, e2_reg, - cursor_hint, precomputed_exprs_to_registers, )?; @@ -723,7 +707,6 @@ pub fn translate_expr( referenced_tables, expr, reg_expr, - cursor_hint, precomputed_exprs_to_registers, )?; let reg_type = program.alloc_register(); @@ -796,7 +779,6 @@ pub fn translate_expr( referenced_tables, &args[0], regs, - cursor_hint, precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { @@ -823,7 +805,6 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, precomputed_exprs_to_registers, )?; } @@ -861,7 +842,6 @@ pub fn translate_expr( referenced_tables, arg, target_register, - cursor_hint, precomputed_exprs_to_registers, )?; if index < args.len() - 1 { @@ -897,7 +877,6 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, precomputed_exprs_to_registers, )?; } @@ -930,7 +909,6 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, precomputed_exprs_to_registers, )?; } @@ -967,7 +945,6 @@ pub fn translate_expr( referenced_tables, &args[0], temp_reg, - cursor_hint, precomputed_exprs_to_registers, )?; program.emit_insn(Insn::NotNull { @@ -980,7 +957,6 @@ pub fn translate_expr( referenced_tables, &args[1], temp_reg, - cursor_hint, precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Copy { @@ -1013,7 +989,6 @@ pub fn translate_expr( 
referenced_tables, arg, reg, - cursor_hint, precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { @@ -1061,7 +1036,6 @@ pub fn translate_expr( referenced_tables, &args[0], regs, - cursor_hint, precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { @@ -1098,7 +1072,6 @@ pub fn translate_expr( referenced_tables, arg, target_reg, - cursor_hint, precomputed_exprs_to_registers, )?; } @@ -1136,7 +1109,6 @@ pub fn translate_expr( referenced_tables, &args[0], str_reg, - cursor_hint, precomputed_exprs_to_registers, )?; translate_expr( @@ -1144,7 +1116,6 @@ pub fn translate_expr( referenced_tables, &args[1], start_reg, - cursor_hint, precomputed_exprs_to_registers, )?; if args.len() == 3 { @@ -1153,7 +1124,6 @@ pub fn translate_expr( referenced_tables, &args[2], length_reg, - cursor_hint, precomputed_exprs_to_registers, )?; } @@ -1183,7 +1153,6 @@ pub fn translate_expr( referenced_tables, &args[0], regs, - cursor_hint, precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { @@ -1207,7 +1176,6 @@ pub fn translate_expr( referenced_tables, &args[0], arg_reg, - cursor_hint, precomputed_exprs_to_registers, )?; start_reg = arg_reg; @@ -1232,7 +1200,6 @@ pub fn translate_expr( referenced_tables, arg, target_reg, - cursor_hint, precomputed_exprs_to_registers, )?; } @@ -1272,7 +1239,6 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { @@ -1305,7 +1271,6 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { @@ -1339,7 +1304,6 @@ pub fn translate_expr( referenced_tables, arg, reg, - cursor_hint, precomputed_exprs_to_registers, )?; if let ast::Expr::Literal(_) = arg { @@ -1377,7 +1341,6 @@ pub fn translate_expr( referenced_tables, &args[0], first_reg, - cursor_hint, precomputed_exprs_to_registers, )?; let second_reg = program.alloc_register(); @@ -1386,7 +1349,6 @@ pub fn translate_expr( referenced_tables, &args[1], second_reg, - cursor_hint, precomputed_exprs_to_registers, )?; program.emit_insn(Insn::Function { @@ -1431,7 +1393,7 @@ pub fn translate_expr( is_rowid_alias: is_primary_key, } => { let tbl_ref = referenced_tables.as_ref().unwrap().get(*table).unwrap(); - let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier, cursor_hint); + let cursor_id = program.resolve_cursor_id(&tbl_ref.table_identifier); if *is_primary_key { program.emit_insn(Insn::RowId { cursor_id, @@ -1518,7 +1480,6 @@ pub fn translate_expr( referenced_tables, &exprs[0], target_register, - cursor_hint, precomputed_exprs_to_registers, )?; } else { @@ -1586,7 +1547,6 @@ pub fn translate_aggregation( referenced_tables: &[BTreeTableReference], agg: &Aggregate, target_register: usize, - cursor_hint: Option, ) -> Result { let dest = match agg.func { AggFunc::Avg => { @@ -1595,14 +1555,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1617,14 +1570,7 @@ pub fn translate_aggregation( } else { let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - ); + let _ = 
translate_expr(program, Some(referenced_tables), expr, expr_reg, None); expr_reg }; program.emit_insn(Insn::AggStep { @@ -1660,20 +1606,12 @@ pub fn translate_aggregation( delimiter_expr = ast::Expr::Literal(ast::Literal::String(String::from("\",\""))); } - translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; translate_expr( program, Some(referenced_tables), &delimiter_expr, delimiter_reg, - cursor_hint, None, )?; @@ -1692,14 +1630,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1714,14 +1645,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1751,20 +1675,12 @@ pub fn translate_aggregation( _ => crate::bail_parse_error!("Incorrect delimiter parameter"), }; - translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; translate_expr( program, Some(referenced_tables), &delimiter_expr, delimiter_reg, - cursor_hint, None, )?; @@ -1783,14 +1699,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1805,14 +1714,7 @@ pub fn translate_aggregation( } let expr = &agg.args[0]; let expr_reg = program.alloc_register(); - let _ = translate_expr( - program, - Some(referenced_tables), - expr, - expr_reg, - cursor_hint, - None, - )?; + let _ = translate_expr(program, Some(referenced_tables), expr, expr_reg, None)?; program.emit_insn(Insn::AggStep { acc_reg: target_register, col: expr_reg, @@ -1897,7 +1799,6 @@ pub fn translate_aggregation_groupby( &delimiter_expr, delimiter_reg, None, - None, )?; program.emit_insn(Insn::AggStep { @@ -1964,7 +1865,6 @@ pub fn translate_aggregation_groupby( &delimiter_expr, delimiter_reg, None, - None, )?; program.emit_insn(Insn::AggStep { diff --git a/core/translate/insert.rs b/core/translate/insert.rs index ea890e994..614cde8b2 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -98,7 +98,6 @@ pub fn translate_insert( expr, column_registers_start + col, None, - None, )?; } program.emit_insn(Insn::Yield { diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index bbb7f75a8..8dd1cd4de 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -343,14 +343,7 @@ impl ProgramBuilder { } // translate table to cursor id - pub fn resolve_cursor_id( - &self, - table_identifier: &str, - cursor_hint: Option, - ) -> CursorID { - if let Some(cursor_hint) = cursor_hint { - return cursor_hint; - } + pub fn resolve_cursor_id(&self, table_identifier: &str) -> 
CursorID { self.cursor_ref .iter() .position(|(t_ident, _)| { From 56b15193d08374fa27b2df557e4d370d67b84de0 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Sun, 24 Nov 2024 21:20:55 +0200 Subject: [PATCH 18/32] resolve aggregates from orderby as well --- core/translate/planner.rs | 6 ++++++ testing/orderby.test | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 02bf9cdcd..c2c5aa115 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -20,6 +20,9 @@ impl OperatorIdCounter { } fn resolve_aggregates(expr: &ast::Expr, aggs: &mut Vec) { + if aggs.iter().any(|a| a.original_expr == *expr) { + return; + } match expr { ast::Expr::FunctionCall { name, args, .. } => { let args_count = if let Some(args) = &args { @@ -433,6 +436,9 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

Date: Sun, 24 Nov 2024 21:39:21 +0200 Subject: [PATCH 19/32] test ordering by aggregate not mentioned in select --- testing/orderby.test | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/testing/orderby.test b/testing/orderby.test index 787483570..5155efcee 100755 --- a/testing/orderby.test +++ b/testing/orderby.test @@ -128,4 +128,12 @@ Christopher|6397 James|5921 Joseph|5711 Brian|5059 -William|5047} \ No newline at end of file +William|5047} + +do_execsql_test order-by-agg-not-mentioned-in-select { + select u.first_name, length(group_concat(u.last_name)) from users u group by u.first_name order by max(u.email) desc limit 5; +} {Louis|65 +Carolyn|118 +Katelyn|40 +Erik|88 +Collin|15} \ No newline at end of file From 7d5fa12bb78da76f3520dbbd7bbab064ae83eaae Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 25 Nov 2024 17:19:56 +0200 Subject: [PATCH 20/32] fix allocating wrong number of registers upfront for aggregation results --- core/translate/emitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 13fe7de07..78a3c5a0f 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1008,7 +1008,7 @@ fn inner_loop_source_emit( let agg_final_label = program.allocate_label(); m.termination_label_stack.push(agg_final_label); let num_aggs = aggregates.len(); - let start_reg = program.alloc_registers(result_columns.len()); + let start_reg = program.alloc_registers(num_aggs); m.aggregation_start_register = Some(start_reg); for (i, agg) in aggregates.iter().enumerate() { let reg = start_reg + i; From bb8ba7fb0159b78f6c87522922d0f4ae1c459289 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 25 Nov 2024 17:20:22 +0200 Subject: [PATCH 21/32] add tests for arithmetic on two aggregates with no from clause --- testing/math.test | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/testing/math.test b/testing/math.test index 9d6e359b4..c567550e2 100644 --- a/testing/math.test +++ b/testing/math.test @@ -15,6 +15,18 @@ do_execsql_test add-int-float { SELECT 10 + 0.1 } {10.1} +do_execsql_test add-agg-int-agg-int { + SELECT sum(1) + sum(2) +} {3} + +do_execsql_test add-agg-int-agg-float { + SELECT sum(1) + sum(2.5) +} {3.5} + +do_execsql_test add-agg-float-agg-int { + SELECT sum(1.5) + sum(2) +} {3.5} + do_execsql_test subtract-int { SELECT 10 - 1 } {9} @@ -27,6 +39,18 @@ do_execsql_test subtract-int-float { SELECT 10 - 0.1 } {9.9} +do_execsql_test subtract-agg-int-agg-int { + SELECT sum(3) - sum(1) +} {2} + +do_execsql_test subtract-agg-int-agg-float { + SELECT sum(3) - sum(1.5) +} {1.5} + +do_execsql_test subtract-agg-float-agg-int { + SELECT sum(3.5) - sum(1) +} {2.5} + do_execsql_test multiply-int { SELECT 10 * 2 } {20} @@ -43,6 +67,18 @@ do_execsql_test multiply-float-int { SELECT 1.45 * 10 } {14.5} +do_execsql_test multiply-agg-int-agg-int { + SELECT sum(2) * sum(3) +} {6} + +do_execsql_test multiply-agg-int-agg-float { + SELECT sum(2) * sum(3.5) +} {7.0} + +do_execsql_test multiply-agg-float-agg-int { + SELECT sum(2.5) * sum(3) +} {7.5} + do_execsql_test divide-int { SELECT 10 / 2 } {5} @@ -79,6 +115,17 @@ do_execsql_test divide-null { SELECT null / null } {} +do_execsql_test divide-agg-int-agg-int { + SELECT sum(4) / sum(2) +} {2} + +do_execsql_test divide-agg-int-agg-float { + SELECT sum(4) / sum(2.0) +} {2.0} + +do_execsql_test divide-agg-float-agg-int { + SELECT sum(4.0) / sum(2) +} {2.0} do_execsql_test add-agg-int { From 
ac12e9c7fd4844677c973bdb58fb53d14c58da23 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 25 Nov 2024 18:25:09 +0200 Subject: [PATCH 22/32] No need for ResultSetColumn to be an enum --- core/translate/emitter.rs | 223 +++++++++++++------------------------- core/translate/plan.rs | 10 +- core/translate/planner.rs | 30 ++--- 3 files changed, 97 insertions(+), 166 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 78a3c5a0f..9f292f625 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -921,38 +921,34 @@ fn inner_loop_source_emit( // should be emitted in the SELECT clause order, not the ORDER BY clause order. let mut result_columns_to_skip: Option> = None; for (i, rc) in result_columns.iter().enumerate() { - match rc { - ResultSetColumn::Expr { - expr, - contains_aggregates, - } => { - assert!(!*contains_aggregates); - let found = order_by.iter().enumerate().find(|(_, (e, _))| e == expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); + if !rc.contains_aggregates { + let found = order_by + .iter() + .enumerate() + .find(|(_, (e, _))| e == &rc.expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); } + m.result_column_indexes_in_orderby_sorter.insert(i, j); } - ResultSetColumn::Agg(agg) => { - // TODO: implement a custom equality check for expressions - // there are lots of examples where this breaks, even simple ones like - // sum(x) != SUM(x) - let found = order_by - .iter() - .enumerate() - .find(|(_, (expr, _))| expr == &agg.original_expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); + } else { + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) + let found = order_by + .iter() + .enumerate() + .find(|(_, (expr, _))| expr == &rc.expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); } + m.result_column_indexes_in_orderby_sorter.insert(i, j); } } } @@ -976,16 +972,8 @@ fn inner_loop_source_emit( continue; } } - match rc { - ResultSetColumn::Expr { - expr, - contains_aggregates, - } => { - assert!(!*contains_aggregates); - translate_expr(program, Some(referenced_tables), expr, cur_reg, None)?; - } - other => unreachable!("{:?}", other), - } + assert!(!rc.contains_aggregates); + translate_expr(program, Some(referenced_tables), &rc.expr, cur_reg, None)?; m.result_column_indexes_in_orderby_sorter .insert(i, cur_idx_in_orderby_sorter); cur_idx_in_orderby_sorter += 1; @@ -1014,43 +1002,23 @@ fn inner_loop_source_emit( let reg = start_reg + i; translate_aggregation(program, referenced_tables, agg, reg)?; } - for (i, expr) in result_columns.iter().enumerate() { - match expr { - ResultSetColumn::Expr { - expr, - contains_aggregates, - } => { - if *contains_aggregates { - // Do nothing, aggregates will be computed above and this full result expression will be - // computed later - continue; - } - let reg = start_reg + num_aggs + i; - translate_expr(program, Some(referenced_tables), expr, reg, None)?; - } - 
ResultSetColumn::Agg(_) => { /* do nothing, aggregates are computed above */ } + for (i, rc) in result_columns.iter().enumerate() { + if rc.contains_aggregates { + // Do nothing, aggregates are computed above + continue; } + let reg = start_reg + num_aggs + i; + translate_expr(program, Some(referenced_tables), &rc.expr, reg, None)?; } Ok(()) } InnerLoopEmitTarget::ResultRow { limit } => { assert!(aggregates.is_none()); let start_reg = program.alloc_registers(result_columns.len()); - for (i, expr) in result_columns.iter().enumerate() { - match expr { - ResultSetColumn::Expr { - expr, - contains_aggregates, - } => { - assert!(!*contains_aggregates); - let reg = start_reg + i; - translate_expr(program, Some(referenced_tables), expr, reg, None)?; - } - other => unreachable!( - "Unexpected non-scalar result column in inner loop: {:?}", - other - ), - } + for (i, rc) in result_columns.iter().enumerate() { + assert!(!rc.contains_aggregates); + let reg = start_reg + i; + translate_expr(program, Some(referenced_tables), &rc.expr, reg, None)?; } emit_result_row( program, @@ -1483,34 +1451,34 @@ fn group_by_emit( let mut result_columns_to_skip: Option> = None; if let Some(order_by) = order_by { for (i, rc) in result_columns.iter().enumerate() { - match rc { - ResultSetColumn::Expr { expr, .. } => { - let found = order_by.iter().enumerate().find(|(_, (e, _))| e == expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); + if !rc.contains_aggregates { + let found = order_by + .iter() + .enumerate() + .find(|(_, (e, _))| e == &rc.expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); } + m.result_column_indexes_in_orderby_sorter.insert(i, j); } - ResultSetColumn::Agg(agg) => { - // TODO: implement a custom equality check for expressions - // there are lots of examples where this breaks, even simple ones like - // sum(x) != SUM(x) - let found = order_by - .iter() - .enumerate() - .find(|(_, (expr, _))| expr == &agg.original_expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); + } else { + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) + let found = order_by + .iter() + .enumerate() + .find(|(_, (expr, _))| expr == &rc.expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); } + m.result_column_indexes_in_orderby_sorter.insert(i, j); } } } @@ -1542,29 +1510,13 @@ fn group_by_emit( continue; } } - match rc { - ResultSetColumn::Expr { expr, .. 
} => { - translate_expr( - program, - Some(referenced_tables), - expr, - cur_reg, - Some(&precomputed_exprs_to_register), - )?; - } - ResultSetColumn::Agg(agg) => { - let found = aggregates.iter().enumerate().find(|(_, a)| **a == *agg); - if let Some((i, _)) = found { - program.emit_insn(Insn::Copy { - src_reg: agg_start_reg + i, - dst_reg: cur_reg, - amount: 0, - }); - } else { - unreachable!("agg {:?} not found", agg); - } - } - } + translate_expr( + program, + Some(referenced_tables), + &rc.expr, + cur_reg, + Some(&precomputed_exprs_to_register), + )?; m.result_column_indexes_in_orderby_sorter .insert(i, res_col_idx_in_orderby_sorter); res_col_idx_in_orderby_sorter += 1; @@ -1647,29 +1599,13 @@ fn agg_without_group_by_emit( let output_reg = program.alloc_registers(result_columns.len()); for (i, rc) in result_columns.iter().enumerate() { - match rc { - ResultSetColumn::Expr { expr, .. } => { - translate_expr( - program, - Some(referenced_tables), - expr, - output_reg + i, - Some(&precomputed_exprs_to_register), - )?; - } - ResultSetColumn::Agg(agg) => { - let found = aggregates.iter().enumerate().find(|(_, a)| **a == *agg); - if let Some((i, _)) = found { - program.emit_insn(Insn::Copy { - src_reg: agg_start_reg + i, - dst_reg: output_reg + i, - amount: 0, - }); - } else { - unreachable!("agg {:?} not found", agg); - } - } - } + translate_expr( + program, + Some(referenced_tables), + &rc.expr, + output_reg + i, + Some(&precomputed_exprs_to_register), + )?; } // This always emits a ResultRow because currently it can only be used for a single row result emit_result_row(program, output_reg, result_columns.len(), None); @@ -1698,17 +1634,14 @@ fn order_by_emit( ty: crate::schema::Type::Null, }); } - for (i, expr) in result_columns.iter().enumerate() { + for (i, rc) in result_columns.iter().enumerate() { if let Some(ref v) = m.result_columns_to_skip_in_orderby_sorter { if v.contains(&i) { continue; } } pseudo_columns.push(Column { - name: match expr { - ResultSetColumn::Expr { expr, .. } => expr.to_string(), - ResultSetColumn::Agg(agg) => agg.to_string(), - }, + name: rc.expr.to_string(), primary_key: false, ty: crate::schema::Type::Null, }); diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 6ab599372..ac75981f6 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -13,12 +13,10 @@ use crate::{ }; #[derive(Debug)] -pub enum ResultSetColumn { - Expr { - expr: ast::Expr, - contains_aggregates: bool, - }, - Agg(Aggregate), +pub struct ResultSetColumn { + pub expr: ast::Expr, + // TODO: encode which aggregates (e.g. index bitmask of plan.aggregates) are present in this column + pub contains_aggregates: bool, } #[derive(Debug)] diff --git a/core/translate/planner.rs b/core/translate/planner.rs index c2c5aa115..51706f108 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -60,6 +60,7 @@ fn resolve_aggregates(expr: &ast::Expr, aggs: &mut Vec) { resolve_aggregates(lhs, aggs); resolve_aggregates(rhs, aggs); } + // TODO: handle other expressions that may contain aggregates _ => {} } } @@ -272,7 +273,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ for table_reference in plan.referenced_tables.iter() { for (idx, col) in table_reference.table.columns.iter().enumerate() { - plan.result_columns.push(ResultSetColumn::Expr { + plan.result_columns.push(ResultSetColumn { expr: ast::Expr::Column { database: None, // TODO: support different databases table: table_reference.table_index, @@ -296,7 +297,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

{ let cur_agg_count = aggregate_expressions.len(); resolve_aggregates(&expr, &mut aggregate_expressions); let contains_aggregates = cur_agg_count != aggregate_expressions.len(); - plan.result_columns.push(ResultSetColumn::Expr { + plan.result_columns.push(ResultSetColumn { expr: expr.clone(), contains_aggregates, }); @@ -364,7 +368,10 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

{ + expr => { let cur_agg_count = aggregate_expressions.len(); - resolve_aggregates(&lhs, &mut aggregate_expressions); - resolve_aggregates(&rhs, &mut aggregate_expressions); + resolve_aggregates(expr, &mut aggregate_expressions); let contains_aggregates = cur_agg_count != aggregate_expressions.len(); - plan.result_columns.push(ResultSetColumn::Expr { + plan.result_columns.push(ResultSetColumn { expr: expr.clone(), contains_aggregates, }); } - e => { - plan.result_columns.push(ResultSetColumn::Expr { - expr: e.clone(), - contains_aggregates: false, - }); - } } } } From 89569fa7a3242cef50f66069a3846550f405bcb3 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 25 Nov 2024 18:32:31 +0200 Subject: [PATCH 23/32] Remove redundant if-else after refactoring ResultSetColumn to struct --- core/translate/emitter.rs | 88 +++++++++++++-------------------------- 1 file changed, 30 insertions(+), 58 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 9f292f625..258dc9ce7 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -915,41 +915,26 @@ fn inner_loop_source_emit( InnerLoopEmitTarget::OrderBySorter { order_by } => { // We need to handle the case where we are emitting to sorter. // In that case the first columns should be the sort key columns, and the rest is the result columns of the select. - // In case any of the sort keys are exactly equal to a result column, we need to skip emitting that result column. + // In case any of the sort keys are exactly equal to a result column, we can skip emitting that result column. // We need to do this before rewriting the result columns to registers because we need to know which columns to skip. // Moreover, we need to keep track what index in the ORDER BY sorter the result columns have, because the result columns // should be emitted in the SELECT clause order, not the ORDER BY clause order. 
let mut result_columns_to_skip: Option> = None; for (i, rc) in result_columns.iter().enumerate() { - if !rc.contains_aggregates { - let found = order_by - .iter() - .enumerate() - .find(|(_, (e, _))| e == &rc.expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); - } - } else { - // TODO: implement a custom equality check for expressions - // there are lots of examples where this breaks, even simple ones like - // sum(x) != SUM(x) - let found = order_by - .iter() - .enumerate() - .find(|(_, (expr, _))| expr == &rc.expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); + // TODO: although this is an optimization and not strictly necessary, we should implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // length(foo) != LENGTH(foo) which causes the length to be computed twice + let found = order_by + .iter() + .enumerate() + .find(|(_, (expr, _))| expr == &rc.expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); } + m.result_column_indexes_in_orderby_sorter.insert(i, j); } } let order_by_len = order_by.len(); @@ -1005,6 +990,8 @@ fn inner_loop_source_emit( for (i, rc) in result_columns.iter().enumerate() { if rc.contains_aggregates { // Do nothing, aggregates are computed above + // if this result column is e.g. something like sum(x) + 1 or length(sum(x)), we do not want to translate that (+1) or length() yet, + // it will be computed after the aggregations are finalized. continue; } let reg = start_reg + num_aggs + i; @@ -1444,42 +1431,27 @@ fn group_by_emit( // We need to handle the case where we are emitting to sorter. // In that case the first columns should be the sort key columns, and the rest is the result columns of the select. - // In case any of the sort keys are exactly equal to a result column, we need to skip emitting that result column. + // In case any of the sort keys are exactly equal to a result column, we can skip emitting that result column. // We need to do this before rewriting the result columns to registers because we need to know which columns to skip. // Moreover, we need to keep track what index in the ORDER BY sorter the result columns have, because the result columns // should be emitted in the SELECT clause order, not the ORDER BY clause order. 
let mut result_columns_to_skip: Option> = None; if let Some(order_by) = order_by { for (i, rc) in result_columns.iter().enumerate() { - if !rc.contains_aggregates { - let found = order_by - .iter() - .enumerate() - .find(|(_, (e, _))| e == &rc.expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); - } - } else { - // TODO: implement a custom equality check for expressions - // there are lots of examples where this breaks, even simple ones like - // sum(x) != SUM(x) - let found = order_by - .iter() - .enumerate() - .find(|(_, (expr, _))| expr == &rc.expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) + let found = order_by + .iter() + .enumerate() + .find(|(_, (expr, _))| expr == &rc.expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_columns_to_skip { + v.push(i); + } else { + result_columns_to_skip = Some(vec![i]); } + m.result_column_indexes_in_orderby_sorter.insert(i, j); } } } From c74981873e8d4252270ad078f14f6f81a7470799 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Mon, 25 Nov 2024 19:06:27 +0200 Subject: [PATCH 24/32] Extract ORDER BY result column deduping into a function --- core/translate/emitter.rs | 187 +++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 94 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 258dc9ce7..3f7527020 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -913,31 +913,9 @@ fn inner_loop_source_emit( Ok(()) } InnerLoopEmitTarget::OrderBySorter { order_by } => { - // We need to handle the case where we are emitting to sorter. - // In that case the first columns should be the sort key columns, and the rest is the result columns of the select. - // In case any of the sort keys are exactly equal to a result column, we can skip emitting that result column. - // We need to do this before rewriting the result columns to registers because we need to know which columns to skip. - // Moreover, we need to keep track what index in the ORDER BY sorter the result columns have, because the result columns - // should be emitted in the SELECT clause order, not the ORDER BY clause order. 
- let mut result_columns_to_skip: Option> = None; - for (i, rc) in result_columns.iter().enumerate() { - // TODO: although this is an optimization and not strictly necessary, we should implement a custom equality check for expressions - // there are lots of examples where this breaks, even simple ones like - // length(foo) != LENGTH(foo) which causes the length to be computed twice - let found = order_by - .iter() - .enumerate() - .find(|(_, (expr, _))| expr == &rc.expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); - } - } let order_by_len = order_by.len(); + let result_columns_to_skip = + orderby_deduplicate_result_columns(order_by, result_columns); let result_columns_to_skip_len = result_columns_to_skip .as_ref() .map(|v| v.len()) @@ -953,7 +931,10 @@ fn inner_loop_source_emit( let mut cur_idx_in_orderby_sorter = order_by_len; for (i, rc) in result_columns.iter().enumerate() { if let Some(ref v) = result_columns_to_skip { - if v.contains(&i) { + let found = v.iter().find(|(skipped_idx, _)| *skipped_idx == i); + if let Some((_, result_column_idx)) = found { + m.result_column_indexes_in_orderby_sorter + .insert(i, *result_column_idx); continue; } } @@ -1429,74 +1410,21 @@ fn group_by_emit( precomputed_exprs_to_register.push((&agg.original_expr, agg_start_reg + i)); } - // We need to handle the case where we are emitting to sorter. - // In that case the first columns should be the sort key columns, and the rest is the result columns of the select. - // In case any of the sort keys are exactly equal to a result column, we can skip emitting that result column. - // We need to do this before rewriting the result columns to registers because we need to know which columns to skip. - // Moreover, we need to keep track what index in the ORDER BY sorter the result columns have, because the result columns - // should be emitted in the SELECT clause order, not the ORDER BY clause order. 
- let mut result_columns_to_skip: Option> = None; - if let Some(order_by) = order_by { - for (i, rc) in result_columns.iter().enumerate() { - // TODO: implement a custom equality check for expressions - // there are lots of examples where this breaks, even simple ones like - // sum(x) != SUM(x) - let found = order_by - .iter() - .enumerate() - .find(|(_, (expr, _))| expr == &rc.expr); - if let Some((j, _)) = found { - if let Some(ref mut v) = result_columns_to_skip { - v.push(i); - } else { - result_columns_to_skip = Some(vec![i]); - } - m.result_column_indexes_in_orderby_sorter.insert(i, j); - } - } - } - let order_by_len = order_by.as_ref().map(|v| v.len()).unwrap_or(0); - let result_columns_to_skip_len = result_columns_to_skip - .as_ref() - .map(|v| v.len()) - .unwrap_or(0); - let output_column_count = result_columns.len() + order_by_len - result_columns_to_skip_len; - let output_row_start_reg = program.alloc_registers(output_column_count); - let mut cur_reg = output_row_start_reg; - if let Some(order_by) = order_by { - for (expr, _) in order_by.iter() { - translate_expr( - program, - Some(referenced_tables), - expr, - cur_reg, - Some(&precomputed_exprs_to_register), - )?; - cur_reg += 1; - } - } - let mut res_col_idx_in_orderby_sorter = order_by_len; - for (i, rc) in result_columns.iter().enumerate() { - if let Some(ref v) = result_columns_to_skip { - if v.contains(&i) { - continue; - } - } - translate_expr( - program, - Some(referenced_tables), - &rc.expr, - cur_reg, - Some(&precomputed_exprs_to_register), - )?; - m.result_column_indexes_in_orderby_sorter - .insert(i, res_col_idx_in_orderby_sorter); - res_col_idx_in_orderby_sorter += 1; - cur_reg += 1; - } - match order_by { None => { + let output_column_count = result_columns.len(); + let output_row_start_reg = program.alloc_registers(output_column_count); + let mut cur_reg = output_row_start_reg; + for (i, rc) in result_columns.iter().enumerate() { + translate_expr( + program, + Some(referenced_tables), + &rc.expr, + cur_reg, + Some(&precomputed_exprs_to_register), + )?; + cur_reg += 1; + } emit_result_row( program, output_row_start_reg, @@ -1504,7 +1432,47 @@ fn group_by_emit( limit.map(|l| (l, *m.termination_label_stack.last().unwrap())), ); } - Some(_) => { + Some(order_by) => { + let skipped_result_cols = orderby_deduplicate_result_columns(order_by, result_columns); + let skipped_result_cols_len = + skipped_result_cols.as_ref().map(|v| v.len()).unwrap_or(0); + let output_column_count = + result_columns.len() + order_by.len() - skipped_result_cols_len; + let output_row_start_reg = program.alloc_registers(output_column_count); + let mut cur_reg = output_row_start_reg; + for (expr, _) in order_by.iter() { + translate_expr( + program, + Some(referenced_tables), + expr, + cur_reg, + Some(&precomputed_exprs_to_register), + )?; + cur_reg += 1; + } + + let mut res_col_idx_in_orderby_sorter = order_by.len(); + for (i, rc) in result_columns.iter().enumerate() { + if let Some(ref v) = skipped_result_cols { + let found = v.iter().find(|(skipped_idx, _)| *skipped_idx == i); + if let Some((_, result_col_idx_in_orderby_sorter)) = found { + m.result_column_indexes_in_orderby_sorter + .insert(i, *result_col_idx_in_orderby_sorter); + continue; + } + } + translate_expr( + program, + Some(referenced_tables), + &rc.expr, + cur_reg, + Some(&precomputed_exprs_to_register), + )?; + m.result_column_indexes_in_orderby_sorter + .insert(i, res_col_idx_in_orderby_sorter); + res_col_idx_in_orderby_sorter += 1; + cur_reg += 1; + } sorter_insert( program, 
output_row_start_reg, @@ -1538,8 +1506,6 @@ fn group_by_emit( return_reg: group_by_metadata.subroutine_accumulator_clear_return_offset_register, }); - m.result_columns_to_skip_in_orderby_sorter = result_columns_to_skip; - Ok(()) } @@ -1732,3 +1698,36 @@ fn sorter_insert( record_reg, }); } + +/// We need to handle the case where we are emitting to sorter. +/// In that case the first columns should be the sort key columns, and the rest is the result columns of the select. +/// In case any of the sort keys are exactly equal to a result column, we can skip emitting that result column. +/// We need to do this before rewriting the result columns to registers because we need to know which columns to skip. +/// Moreover, we need to keep track what index in the ORDER BY sorter the result columns have, because the result columns +/// should be emitted in the SELECT clause order, not the ORDER BY clause order. +/// +/// If any result columsn can be skipped, returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize) +fn orderby_deduplicate_result_columns( + order_by: &Vec<(ast::Expr, Direction)>, + result_columns: &Vec, +) -> Option> { + let mut result_column_remapping: Option> = None; + for (i, rc) in result_columns.iter().enumerate() { + // TODO: implement a custom equality check for expressions + // there are lots of examples where this breaks, even simple ones like + // sum(x) != SUM(x) + let found = order_by + .iter() + .enumerate() + .find(|(_, (expr, _))| expr == &rc.expr); + if let Some((j, _)) = found { + if let Some(ref mut v) = result_column_remapping { + v.push((i, j)); + } else { + result_column_remapping = Some(vec![(i, j)]); + } + } + } + + return result_column_remapping; +} From 3d27ef90f57630fc779ff427a6d2f95bc67669e2 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 26 Nov 2024 17:31:42 +0200 Subject: [PATCH 25/32] emitting result columns generally works the same way -> extract it --- core/translate/emitter.rs | 94 ++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 3f7527020..405079802 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -982,18 +982,13 @@ fn inner_loop_source_emit( } InnerLoopEmitTarget::ResultRow { limit } => { assert!(aggregates.is_none()); - let start_reg = program.alloc_registers(result_columns.len()); - for (i, rc) in result_columns.iter().enumerate() { - assert!(!rc.contains_aggregates); - let reg = start_reg + i; - translate_expr(program, Some(referenced_tables), &rc.expr, reg, None)?; - } - emit_result_row( + emit_select_result( program, - start_reg, - result_columns.len(), + referenced_tables, + result_columns, + None, limit.map(|l| (l, *m.termination_label_stack.last().unwrap())), - ); + )?; Ok(()) } @@ -1412,25 +1407,13 @@ fn group_by_emit( match order_by { None => { - let output_column_count = result_columns.len(); - let output_row_start_reg = program.alloc_registers(output_column_count); - let mut cur_reg = output_row_start_reg; - for (i, rc) in result_columns.iter().enumerate() { - translate_expr( - program, - Some(referenced_tables), - &rc.expr, - cur_reg, - Some(&precomputed_exprs_to_register), - )?; - cur_reg += 1; - } - emit_result_row( + emit_select_result( program, - output_row_start_reg, - output_column_count, + referenced_tables, + result_columns, + Some(&precomputed_exprs_to_register), limit.map(|l| (l, *m.termination_label_stack.last().unwrap())), - ); + )?; } 
Some(order_by) => { let skipped_result_cols = orderby_deduplicate_result_columns(order_by, result_columns); @@ -1535,18 +1518,15 @@ fn agg_without_group_by_emit( precomputed_exprs_to_register.push((&agg.original_expr, agg_start_reg + i)); } - let output_reg = program.alloc_registers(result_columns.len()); - for (i, rc) in result_columns.iter().enumerate() { - translate_expr( - program, - Some(referenced_tables), - &rc.expr, - output_reg + i, - Some(&precomputed_exprs_to_register), - )?; - } // This always emits a ResultRow because currently it can only be used for a single row result - emit_result_row(program, output_reg, result_columns.len(), None); + // Limit is None because we early exit on limit 0 and the max rows here is 1 + emit_select_result( + program, + referenced_tables, + result_columns, + Some(&precomputed_exprs_to_register), + None, + )?; Ok(()) } @@ -1631,12 +1611,12 @@ fn order_by_emit( dest: reg, }); } - emit_result_row( + emit_result_row_and_limit( program, start_reg, result_columns.len(), limit.map(|l| (l, sorting_done_label)), - ); + )?; program.emit_insn_with_label_dependency( Insn::SorterNext { @@ -1651,16 +1631,16 @@ fn order_by_emit( Ok(()) } -/// Emits the bytecode for emitting a result row. -fn emit_result_row( +/// Emits the bytecode for: result row and limit. +fn emit_result_row_and_limit( program: &mut ProgramBuilder, start_reg: usize, - column_count: usize, + result_columns_len: usize, limit: Option<(usize, BranchOffset)>, -) { +) -> Result<()> { program.emit_insn(Insn::ResultRow { start_reg, - count: column_count, + count: result_columns_len, }); if let Some((limit, jump_label_on_limit_reached)) = limit { let limit_reg = program.alloc_register(); @@ -1677,6 +1657,30 @@ fn emit_result_row( jump_label_on_limit_reached, ); } + Ok(()) +} + +/// Emits the bytecode for: all result columns, result row, and limit. +fn emit_select_result( + program: &mut ProgramBuilder, + referenced_tables: &[BTreeTableReference], + result_columns: &[ResultSetColumn], + precomputed_exprs_to_register: Option<&Vec<(&ast::Expr, usize)>>, + limit: Option<(usize, BranchOffset)>, +) -> Result<()> { + let start_reg = program.alloc_registers(result_columns.len()); + for (i, rc) in result_columns.iter().enumerate() { + let reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + &rc.expr, + reg, + precomputed_exprs_to_register, + )?; + } + emit_result_row_and_limit(program, start_reg, result_columns.len(), limit)?; + Ok(()) } /// Emits the bytecode for inserting a row into a sorter. 
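
A minimal sketch of the ORDER BY sorter layout that the surrounding patches converge on: sort keys are emitted first, any result column whose expression is exactly equal to a sort key is skipped, and a remapping records where each SELECT column ends up in the sorter record (this is what m.result_column_indexes_in_orderby_sorter tracks). The sketch models expressions as plain strings instead of ast::Expr; sorter_layout and its variables are illustrative names only and do not exist in the codebase, and the SELECT in the comment is just an assumed example query. Like the real code, matching is exact expression equality, so count(1) and COUNT(1) would not be deduplicated.

// Hypothetical illustration, not part of the patches above.
// Example query: SELECT first_name, count(1) FROM users GROUP BY first_name ORDER BY count(1);
fn sorter_layout(order_by: &[&str], result_columns: &[&str]) -> (Vec<String>, Vec<usize>) {
    // Sort keys always occupy the first slots of the sorter record.
    let mut record: Vec<String> = order_by.iter().map(|s| s.to_string()).collect();
    // Maps each result column (in SELECT order) to its index in the sorter record.
    let mut indexes = Vec::with_capacity(result_columns.len());
    for rc in result_columns {
        match order_by.iter().position(|key| key == rc) {
            // Result column is identical to a sort key: reuse that slot instead of storing it twice.
            Some(i) => indexes.push(i),
            // Otherwise append it after the sort keys.
            None => {
                record.push(rc.to_string());
                indexes.push(record.len() - 1);
            }
        }
    }
    (record, indexes)
}

fn main() {
    let (record, indexes) = sorter_layout(&["count(1)"], &["first_name", "count(1)"]);
    // Sorter record: [count(1), first_name]; count(1) is stored once, as the sort key.
    assert_eq!(record, vec!["count(1)", "first_name"]);
    // Rows are later read back in SELECT order: first_name from slot 1, count(1) from slot 0.
    assert_eq!(indexes, vec![1, 0]);
}

The next patch extracts exactly this key-then-deduplicated-columns emission into order_by_sorter_insert(), so inner_loop_source_emit and group_by_emit no longer duplicate it.
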
From 122546444f56d431fccc30d0f250f8ec17a3c99b Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 26 Nov 2024 17:40:49 +0200 Subject: [PATCH 26/32] extract function order_by_sorter_insert() --- core/translate/emitter.rs | 162 ++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 87 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 405079802..9045643f4 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -913,48 +913,15 @@ fn inner_loop_source_emit( Ok(()) } InnerLoopEmitTarget::OrderBySorter { order_by } => { - let order_by_len = order_by.len(); - let result_columns_to_skip = - orderby_deduplicate_result_columns(order_by, result_columns); - let result_columns_to_skip_len = result_columns_to_skip - .as_ref() - .map(|v| v.len()) - .unwrap_or(0); - let orderby_sorter_column_count = - order_by_len + result_columns.len() - result_columns_to_skip_len; - let start_reg = program.alloc_registers(orderby_sorter_column_count); - for (i, (expr, _)) in order_by.iter().enumerate() { - let key_reg = start_reg + i; - translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; - } - let mut cur_reg = start_reg + order_by_len; - let mut cur_idx_in_orderby_sorter = order_by_len; - for (i, rc) in result_columns.iter().enumerate() { - if let Some(ref v) = result_columns_to_skip { - let found = v.iter().find(|(skipped_idx, _)| *skipped_idx == i); - if let Some((_, result_column_idx)) = found { - m.result_column_indexes_in_orderby_sorter - .insert(i, *result_column_idx); - continue; - } - } - assert!(!rc.contains_aggregates); - translate_expr(program, Some(referenced_tables), &rc.expr, cur_reg, None)?; - m.result_column_indexes_in_orderby_sorter - .insert(i, cur_idx_in_orderby_sorter); - cur_idx_in_orderby_sorter += 1; - cur_reg += 1; - } - - let sort_metadata = m.sort_metadata.as_mut().unwrap(); - sorter_insert( + order_by_sorter_insert( program, - start_reg, - orderby_sorter_column_count, - sort_metadata.sort_cursor, - sort_metadata.sorter_data_register, - ); - + referenced_tables, + order_by, + result_columns, + &mut m.result_column_indexes_in_orderby_sorter, + &m.sort_metadata.as_ref().unwrap(), + None, + )?; Ok(()) } InnerLoopEmitTarget::AggStep => { @@ -1416,53 +1383,15 @@ fn group_by_emit( )?; } Some(order_by) => { - let skipped_result_cols = orderby_deduplicate_result_columns(order_by, result_columns); - let skipped_result_cols_len = - skipped_result_cols.as_ref().map(|v| v.len()).unwrap_or(0); - let output_column_count = - result_columns.len() + order_by.len() - skipped_result_cols_len; - let output_row_start_reg = program.alloc_registers(output_column_count); - let mut cur_reg = output_row_start_reg; - for (expr, _) in order_by.iter() { - translate_expr( - program, - Some(referenced_tables), - expr, - cur_reg, - Some(&precomputed_exprs_to_register), - )?; - cur_reg += 1; - } - - let mut res_col_idx_in_orderby_sorter = order_by.len(); - for (i, rc) in result_columns.iter().enumerate() { - if let Some(ref v) = skipped_result_cols { - let found = v.iter().find(|(skipped_idx, _)| *skipped_idx == i); - if let Some((_, result_col_idx_in_orderby_sorter)) = found { - m.result_column_indexes_in_orderby_sorter - .insert(i, *result_col_idx_in_orderby_sorter); - continue; - } - } - translate_expr( - program, - Some(referenced_tables), - &rc.expr, - cur_reg, - Some(&precomputed_exprs_to_register), - )?; - m.result_column_indexes_in_orderby_sorter - .insert(i, res_col_idx_in_orderby_sorter); - res_col_idx_in_orderby_sorter += 
1; - cur_reg += 1; - } - sorter_insert( + order_by_sorter_insert( program, - output_row_start_reg, - output_column_count, - m.sort_metadata.as_ref().unwrap().sort_cursor, - group_by_metadata.sorter_key_register, - ); + referenced_tables, + order_by, + result_columns, + &mut m.result_column_indexes_in_orderby_sorter, + &m.sort_metadata.as_ref().unwrap(), + Some(&precomputed_exprs_to_register), + )?; } } @@ -1703,6 +1632,65 @@ fn sorter_insert( }); } +fn order_by_sorter_insert( + program: &mut ProgramBuilder, + referenced_tables: &[BTreeTableReference], + order_by: &Vec<(ast::Expr, Direction)>, + result_columns: &Vec, + result_column_indexes_in_orderby_sorter: &mut HashMap, + sort_metadata: &SortMetadata, + precomputed_exprs_to_register: Option<&Vec<(&ast::Expr, usize)>>, +) -> Result<()> { + let order_by_len = order_by.len(); + let result_columns_to_skip = orderby_deduplicate_result_columns(order_by, result_columns); + let result_columns_to_skip_len = result_columns_to_skip + .as_ref() + .map(|v| v.len()) + .unwrap_or(0); + let orderby_sorter_column_count = + order_by_len + result_columns.len() - result_columns_to_skip_len; + let start_reg = program.alloc_registers(orderby_sorter_column_count); + for (i, (expr, _)) in order_by.iter().enumerate() { + let key_reg = start_reg + i; + translate_expr( + program, + Some(referenced_tables), + expr, + key_reg, + precomputed_exprs_to_register, + )?; + } + let mut cur_reg = start_reg + order_by_len; + let mut cur_idx_in_orderby_sorter = order_by_len; + for (i, rc) in result_columns.iter().enumerate() { + if let Some(ref v) = result_columns_to_skip { + let found = v.iter().find(|(skipped_idx, _)| *skipped_idx == i); + if let Some((_, result_column_idx)) = found { + result_column_indexes_in_orderby_sorter.insert(i, *result_column_idx); + continue; + } + } + translate_expr( + program, + Some(referenced_tables), + &rc.expr, + cur_reg, + precomputed_exprs_to_register, + )?; + result_column_indexes_in_orderby_sorter.insert(i, cur_idx_in_orderby_sorter); + cur_idx_in_orderby_sorter += 1; + cur_reg += 1; + } + + sorter_insert( + program, + start_reg, + orderby_sorter_column_count, + sort_metadata.sort_cursor, + sort_metadata.sorter_data_register, + ); + Ok(()) +} /// We need to handle the case where we are emitting to sorter. /// In that case the first columns should be the sort key columns, and the rest is the result columns of the select. /// In case any of the sort keys are exactly equal to a result column, we can skip emitting that result column. From 7f04f8e88fdc7a31dc77d585a8399fafe3396845 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 26 Nov 2024 17:41:08 +0200 Subject: [PATCH 27/32] rename --- core/translate/emitter.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 9045643f4..fe8fbf4cd 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1632,6 +1632,7 @@ fn sorter_insert( }); } +/// Emits the bytecode for inserting a row into an ORDER BY sorter. 
fn order_by_sorter_insert( program: &mut ProgramBuilder, referenced_tables: &[BTreeTableReference], @@ -1642,7 +1643,7 @@ fn order_by_sorter_insert( precomputed_exprs_to_register: Option<&Vec<(&ast::Expr, usize)>>, ) -> Result<()> { let order_by_len = order_by.len(); - let result_columns_to_skip = orderby_deduplicate_result_columns(order_by, result_columns); + let result_columns_to_skip = order_by_deduplicate_result_columns(order_by, result_columns); let result_columns_to_skip_len = result_columns_to_skip .as_ref() .map(|v| v.len()) @@ -1699,7 +1700,7 @@ fn order_by_sorter_insert( /// should be emitted in the SELECT clause order, not the ORDER BY clause order. /// /// If any result columsn can be skipped, returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize) -fn orderby_deduplicate_result_columns( +fn order_by_deduplicate_result_columns( order_by: &Vec<(ast::Expr, Direction)>, result_columns: &Vec, ) -> Option> { From 1b34698872e8efcf00e3e775ed0955e526d0187c Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 26 Nov 2024 18:28:19 +0200 Subject: [PATCH 28/32] add comments and rename some misleading label variables --- core/translate/emitter.rs | 53 +++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index fe8fbf4cd..f8f5d65de 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -881,11 +881,14 @@ fn inner_loop_source_emit( let column_count = sort_keys_count + aggregate_arguments_count; let start_reg = program.alloc_registers(column_count); let mut cur_reg = start_reg; + + // The group by sorter rows will contain the grouping keys first. They are also the sort keys. for expr in group_by.iter() { let key_reg = cur_reg; cur_reg += 1; translate_expr(program, Some(referenced_tables), expr, key_reg, None)?; } + // Then we have the aggregate arguments. for agg in aggregates.iter() { // Here we are collecting scalars for the group by sorter, which will include // both the group by expressions and the aggregate arguments. @@ -900,6 +903,9 @@ fn inner_loop_source_emit( } } + // TODO: although it's less often useful, SQLite does allow for expressions in the SELECT that are not part of a GROUP BY or aggregate. + // We currently ignore those and only emit the GROUP BY keys and aggregate arguments. This should be fixed. + let group_by_metadata = m.group_by_metadata.as_ref().unwrap(); sorter_insert( @@ -931,6 +937,11 @@ fn inner_loop_source_emit( let num_aggs = aggregates.len(); let start_reg = program.alloc_registers(num_aggs); m.aggregation_start_register = Some(start_reg); + + // In planner.rs, we have collected all aggregates from the SELECT clause, including ones where the aggregate is embedded inside + // a more complex expression. Some examples: length(sum(x)), sum(x) + avg(y), sum(x) + 1, etc. + // The result of those more complex expressions depends on the final result of the aggregate, so we don't translate the complete expressions here. + // Instead, we translate the aggregates + any expressions that do not contain aggregates. 
for (i, agg) in aggregates.iter().enumerate() { let reg = start_reg + i; translate_aggregation(program, referenced_tables, agg, reg)?; @@ -948,7 +959,10 @@ fn inner_loop_source_emit( Ok(()) } InnerLoopEmitTarget::ResultRow { limit } => { - assert!(aggregates.is_none()); + assert!( + aggregates.is_none(), + "We should not get here with aggregates" + ); emit_select_result( program, referenced_tables, @@ -1112,7 +1126,7 @@ fn group_by_emit( referenced_tables: &[BTreeTableReference], m: &mut Metadata, ) -> Result<()> { - let sorter_data_label = program.allocate_label(); + let sort_loop_start_label = program.allocate_label(); let grouping_done_label = program.allocate_label(); let group_by_metadata = m.group_by_metadata.as_mut().unwrap(); @@ -1166,7 +1180,7 @@ fn group_by_emit( grouping_done_label, ); - program.defer_label_resolution(sorter_data_label, program.offset() as usize); + program.defer_label_resolution(sort_loop_start_label, program.offset() as usize); // Read a row from the sorted data in the sorter into the pseudo cursor program.emit_insn(Insn::SorterData { cursor_id: group_by_metadata.sort_cursor, @@ -1299,9 +1313,9 @@ fn group_by_emit( program.emit_insn_with_label_dependency( Insn::SorterNext { cursor_id: group_by_metadata.sort_cursor, - pc_if_next: sorter_data_label, + pc_if_next: sort_loop_start_label, }, - sorter_data_label, + sort_loop_start_label, ); program.resolve_label(grouping_done_label, program.offset()); @@ -1460,7 +1474,7 @@ fn agg_without_group_by_emit( Ok(()) } -/// Emits the bytecode for processing an ORDER BY clause. +/// Emits the bytecode for outputting rows from an ORDER BY sorter. /// This is called when the main query execution loop has finished processing, /// and we can now emit rows from the ORDER BY sorter. fn order_by_emit( @@ -1470,18 +1484,20 @@ fn order_by_emit( limit: Option, m: &mut Metadata, ) -> Result<()> { - let sorter_data_label = program.allocate_label(); - let sorting_done_label = program.allocate_label(); + let sort_loop_start_label = program.allocate_label(); + let sort_loop_end_label = program.allocate_label(); program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); let mut pseudo_columns = vec![]; for (i, _) in order_by.iter().enumerate() { pseudo_columns.push(Column { + // Names don't matter. We are tracking which result column is in which position in the ORDER BY clause in m.result_column_indexes_in_orderby_sorter. name: format!("sort_key_{}", i), primary_key: false, ty: crate::schema::Type::Null, }); } for (i, rc) in result_columns.iter().enumerate() { + // If any result columns are not in the ORDER BY sorter, it's because they are equal to a sort key and were already added to the pseudo columns above. if let Some(ref v) = m.result_columns_to_skip_in_orderby_sorter { if v.contains(&i) { continue; @@ -1517,19 +1533,20 @@ fn order_by_emit( program.emit_insn_with_label_dependency( Insn::SorterSort { cursor_id: sort_metadata.sort_cursor, - pc_if_empty: sorting_done_label, + pc_if_empty: sort_loop_end_label, }, - sorting_done_label, + sort_loop_end_label, ); - program.defer_label_resolution(sorter_data_label, program.offset() as usize); + program.defer_label_resolution(sort_loop_start_label, program.offset() as usize); program.emit_insn(Insn::SorterData { cursor_id: sort_metadata.sort_cursor, dest_reg: sort_metadata.sorter_data_register, pseudo_cursor, }); - // EMIT COLUMNS FROM SORTER AND EMIT ROW + // We emit the columns in SELECT order, not sorter order (sorter always has the sort keys first). 
+ // This is tracked in m.result_column_indexes_in_orderby_sorter. let cursor_id = pseudo_cursor; let start_reg = program.alloc_registers(result_columns.len()); for i in 0..result_columns.len() { @@ -1544,18 +1561,18 @@ fn order_by_emit( program, start_reg, result_columns.len(), - limit.map(|l| (l, sorting_done_label)), + limit.map(|l| (l, sort_loop_end_label)), )?; program.emit_insn_with_label_dependency( Insn::SorterNext { cursor_id: sort_metadata.sort_cursor, - pc_if_next: sorter_data_label, + pc_if_next: sort_loop_start_label, }, - sorter_data_label, + sort_loop_start_label, ); - program.resolve_label(sorting_done_label, program.offset()); + program.resolve_label(sort_loop_end_label, program.offset()); Ok(()) } @@ -1643,11 +1660,14 @@ fn order_by_sorter_insert( precomputed_exprs_to_register: Option<&Vec<(&ast::Expr, usize)>>, ) -> Result<()> { let order_by_len = order_by.len(); + // If any result columns can be skipped due to being an exact duplicate of a sort key, we need to know which ones and their new index in the ORDER BY sorter. let result_columns_to_skip = order_by_deduplicate_result_columns(order_by, result_columns); let result_columns_to_skip_len = result_columns_to_skip .as_ref() .map(|v| v.len()) .unwrap_or(0); + + // The ORDER BY sorter has the sort keys first, then the result columns. let orderby_sorter_column_count = order_by_len + result_columns.len() - result_columns_to_skip_len; let start_reg = program.alloc_registers(orderby_sorter_column_count); @@ -1666,6 +1686,7 @@ fn order_by_sorter_insert( for (i, rc) in result_columns.iter().enumerate() { if let Some(ref v) = result_columns_to_skip { let found = v.iter().find(|(skipped_idx, _)| *skipped_idx == i); + // If the result column is in the list of columns to skip, we need to know its new index in the ORDER BY sorter. if let Some((_, result_column_idx)) = found { result_column_indexes_in_orderby_sorter.insert(i, *result_column_idx); continue; From 7d569aee1f0fa7f5e17c01724da41251288caec8 Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Tue, 26 Nov 2024 18:37:06 +0200 Subject: [PATCH 29/32] fix stupid comment --- core/translate/emitter.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index f8f5d65de..c286bb017 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1713,14 +1713,12 @@ fn order_by_sorter_insert( ); Ok(()) } -/// We need to handle the case where we are emitting to sorter. -/// In that case the first columns should be the sort key columns, and the rest is the result columns of the select. -/// In case any of the sort keys are exactly equal to a result column, we can skip emitting that result column. -/// We need to do this before rewriting the result columns to registers because we need to know which columns to skip. -/// Moreover, we need to keep track what index in the ORDER BY sorter the result columns have, because the result columns -/// should be emitted in the SELECT clause order, not the ORDER BY clause order. + +/// In case any of the ORDER BY sort keys are exactly equal to a result column, we can skip emitting that result column. +/// If we skip a result column, we need to keep track what index in the ORDER BY sorter the result columns have, +/// because the result columns should be emitted in the SELECT clause order, not the ORDER BY clause order. 
/// -/// If any result columsn can be skipped, returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize) +/// If any result columns can be skipped, this returns list of 2-tuples of (SkippedResultColumnIndex: usize, ResultColumnIndexInOrderBySorter: usize) fn order_by_deduplicate_result_columns( order_by: &Vec<(ast::Expr, Direction)>, result_columns: &Vec, From db462530f1969b500b97dabdf3845f9ef730625a Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 27 Nov 2024 19:27:36 +0200 Subject: [PATCH 30/32] metadata instead of m --- core/translate/emitter.rs | 168 +++++++++++++++++++++----------------- 1 file changed, 92 insertions(+), 76 deletions(-) diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index c286bb017..8548b28b1 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -259,11 +259,13 @@ pub fn emit_program( fn init_order_by( program: &mut ProgramBuilder, order_by: &Vec<(ast::Expr, Direction)>, - m: &mut Metadata, + metadata: &mut Metadata, ) -> Result<()> { - m.termination_label_stack.push(program.allocate_label()); + metadata + .termination_label_stack + .push(program.allocate_label()); let sort_cursor = program.alloc_cursor_id(None, None); - m.sort_metadata = Some(SortMetadata { + metadata.sort_metadata = Some(SortMetadata { sort_cursor, sorter_data_register: program.alloc_register(), }); @@ -284,10 +286,10 @@ fn init_group_by( program: &mut ProgramBuilder, group_by: &Vec, aggregates: &Vec, - m: &mut Metadata, + metadata: &mut Metadata, ) -> Result<()> { let agg_final_label = program.allocate_label(); - m.termination_label_stack.push(agg_final_label); + metadata.termination_label_stack.push(agg_final_label); let num_aggs = aggregates.len(); let sort_cursor = program.alloc_cursor_id(None, None); @@ -343,9 +345,9 @@ fn init_group_by( subroutine_accumulator_clear_label, ); - m.aggregation_start_register = Some(agg_exprs_start_reg); + metadata.aggregation_start_register = Some(agg_exprs_start_reg); - m.group_by_metadata = Some(GroupByMetadata { + metadata.group_by_metadata = Some(GroupByMetadata { sort_cursor, subroutine_accumulator_clear_label, subroutine_accumulator_clear_return_offset_register, @@ -365,7 +367,7 @@ fn init_group_by( fn init_source( program: &mut ProgramBuilder, source: &SourceOperator, - m: &mut Metadata, + metadata: &mut Metadata, ) -> Result<()> { match source { SourceOperator::Join { @@ -382,10 +384,10 @@ fn init_source( check_match_flag_label: program.allocate_label(), on_match_jump_to_label: program.allocate_label(), }; - m.left_joins.insert(*id, lj_metadata); + metadata.left_joins.insert(*id, lj_metadata); } - init_source(program, left, m)?; - init_source(program, right, m)?; + init_source(program, left, metadata)?; + init_source(program, right, metadata)?; return Ok(()); } @@ -400,7 +402,7 @@ fn init_source( ); let root_page = table_reference.table.root_page; let next_row_label = program.allocate_label(); - m.next_row_labels.insert(*id, next_row_label); + metadata.next_row_labels.insert(*id, next_row_label); program.emit_insn(Insn::OpenReadAsync { cursor_id, root_page, @@ -424,11 +426,11 @@ fn init_source( if !matches!(search, Search::PrimaryKeyEq { .. }) { // Primary key equality search is handled with a SeekRowid instruction which does not loop, since it is a single row lookup. 
- m.next_row_labels.insert(*id, next_row_label); + metadata.next_row_labels.insert(*id, next_row_label); } let scan_loop_body_label = program.allocate_label(); - m.scan_loop_body_labels.push(scan_loop_body_label); + metadata.scan_loop_body_labels.push(scan_loop_body_label); program.emit_insn(Insn::OpenReadAsync { cursor_id: table_cursor_id, root_page: table_reference.table.root_page, @@ -460,7 +462,7 @@ fn open_loop( program: &mut ProgramBuilder, source: &mut SourceOperator, referenced_tables: &[BTreeTableReference], - m: &mut Metadata, + metadata: &mut Metadata, ) -> Result<()> { match source { SourceOperator::Join { @@ -471,25 +473,27 @@ fn open_loop( outer, .. } => { - open_loop(program, left, referenced_tables, m)?; + open_loop(program, left, referenced_tables, metadata)?; - let mut jump_target_when_false = *m + let mut jump_target_when_false = *metadata .next_row_labels .get(&right.id()) - .or(m.next_row_labels.get(&left.id())) - .unwrap_or(m.termination_label_stack.last().unwrap()); + .or(metadata.next_row_labels.get(&left.id())) + .unwrap_or(metadata.termination_label_stack.last().unwrap()); if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); + let lj_meta = metadata.left_joins.get(id).unwrap(); program.emit_insn(Insn::Integer { value: 0, dest: lj_meta.match_flag_register, }); jump_target_when_false = lj_meta.check_match_flag_label; } - m.next_row_labels.insert(right.id(), jump_target_when_false); + metadata + .next_row_labels + .insert(right.id(), jump_target_when_false); - open_loop(program, right, referenced_tables, m)?; + open_loop(program, right, referenced_tables, metadata)?; if let Some(predicates) = predicates { let jump_target_when_true = program.allocate_label(); @@ -511,7 +515,7 @@ fn open_loop( } if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); + let lj_meta = metadata.left_joins.get(id).unwrap(); program.defer_label_resolution( lj_meta.set_match_flag_true_label, program.offset() as usize, @@ -540,7 +544,7 @@ fn open_loop( program.emit_insn(Insn::RewindAsync { cursor_id }); } let scan_loop_body_label = program.allocate_label(); - let halt_label = m.termination_label_stack.last().unwrap(); + let halt_label = metadata.termination_label_stack.last().unwrap(); program.emit_insn_with_label_dependency( if iter_dir .as_ref() @@ -558,10 +562,10 @@ fn open_loop( }, *halt_label, ); - m.scan_loop_body_labels.push(scan_loop_body_label); + metadata.scan_loop_body_labels.push(scan_loop_body_label); program.defer_label_resolution(scan_loop_body_label, program.offset() as usize); - let jump_label = m.next_row_labels.get(id).unwrap_or(halt_label); + let jump_label = metadata.next_row_labels.get(id).unwrap_or(halt_label); if let Some(preds) = predicates { for expr in preds { let jump_target_when_true = program.allocate_label(); @@ -600,7 +604,7 @@ fn open_loop( } else { None }; - let scan_loop_body_label = *m.scan_loop_body_labels.last().unwrap(); + let scan_loop_body_label = *metadata.scan_loop_body_labels.last().unwrap(); let cmp_reg = program.alloc_register(); let (cmp_expr, cmp_op) = match search { Search::IndexSearch { @@ -631,7 +635,7 @@ fn open_loop( cursor_id: index_cursor_id.unwrap_or(table_cursor_id), start_reg: cmp_reg, num_regs: 1, - target_pc: *m.termination_label_stack.last().unwrap(), + target_pc: *metadata.termination_label_stack.last().unwrap(), }, ast::Operator::Greater | ast::Operator::Less @@ -640,11 +644,11 @@ fn open_loop( cursor_id: index_cursor_id.unwrap_or(table_cursor_id), start_reg: cmp_reg, num_regs: 1, - target_pc: 
*m.termination_label_stack.last().unwrap(), + target_pc: *metadata.termination_label_stack.last().unwrap(), }, _ => unreachable!(), }, - *m.termination_label_stack.last().unwrap(), + *metadata.termination_label_stack.last().unwrap(), ); if *cmp_op == ast::Operator::Less || *cmp_op == ast::Operator::LessEquals { translate_expr(program, Some(referenced_tables), cmp_expr, cmp_reg, None)?; @@ -661,10 +665,10 @@ fn open_loop( // // For primary key searches we emit RowId and then compare it to the seek value. - let abort_jump_target = *m + let abort_jump_target = *metadata .next_row_labels .get(id) - .unwrap_or(m.termination_label_stack.last().unwrap()); + .unwrap_or(metadata.termination_label_stack.last().unwrap()); match cmp_op { ast::Operator::Equals | ast::Operator::LessEquals => { if let Some(index_cursor_id) = index_cursor_id { @@ -731,10 +735,10 @@ fn open_loop( } } - let jump_label = m + let jump_label = metadata .next_row_labels .get(id) - .unwrap_or(m.termination_label_stack.last().unwrap()); + .unwrap_or(metadata.termination_label_stack.last().unwrap()); if let Search::PrimaryKeyEq { cmp_expr } = search { let src_reg = program.alloc_register(); @@ -797,7 +801,11 @@ pub enum InnerLoopEmitTarget<'a> { /// Emits the bytecode for the inner loop of a query. /// At this point the cursors for all tables have been opened and rewound. -fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metadata) -> Result<()> { +fn inner_loop_emit( + program: &mut ProgramBuilder, + plan: &mut Plan, + metadata: &mut Metadata, +) -> Result<()> { if let Some(wc) = &plan.where_clause { for predicate in wc.iter() { if predicate.is_always_false()? { @@ -817,7 +825,7 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada program, &plan.result_columns, &plan.aggregates, - m, + metadata, InnerLoopEmitTarget::GroupBySorter { group_by, aggregates: &plan.aggregates.as_ref().unwrap(), @@ -832,7 +840,7 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada program, &plan.result_columns, &plan.aggregates, - m, + metadata, InnerLoopEmitTarget::AggStep, &plan.referenced_tables, ); @@ -843,7 +851,7 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada program, &plan.result_columns, &plan.aggregates, - m, + metadata, InnerLoopEmitTarget::OrderBySorter { order_by }, &plan.referenced_tables, ); @@ -853,7 +861,7 @@ fn inner_loop_emit(program: &mut ProgramBuilder, plan: &mut Plan, m: &mut Metada program, &plan.result_columns, &plan.aggregates, - m, + metadata, InnerLoopEmitTarget::ResultRow { limit: plan.limit }, &plan.referenced_tables, ); @@ -866,7 +874,7 @@ fn inner_loop_source_emit( program: &mut ProgramBuilder, result_columns: &Vec, aggregates: &Option>, - m: &mut Metadata, + metadata: &mut Metadata, emit_target: InnerLoopEmitTarget, referenced_tables: &[BTreeTableReference], ) -> Result<()> { @@ -906,7 +914,7 @@ fn inner_loop_source_emit( // TODO: although it's less often useful, SQLite does allow for expressions in the SELECT that are not part of a GROUP BY or aggregate. // We currently ignore those and only emit the GROUP BY keys and aggregate arguments. This should be fixed. 
- let group_by_metadata = m.group_by_metadata.as_ref().unwrap(); + let group_by_metadata = metadata.group_by_metadata.as_ref().unwrap(); sorter_insert( program, @@ -924,8 +932,8 @@ fn inner_loop_source_emit( referenced_tables, order_by, result_columns, - &mut m.result_column_indexes_in_orderby_sorter, - &m.sort_metadata.as_ref().unwrap(), + &mut metadata.result_column_indexes_in_orderby_sorter, + &metadata.sort_metadata.as_ref().unwrap(), None, )?; Ok(()) @@ -933,10 +941,10 @@ fn inner_loop_source_emit( InnerLoopEmitTarget::AggStep => { let aggregates = aggregates.as_ref().unwrap(); let agg_final_label = program.allocate_label(); - m.termination_label_stack.push(agg_final_label); + metadata.termination_label_stack.push(agg_final_label); let num_aggs = aggregates.len(); let start_reg = program.alloc_registers(num_aggs); - m.aggregation_start_register = Some(start_reg); + metadata.aggregation_start_register = Some(start_reg); // In planner.rs, we have collected all aggregates from the SELECT clause, including ones where the aggregate is embedded inside // a more complex expression. Some examples: length(sum(x)), sum(x) + avg(y), sum(x) + 1, etc. @@ -968,7 +976,7 @@ fn inner_loop_source_emit( referenced_tables, result_columns, None, - limit.map(|l| (l, *m.termination_label_stack.last().unwrap())), + limit.map(|l| (l, *metadata.termination_label_stack.last().unwrap())), )?; Ok(()) @@ -982,7 +990,7 @@ fn inner_loop_source_emit( fn close_loop( program: &mut ProgramBuilder, source: &SourceOperator, - m: &mut Metadata, + metadata: &mut Metadata, referenced_tables: &[BTreeTableReference], ) -> Result<()> { match source { @@ -993,10 +1001,10 @@ fn close_loop( outer, .. } => { - close_loop(program, right, m, referenced_tables)?; + close_loop(program, right, metadata, referenced_tables)?; if *outer { - let lj_meta = m.left_joins.get(id).unwrap(); + let lj_meta = metadata.left_joins.get(id).unwrap(); // If the left join match flag has been set to 1, we jump to the next row on the outer table (result row has been emitted already) program.resolve_label(lj_meta.check_match_flag_label, program.offset()); program.emit_insn_with_label_dependency( @@ -1029,13 +1037,13 @@ fn close_loop( ); } let next_row_label = if *outer { - m.left_joins.get(id).unwrap().on_match_jump_to_label + metadata.left_joins.get(id).unwrap().on_match_jump_to_label } else { - *m.next_row_labels.get(&right.id()).unwrap() + *metadata.next_row_labels.get(&right.id()).unwrap() }; // This points to the NextAsync instruction of the left table program.resolve_label(next_row_label, program.offset()); - close_loop(program, left, m, referenced_tables)?; + close_loop(program, left, metadata, referenced_tables)?; Ok(()) } @@ -1046,7 +1054,7 @@ fn close_loop( .. } => { let cursor_id = program.resolve_cursor_id(&table_reference.table_identifier); - program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); if iter_dir .as_ref() .is_some_and(|dir| *dir == IterationDirection::Backwards) @@ -1055,7 +1063,7 @@ fn close_loop( } else { program.emit_insn(Insn::NextAsync { cursor_id }); } - let jump_label = m.scan_loop_body_labels.pop().unwrap(); + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); if iter_dir .as_ref() @@ -1096,9 +1104,9 @@ fn close_loop( } Search::PrimaryKeyEq { .. 
} => unreachable!(), }; - program.resolve_label(*m.next_row_labels.get(id).unwrap(), program.offset()); + program.resolve_label(*metadata.next_row_labels.get(id).unwrap(), program.offset()); program.emit_insn(Insn::NextAsync { cursor_id }); - let jump_label = m.scan_loop_body_labels.pop().unwrap(); + let jump_label = metadata.scan_loop_body_labels.pop().unwrap(); program.emit_insn_with_label_dependency( Insn::NextAwait { cursor_id, @@ -1124,11 +1132,11 @@ fn group_by_emit( aggregates: &Vec, limit: Option, referenced_tables: &[BTreeTableReference], - m: &mut Metadata, + metadata: &mut Metadata, ) -> Result<()> { let sort_loop_start_label = program.allocate_label(); let grouping_done_label = program.allocate_label(); - let group_by_metadata = m.group_by_metadata.as_mut().unwrap(); + let group_by_metadata = metadata.group_by_metadata.as_mut().unwrap(); let GroupByMetadata { group_exprs_comparison_register: comparison_register, @@ -1143,7 +1151,7 @@ fn group_by_emit( sorter_key_register, .. } = *group_by_metadata; - let halt_label = *m.termination_label_stack.first().unwrap(); + let halt_label = *metadata.termination_label_stack.first().unwrap(); // all group by columns and all arguments of agg functions are in the sorter. // the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same) @@ -1249,7 +1257,7 @@ fn group_by_emit( target_pc: halt_label, decrement_by: 0, }, - m.termination_label_stack[0], + metadata.termination_label_stack[0], ); program.add_comment(program.offset(), "goto clear accumulator subroutine"); @@ -1263,7 +1271,7 @@ fn group_by_emit( // Accumulate the values into the aggregations program.resolve_label(agg_step_label, program.offset()); - let start_reg = m.aggregation_start_register.unwrap(); + let start_reg = metadata.aggregation_start_register.unwrap(); let mut cursor_index = group_by.len(); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = start_reg + i; @@ -1330,7 +1338,8 @@ fn group_by_emit( ); program.add_comment(program.offset(), "group by finished"); - let termination_label = m.termination_label_stack[m.termination_label_stack.len() - 2]; + let termination_label = + metadata.termination_label_stack[metadata.termination_label_stack.len() - 2]; program.emit_insn_with_label_dependency( Insn::Goto { target_pc: termination_label, @@ -1351,7 +1360,7 @@ fn group_by_emit( ); program.add_comment(program.offset(), "output group by row subroutine start"); - let termination_label = *m.termination_label_stack.last().unwrap(); + let termination_label = *metadata.termination_label_stack.last().unwrap(); program.emit_insn_with_label_dependency( Insn::IfPos { reg: group_by_metadata.data_in_accumulator_indicator_register, @@ -1364,8 +1373,11 @@ fn group_by_emit( return_reg: group_by_metadata.subroutine_accumulator_output_return_offset_register, }); - let agg_start_reg = m.aggregation_start_register.unwrap(); - program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); + let agg_start_reg = metadata.aggregation_start_register.unwrap(); + program.resolve_label( + metadata.termination_label_stack.pop().unwrap(), + program.offset(), + ); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = agg_start_reg + i; program.emit_insn(Insn::AggFinal { @@ -1393,7 +1405,7 @@ fn group_by_emit( referenced_tables, result_columns, Some(&precomputed_exprs_to_register), - limit.map(|l| (l, *m.termination_label_stack.last().unwrap())), + limit.map(|l| (l, 
*metadata.termination_label_stack.last().unwrap())), )?; } Some(order_by) => { @@ -1402,8 +1414,8 @@ fn group_by_emit( referenced_tables, order_by, result_columns, - &mut m.result_column_indexes_in_orderby_sorter, - &m.sort_metadata.as_ref().unwrap(), + &mut metadata.result_column_indexes_in_orderby_sorter, + &metadata.sort_metadata.as_ref().unwrap(), Some(&precomputed_exprs_to_register), )?; } @@ -1443,9 +1455,9 @@ fn agg_without_group_by_emit( referenced_tables: &Vec, result_columns: &Vec, aggregates: &Vec, - m: &mut Metadata, + metadata: &mut Metadata, ) -> Result<()> { - let agg_start_reg = m.aggregation_start_register.unwrap(); + let agg_start_reg = metadata.aggregation_start_register.unwrap(); for (i, agg) in aggregates.iter().enumerate() { let agg_result_reg = agg_start_reg + i; program.emit_insn(Insn::AggFinal { @@ -1482,11 +1494,14 @@ fn order_by_emit( order_by: &Vec<(ast::Expr, Direction)>, result_columns: &Vec, limit: Option, - m: &mut Metadata, + metadata: &mut Metadata, ) -> Result<()> { let sort_loop_start_label = program.allocate_label(); let sort_loop_end_label = program.allocate_label(); - program.resolve_label(m.termination_label_stack.pop().unwrap(), program.offset()); + program.resolve_label( + metadata.termination_label_stack.pop().unwrap(), + program.offset(), + ); let mut pseudo_columns = vec![]; for (i, _) in order_by.iter().enumerate() { pseudo_columns.push(Column { @@ -1498,7 +1513,7 @@ fn order_by_emit( } for (i, rc) in result_columns.iter().enumerate() { // If any result columns are not in the ORDER BY sorter, it's because they are equal to a sort key and were already added to the pseudo columns above. - if let Some(ref v) = m.result_columns_to_skip_in_orderby_sorter { + if let Some(ref v) = metadata.result_columns_to_skip_in_orderby_sorter { if v.contains(&i) { continue; } @@ -1511,7 +1526,8 @@ fn order_by_emit( } let num_columns_in_sorter = order_by.len() + result_columns.len() - - m.result_columns_to_skip_in_orderby_sorter + - metadata + .result_columns_to_skip_in_orderby_sorter .as_ref() .map(|v| v.len()) .unwrap_or(0); @@ -1522,7 +1538,7 @@ fn order_by_emit( columns: pseudo_columns, }))), ); - let sort_metadata = m.sort_metadata.as_mut().unwrap(); + let sort_metadata = metadata.sort_metadata.as_mut().unwrap(); program.emit_insn(Insn::OpenPseudo { cursor_id: pseudo_cursor, @@ -1553,7 +1569,7 @@ fn order_by_emit( let reg = start_reg + i; program.emit_insn(Insn::Column { cursor_id, - column: m.result_column_indexes_in_orderby_sorter[&i], + column: metadata.result_column_indexes_in_orderby_sorter[&i], dest: reg, }); } From da811dc40380a3ece794de17e0acdfa4ca614dce Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 27 Nov 2024 19:30:07 +0200 Subject: [PATCH 31/32] add doc comments for members of Plan struct --- core/translate/plan.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index ac75981f6..2922b9fdb 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -21,14 +21,23 @@ pub struct ResultSetColumn { #[derive(Debug)] pub struct Plan { + /// A tree of sources (tables). pub source: SourceOperator, + /// the columns inside SELECT ... FROM pub result_columns: Vec, + /// where clause split into a vec at 'AND' boundaries. 
pub where_clause: Option>, + /// group by clause pub group_by: Option>, + /// order by clause pub order_by: Option>, + /// all the aggregates collected from the result columns, order by, and (TODO) having clauses pub aggregates: Option>, + /// limit clause pub limit: Option, + /// all the tables referenced in the query pub referenced_tables: Vec, + /// all the indexes available pub available_indexes: Vec>, } From 84742b81fa78ba5d08bdba7618dfe6919c150c5c Mon Sep 17 00:00:00 2001 From: jussisaurio Date: Wed, 27 Nov 2024 22:43:36 +0200 Subject: [PATCH 32/32] Obsolete comment --- core/translate/plan.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 2922b9fdb..ef5d97948 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -54,17 +54,7 @@ pub enum IterationDirection { } /** - An Operator is a Node in the query plan. - Operators form a tree structure, with each having zero or more children. - For example, a query like `SELECT t1.foo FROM t1 ORDER BY t1.foo LIMIT 1` would have the following structure: - Limit - Order - Project - Scan - - Operators also have a unique ID, which is used to identify them in the query plan and attach metadata. - They also have a step counter, which is used to track the current step in the operator's execution. - TODO: perhaps 'step' shouldn't be in this struct, since it's an execution time concept, not a plan time concept. + A SourceOperator is a Node in the query plan that reads data from a table. */ #[derive(Clone, Debug)] pub enum SourceOperator {
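
To make the new Plan doc comments concrete, here is a rough, self-contained sketch of how a simple aggregate query could populate a Plan-shaped value. PlanSketch and its string-typed fields are stand-ins invented for illustration; the real struct holds ast::Expr, SourceOperator, Aggregate and BTreeTableReference values, and the variant names of the Direction stand-in are likewise assumed.

// Simplified stand-ins for the planner's types, for illustration only.
#[derive(Debug)]
#[allow(dead_code)]
enum Direction {
    Ascending,
    Descending,
}

#[derive(Debug)]
struct PlanSketch {
    source: &'static str,                    // tree of sources (tables)
    result_columns: Vec<&'static str>,       // the columns inside SELECT ... FROM
    where_clause: Option<Vec<&'static str>>, // split into a vec at AND boundaries
    group_by: Option<Vec<&'static str>>,
    order_by: Option<Vec<(&'static str, Direction)>>,
    aggregates: Option<Vec<&'static str>>,   // aggregates collected from the result columns and ORDER BY
    limit: Option<usize>,
    referenced_tables: Vec<&'static str>,
    available_indexes: Vec<&'static str>,
}

fn main() {
    // SELECT name, count(1) FROM users
    // WHERE age > 18 AND name != ''
    // GROUP BY name ORDER BY count(1) DESC LIMIT 5
    let plan = PlanSketch {
        source: "Scan(users)",
        result_columns: vec!["name", "count(1)"],
        where_clause: Some(vec!["age > 18", "name != ''"]),
        group_by: Some(vec!["name"]),
        order_by: Some(vec![("count(1)", Direction::Descending)]),
        aggregates: Some(vec!["count(1)"]),
        limit: Some(5),
        referenced_tables: vec!["users"],
        available_indexes: vec![],
    };
    println!("{:#?}", plan);
}

Splitting the WHERE clause at AND boundaries matches how the inner loop consumes it: each conjunct gets its own conditional jump to the row's skip label.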