diff --git a/core/expr.rs b/core/expr.rs new file mode 100644 index 000000000..b4bf181fe --- /dev/null +++ b/core/expr.rs @@ -0,0 +1,583 @@ +use anyhow::Result; +use sqlite3_parser::ast::{self, Expr, UnaryOperator}; + +use crate::{ + function::{Func, SingleRowFunc}, + schema::{Column, Schema, Table}, + select::{ColumnInfo, Select, SrcTable}, + util::normalize_ident, + vdbe::{BranchOffset, Insn, ProgramBuilder}, +}; + +pub fn build_select<'a>(schema: &Schema, select: &'a ast::Select) -> Result> { + match &select.body.select { + ast::OneSelect::Select { + columns, + from: Some(from), + where_clause, + .. + } => { + let (table_name, maybe_alias) = match &from.select { + Some(select_table) => match select_table.as_ref() { + ast::SelectTable::Table(name, alias, ..) => ( + &name.name, + alias.as_ref().map(|als| match als { + ast::As::As(alias) => alias, // users as u + ast::As::Elided(alias) => alias, // users u + }), + ), + _ => todo!(), + }, + None => todo!(), + }; + let table_name = &table_name.0; + let maybe_alias = maybe_alias.map(|als| &als.0); + let table = match schema.get_table(table_name) { + Some(table) => table, + None => anyhow::bail!("Parse error: no such table: {}", table_name), + }; + let mut joins = Vec::new(); + joins.push(SrcTable { + table: Table::BTree(table.clone()), + alias: maybe_alias, + join_info: None, + }); + if let Some(selected_joins) = &from.joins { + for join in selected_joins { + let (table_name, maybe_alias) = match &join.table { + ast::SelectTable::Table(name, alias, ..) => ( + &name.name, + alias.as_ref().map(|als| match als { + ast::As::As(alias) => alias, // users as u + ast::As::Elided(alias) => alias, // users u + }), + ), + _ => todo!(), + }; + let table_name = &table_name.0; + let maybe_alias = maybe_alias.as_ref().map(|als| &als.0); + let table = match schema.get_table(table_name) { + Some(table) => table, + None => anyhow::bail!("Parse error: no such table: {}", table_name), + }; + joins.push(SrcTable { + table: Table::BTree(table), + alias: maybe_alias, + join_info: Some(join), + }); + } + } + + let _table = Table::BTree(table); + let column_info = analyze_columns(columns, &joins); + let exist_aggregation = column_info + .iter() + .any(|info| info.is_aggregation_function()); + Ok(Select { + columns, + column_info, + src_tables: joins, + limit: &select.limit, + exist_aggregation, + where_clause, + loops: Vec::new(), + }) + } + ast::OneSelect::Select { + columns, + from: None, + where_clause, + .. + } => { + let column_info = analyze_columns(columns, &Vec::new()); + let exist_aggregation = column_info + .iter() + .any(|info| info.is_aggregation_function()); + Ok(Select { + columns, + column_info, + src_tables: Vec::new(), + limit: &select.limit, + where_clause, + exist_aggregation, + loops: Vec::new(), + }) + } + _ => todo!(), + } +} + +pub fn translate_expr( + program: &mut ProgramBuilder, + select: &Select, + expr: &ast::Expr, + target_register: usize, +) -> Result { + match expr { + ast::Expr::Between { .. } => todo!(), + ast::Expr::Binary(e1, op, e2) => { + let e1_reg = program.alloc_register(); + let e2_reg = program.alloc_register(); + let _ = translate_expr(program, select, e1, e1_reg)?; + let _ = translate_expr(program, select, e2, e2_reg)?; + + match op { + ast::Operator::NotEquals => { + let if_true_label = program.allocate_label(); + wrap_eval_jump_expr( + program, + Insn::Ne { + lhs: e1_reg, + rhs: e2_reg, + target_pc: if_true_label, + }, + target_register, + if_true_label, + ); + } + ast::Operator::Equals => { + let if_true_label = program.allocate_label(); + wrap_eval_jump_expr( + program, + Insn::Eq { + lhs: e1_reg, + rhs: e2_reg, + target_pc: if_true_label, + }, + target_register, + if_true_label, + ); + } + ast::Operator::Less => { + let if_true_label = program.allocate_label(); + wrap_eval_jump_expr( + program, + Insn::Lt { + lhs: e1_reg, + rhs: e2_reg, + target_pc: if_true_label, + }, + target_register, + if_true_label, + ); + } + ast::Operator::LessEquals => { + let if_true_label = program.allocate_label(); + wrap_eval_jump_expr( + program, + Insn::Le { + lhs: e1_reg, + rhs: e2_reg, + target_pc: if_true_label, + }, + target_register, + if_true_label, + ); + } + ast::Operator::Greater => { + let if_true_label = program.allocate_label(); + wrap_eval_jump_expr( + program, + Insn::Gt { + lhs: e1_reg, + rhs: e2_reg, + target_pc: if_true_label, + }, + target_register, + if_true_label, + ); + } + ast::Operator::GreaterEquals => { + let if_true_label = program.allocate_label(); + wrap_eval_jump_expr( + program, + Insn::Ge { + lhs: e1_reg, + rhs: e2_reg, + target_pc: if_true_label, + }, + target_register, + if_true_label, + ); + } + ast::Operator::Add => { + program.emit_insn(Insn::Add { + lhs: e1_reg, + rhs: e2_reg, + dest: target_register, + }); + } + other_unimplemented => todo!("{:?}", other_unimplemented), + } + Ok(target_register) + } + ast::Expr::Case { .. } => todo!(), + ast::Expr::Cast { .. } => todo!(), + ast::Expr::Collate(_, _) => todo!(), + ast::Expr::DoublyQualified(_, _, _) => todo!(), + ast::Expr::Exists(_) => todo!(), + ast::Expr::FunctionCall { + name, + distinctness: _, + args, + filter_over: _, + } => { + let func_type: Option = match normalize_ident(name.0.as_str()).as_str().parse() { + Ok(func) => Some(func), + Err(_) => None, + }; + match func_type { + Some(Func::Agg(_)) => { + anyhow::bail!("Parse error: aggregation function in non-aggregation context") + } + Some(Func::SingleRow(srf)) => { + match srf { + SingleRowFunc::Coalesce => { + let args = if let Some(args) = args { + if args.len() < 2 { + anyhow::bail!( + "Parse error: coalesce function with less than 2 arguments" + ); + } + args + } else { + anyhow::bail!("Parse error: coalesce function with no arguments"); + }; + + // coalesce function is implemented as a series of not null checks + // whenever a not null check succeeds, we jump to the end of the series + let label_coalesce_end = program.allocate_label(); + for (index, arg) in args.iter().enumerate() { + let reg = translate_expr(program, select, arg, target_register)?; + if index < args.len() - 1 { + program.emit_insn_with_label_dependency( + Insn::NotNull { + reg, + target_pc: label_coalesce_end, + }, + label_coalesce_end, + ); + } + } + program.preassign_label_to_next_insn(label_coalesce_end); + + Ok(target_register) + } + SingleRowFunc::Like => { + let args = if let Some(args) = args { + if args.len() < 2 { + anyhow::bail!( + "Parse error: like function with less than 2 arguments" + ); + } + args + } else { + anyhow::bail!("Parse error: like function with no arguments"); + }; + for arg in args { + let reg = program.alloc_register(); + let _ = translate_expr(program, select, arg, reg)?; + match arg { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} + } + } + program.emit_insn(Insn::Function { + start_reg: target_register + 1, + dest: target_register, + func: SingleRowFunc::Like, + }); + Ok(target_register) + } + SingleRowFunc::Abs => { + let args = if let Some(args) = args { + if args.len() != 1 { + anyhow::bail!( + "Parse error: abs function with not exactly 1 argument" + ); + } + args + } else { + anyhow::bail!("Parse error: abs function with no arguments"); + }; + + let regs = program.alloc_register(); + let _ = translate_expr(program, select, &args[0], regs)?; + program.emit_insn(Insn::Function { + start_reg: regs, + dest: target_register, + func: SingleRowFunc::Abs, + }); + + Ok(target_register) + } + } + } + None => { + anyhow::bail!("Parse error: unknown function {}", name.0); + } + } + } + ast::Expr::FunctionCallStar { .. } => todo!(), + ast::Expr::Id(ident) => { + // let (idx, col) = table.unwrap().get_column(&ident.0).unwrap(); + let (idx, col, cursor_id) = resolve_ident_table(program, &ident.0, select)?; + if col.primary_key { + program.emit_insn(Insn::RowId { + cursor_id, + dest: target_register, + }); + } else { + program.emit_insn(Insn::Column { + column: idx, + dest: target_register, + cursor_id, + }); + } + maybe_apply_affinity(col, target_register, program); + Ok(target_register) + } + ast::Expr::InList { .. } => todo!(), + ast::Expr::InSelect { .. } => todo!(), + ast::Expr::InTable { .. } => todo!(), + ast::Expr::IsNull(_) => todo!(), + ast::Expr::Like { .. } => todo!(), + ast::Expr::Literal(lit) => match lit { + ast::Literal::Numeric(val) => { + let maybe_int = val.parse::(); + if let Ok(int_value) = maybe_int { + program.emit_insn(Insn::Integer { + value: int_value, + dest: target_register, + }); + } else { + // must be a float + program.emit_insn(Insn::Real { + value: val.parse().unwrap(), + dest: target_register, + }); + } + Ok(target_register) + } + ast::Literal::String(s) => { + program.emit_insn(Insn::String8 { + value: s[1..s.len() - 1].to_string(), + dest: target_register, + }); + Ok(target_register) + } + ast::Literal::Blob(_) => todo!(), + ast::Literal::Keyword(_) => todo!(), + ast::Literal::Null => { + program.emit_insn(Insn::Null { + dest: target_register, + }); + Ok(target_register) + } + ast::Literal::CurrentDate => todo!(), + ast::Literal::CurrentTime => todo!(), + ast::Literal::CurrentTimestamp => todo!(), + }, + ast::Expr::Name(_) => todo!(), + ast::Expr::NotNull(_) => todo!(), + ast::Expr::Parenthesized(_) => todo!(), + ast::Expr::Qualified(tbl, ident) => { + let (idx, col, cursor_id) = resolve_ident_qualified(program, &tbl.0, &ident.0, select)?; + if col.primary_key { + program.emit_insn(Insn::RowId { + cursor_id, + dest: target_register, + }); + } else { + program.emit_insn(Insn::Column { + column: idx, + dest: target_register, + cursor_id, + }); + } + maybe_apply_affinity(col, target_register, program); + Ok(target_register) + } + ast::Expr::Raise(_, _) => todo!(), + ast::Expr::Subquery(_) => todo!(), + ast::Expr::Unary(op, expr) => match (op, expr.as_ref()) { + (UnaryOperator::Negative, ast::Expr::Literal(ast::Literal::Numeric(numeric_value))) => { + let maybe_int = numeric_value.parse::(); + if let Ok(value) = maybe_int { + program.emit_insn(Insn::Integer { + value: -value, + dest: target_register, + }); + } else { + program.emit_insn(Insn::Real { + value: -numeric_value.parse::()?, + dest: target_register, + }); + } + Ok(target_register) + } + _ => todo!(), + }, + ast::Expr::Variable(_) => todo!(), + } +} + +pub fn analyze_columns<'a>( + columns: &'a Vec, + joins: &Vec, +) -> Vec> { + let mut column_information_list = Vec::with_capacity(columns.len()); + for column in columns { + let mut info = ColumnInfo::new(); + if let ast::ResultColumn::Star = column { + info.columns_to_allocate = 0; + for join in joins { + info.columns_to_allocate += join.table.columns().len(); + } + } else { + info.columns_to_allocate = 1; + analyze_column(column, &mut info); + } + column_information_list.push(info); + } + column_information_list +} + +/// Analyze a column expression. +/// +/// This function will walk all columns and find information about: +/// * Aggregation functions. +fn analyze_column<'a>(column: &'a ast::ResultColumn, column_info_out: &mut ColumnInfo<'a>) { + match column { + ast::ResultColumn::Expr(expr, _) => analyze_expr(expr, column_info_out), + ast::ResultColumn::Star => {} + ast::ResultColumn::TableStar(_) => {} + } +} + +pub fn analyze_expr<'a>(expr: &'a Expr, column_info_out: &mut ColumnInfo<'a>) { + match expr { + ast::Expr::FunctionCall { + name, + distinctness: _, + args, + filter_over: _, + } => { + let func_type = match normalize_ident(name.0.as_str()).as_str().parse() { + Ok(func) => Some(func), + Err(_) => None, + }; + if func_type.is_none() { + let args = args.as_ref().unwrap(); + if !args.is_empty() { + analyze_expr(args.first().unwrap(), column_info_out); + } + } else { + column_info_out.func = func_type; + // TODO(pere): use lifetimes for args? Arenas would be lovely here :( + column_info_out.args = args; + } + } + ast::Expr::FunctionCallStar { .. } => todo!(), + _ => {} + } +} + +fn wrap_eval_jump_expr( + program: &mut ProgramBuilder, + insn: Insn, + target_register: usize, + if_true_label: BranchOffset, +) { + program.emit_insn(Insn::Integer { + value: 1, // emit True by default + dest: target_register, + }); + program.emit_insn_with_label_dependency(insn, if_true_label); + program.emit_insn(Insn::Integer { + value: 0, // emit False if we reach this point (no jump) + dest: target_register, + }); + program.preassign_label_to_next_insn(if_true_label); +} + +pub fn resolve_ident_qualified<'a>( + program: &ProgramBuilder, + table_name: &String, + ident: &String, + select: &'a Select, +) -> Result<(usize, &'a Column, usize)> { + for join in &select.src_tables { + match join.table { + Table::BTree(ref table) => { + let table_identifier = match join.alias { + Some(alias) => alias.clone(), + None => table.name.to_string(), + }; + if table_identifier == *table_name { + let res = table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == *ident); + if res.is_some() { + let (idx, col) = res.unwrap(); + let cursor_id = program.resolve_cursor_id(&table_identifier); + return Ok((idx, col, cursor_id)); + } + } + } + Table::Pseudo(_) => todo!(), + } + } + anyhow::bail!( + "Parse error: column with qualified name {}.{} not found", + table_name, + ident + ); +} + +pub fn resolve_ident_table<'a>( + program: &ProgramBuilder, + ident: &String, + select: &'a Select, +) -> Result<(usize, &'a Column, usize)> { + let mut found = Vec::new(); + for join in &select.src_tables { + match join.table { + Table::BTree(ref table) => { + let table_identifier = match join.alias { + Some(alias) => alias.clone(), + None => table.name.to_string(), + }; + let res = table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == *ident); + if res.is_some() { + let (idx, col) = res.unwrap(); + let cursor_id = program.resolve_cursor_id(&table_identifier); + found.push((idx, col, cursor_id)); + } + } + Table::Pseudo(_) => todo!(), + } + } + if found.len() == 1 { + return Ok(found[0]); + } + if found.is_empty() { + anyhow::bail!("Parse error: column with name {} not found", ident.as_str()); + } + + anyhow::bail!("Parse error: ambiguous column name {}", ident.as_str()); +} + +pub fn maybe_apply_affinity(col: &Column, target_register: usize, program: &mut ProgramBuilder) { + if col.ty == crate::schema::Type::Real { + program.emit_insn(Insn::RealAffinity { + register: target_register, + }) + } +} diff --git a/core/io/common.rs b/core/io/common.rs index a8456cd95..d69177ff8 100644 --- a/core/io/common.rs +++ b/core/io/common.rs @@ -4,21 +4,21 @@ pub const ENV_DISABLE_FILE_LOCK: &str = "LIMBO_DISABLE_FILE_LOCK"; pub mod tests { use crate::IO; use anyhow::Result; - use tempfile::NamedTempFile; use std::process::{Command, Stdio}; + use tempfile::NamedTempFile; fn run_test_parent_process(create_io: fn() -> Result) { let temp_file: NamedTempFile = NamedTempFile::new().expect("Failed to create temp file"); let path = temp_file.path().to_str().unwrap().to_string(); - + // Parent process opens the file let io1 = create_io().expect("Failed to create IO"); let _file1 = io1 .open_file(&path) .expect("Failed to open file in parent process"); - + let current_exe = std::env::current_exe().expect("Failed to get current executable path"); - + // Spawn a child process and try to open the same file let child = Command::new(current_exe) .env("RUST_TEST_CHILD_PROCESS", "1") @@ -27,11 +27,14 @@ pub mod tests { .stderr(Stdio::piped()) .spawn() .expect("Failed to spawn child process"); - - let output = child.wait_with_output().expect("Failed to wait on child"); - assert!(!output.status.success(), "Child process should have failed to open the file"); + + let output = child.wait_with_output().expect("Failed to wait on child"); + assert!( + !output.status.success(), + "Child process should have failed to open the file" + ); } - + fn run_test_child_process(create_io: fn() -> Result) -> Result<()> { if std::env::var("RUST_TEST_CHILD_PROCESS").is_ok() { let path = std::env::var("RUST_TEST_FILE_PATH")?; diff --git a/core/io/mod.rs b/core/io/mod.rs index f0021959c..a7add6b01 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -137,4 +137,4 @@ cfg_block! { } } -mod common; \ No newline at end of file +mod common; diff --git a/core/lib.rs b/core/lib.rs index af387afd7..4049ab8f0 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -1,9 +1,11 @@ mod btree; mod buffer_pool; +mod expr; mod function; mod io; mod pager; mod schema; +mod select; mod sorter; mod sqlite3_ondisk; mod storage; @@ -11,6 +13,7 @@ mod translate; mod types; mod util; mod vdbe; +mod where_clause; #[cfg(not(target_family = "wasm"))] #[global_allocator] diff --git a/core/select.rs b/core/select.rs new file mode 100644 index 000000000..ba23b1b4a --- /dev/null +++ b/core/select.rs @@ -0,0 +1,54 @@ +use sqlite3_parser::ast; + +use crate::{function::Func, schema::Table, vdbe::BranchOffset}; + +pub struct SrcTable<'a> { + pub table: Table, + pub alias: Option<&'a String>, + pub join_info: Option<&'a ast::JoinedSelectTable>, // FIXME: preferably this should be a reference with lifetime == Select ast expr +} + +pub struct ColumnInfo<'a> { + pub func: Option, + pub args: &'a Option>, + pub columns_to_allocate: usize, /* number of result columns this col will result on */ +} + +impl<'a> ColumnInfo<'a> { + pub fn new() -> Self { + Self { + func: None, + args: &None, + columns_to_allocate: 1, + } + } + + pub fn is_aggregation_function(&self) -> bool { + matches!(self.func, Some(Func::Agg(_))) + } +} + +pub struct LoopInfo { + pub rewind_offset: BranchOffset, + pub rewind_label: BranchOffset, + pub open_cursor: usize, +} + +pub struct Select<'a> { + pub columns: &'a Vec, + pub column_info: Vec>, + pub src_tables: Vec>, // Tables we use to get data from. This includes "from" and "joins" + pub limit: &'a Option, + pub exist_aggregation: bool, + pub where_clause: &'a Option, + /// Ordered list of opened read table loops + /// Used for generating a loop that looks like this: + /// cursor 0 = open table 0 + /// for each row in cursor 0 + /// cursor 1 = open table 1 + /// for each row in cursor 1 + /// ... + /// end cursor 1 + /// end cursor 0 + pub loops: Vec, +} diff --git a/core/translate.rs b/core/translate.rs index 3307ac696..14f1ff207 100644 --- a/core/translate.rs +++ b/core/translate.rs @@ -1,68 +1,18 @@ use std::cell::RefCell; use std::rc::Rc; -use crate::function::{AggFunc, Func, SingleRowFunc}; +use crate::expr::{build_select, maybe_apply_affinity, translate_expr}; +use crate::function::{AggFunc, Func}; use crate::pager::Pager; -use crate::schema::{Column, Schema, Table}; +use crate::schema::{Schema, Table}; +use crate::select::{ColumnInfo, LoopInfo, Select, SrcTable}; use crate::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE}; -use crate::util::normalize_ident; use crate::vdbe::{BranchOffset, Insn, Program, ProgramBuilder}; +use crate::where_clause::{ + evaluate_conditions, translate_conditions, translate_where, Inner, Left, QueryConstraint, +}; use anyhow::Result; -use sqlite3_parser::ast::{self, Expr, JoinOperator, Literal, UnaryOperator}; - -const HARDCODED_CURSOR_LEFT_TABLE: usize = 0; -const HARDCODED_CURSOR_RIGHT_TABLE: usize = 1; - -struct Select<'a> { - columns: &'a Vec, - column_info: Vec>, - src_tables: Vec>, // Tables we use to get data from. This includes "from" and "joins" - limit: &'a Option, - exist_aggregation: bool, - where_clause: &'a Option, - /// Ordered list of opened read table loops - /// Used for generating a loop that looks like this: - /// cursor 0 = open table 0 - /// for each row in cursor 0 - /// cursor 1 = open table 1 - /// for each row in cursor 1 - /// ... - /// end cursor 1 - /// end cursor 0 - loops: Vec, -} - -struct LoopInfo { - rewind_offset: BranchOffset, - rewind_label: BranchOffset, - open_cursor: usize, -} - -struct SrcTable<'a> { - table: Table, - alias: Option<&'a String>, - join_info: Option<&'a ast::JoinedSelectTable>, // FIXME: preferably this should be a reference with lifetime == Select ast expr -} - -struct ColumnInfo<'a> { - func: Option, - args: &'a Option>, - columns_to_allocate: usize, /* number of result columns this col will result on */ -} - -impl<'a> ColumnInfo<'a> { - pub fn new() -> Self { - Self { - func: None, - args: &None, - columns_to_allocate: 1, - } - } - - pub fn is_aggregation_function(&self) -> bool { - matches!(self.func, Some(Func::Agg(_))) - } -} +use sqlite3_parser::ast::{self, Literal}; struct LimitInfo { limit_reg: usize, @@ -87,104 +37,6 @@ pub fn translate( } } -fn build_select<'a>(schema: &Schema, select: &'a ast::Select) -> Result> { - match &select.body.select { - ast::OneSelect::Select { - columns, - from: Some(from), - where_clause, - .. - } => { - let (table_name, maybe_alias) = match &from.select { - Some(select_table) => match select_table.as_ref() { - ast::SelectTable::Table(name, alias, ..) => ( - &name.name, - alias.as_ref().map(|als| match als { - ast::As::As(alias) => alias, // users as u - ast::As::Elided(alias) => alias, // users u - }), - ), - _ => todo!(), - }, - None => todo!(), - }; - let table_name = &table_name.0; - let maybe_alias = maybe_alias.map(|als| &als.0); - let table = match schema.get_table(&table_name) { - Some(table) => table, - None => anyhow::bail!("Parse error: no such table: {}", table_name), - }; - let mut joins = Vec::new(); - joins.push(SrcTable { - table: Table::BTree(table.clone()), - alias: maybe_alias, - join_info: None, - }); - if let Some(selected_joins) = &from.joins { - for join in selected_joins { - let (table_name, maybe_alias) = match &join.table { - ast::SelectTable::Table(name, alias, ..) => ( - &name.name, - alias.as_ref().map(|als| match als { - ast::As::As(alias) => alias, // users as u - ast::As::Elided(alias) => alias, // users u - }), - ), - _ => todo!(), - }; - let table_name = &table_name.0; - let maybe_alias = maybe_alias.as_ref().map(|als| &als.0); - let table = match schema.get_table(table_name) { - Some(table) => table, - None => anyhow::bail!("Parse error: no such table: {}", table_name), - }; - joins.push(SrcTable { - table: Table::BTree(table), - alias: maybe_alias, - join_info: Some(&join), - }); - } - } - - let _table = Table::BTree(table); - let column_info = analyze_columns(columns, &joins); - let exist_aggregation = column_info - .iter() - .any(|info| info.is_aggregation_function()); - Ok(Select { - columns, - column_info, - src_tables: joins, - limit: &select.limit, - exist_aggregation, - where_clause, - loops: Vec::new(), - }) - } - ast::OneSelect::Select { - columns, - from: None, - where_clause, - .. - } => { - let column_info = analyze_columns(columns, &Vec::new()); - let exist_aggregation = column_info - .iter() - .any(|info| info.is_aggregation_function()); - Ok(Select { - columns, - column_info, - src_tables: Vec::new(), - limit: &select.limit, - where_clause, - exist_aggregation, - loops: Vec::new(), - }) - } - _ => todo!(), - } -} - /// Generate code for a SELECT statement. fn translate_select(mut select: Select) -> Result { let mut program = ProgramBuilder::new(); @@ -303,264 +155,6 @@ fn emit_limit_insn(limit_info: &Option, program: &mut ProgramBuilder) } } -fn translate_where(select: &Select, program: &mut ProgramBuilder) -> Result> { - if let Some(w) = &select.where_clause { - let label = program.allocate_label(); - translate_condition_expr(program, select, w, label, false)?; - Ok(Some(label)) - } else { - Ok(None) - } -} - -fn introspect_expression_for_cursors( - program: &ProgramBuilder, - select: &Select, - where_expr: &ast::Expr, -) -> Result> { - let mut cursors = vec![]; - match where_expr { - ast::Expr::Binary(e1, _, e2) => { - cursors.extend(introspect_expression_for_cursors(program, select, e1)?); - cursors.extend(introspect_expression_for_cursors(program, select, e2)?); - } - ast::Expr::Id(ident) => { - let (_, _, cursor_id) = resolve_ident_table(program, &ident.0, select)?; - cursors.push(cursor_id); - } - ast::Expr::Qualified(tbl, ident) => { - let (_, _, cursor_id) = resolve_ident_qualified(program, &tbl.0, &ident.0, select)?; - cursors.push(cursor_id); - } - ast::Expr::Literal(_) => {} - ast::Expr::Like { - lhs, - not, - op, - rhs, - escape, - } => { - cursors.extend(introspect_expression_for_cursors(program, select, lhs)?); - cursors.extend(introspect_expression_for_cursors(program, select, rhs)?); - } - other => { - anyhow::bail!("Parse error: unsupported expression: {:?}", other); - } - } - - Ok(cursors) -} - -fn get_no_match_target_cursor( - program: &ProgramBuilder, - select: &Select, - expr: &ast::Expr, -) -> usize { - // This is the hackiest part of the code. We are finding the cursor that should be advanced to the next row - // when the condition is not met. This is done by introspecting the expression and finding the innermost cursor that is - // used in the expression. This is a very naive approach and will not work in all cases. - // Thankfully though it might be possible to just refine the logic contained here to make it work in all cases. Maybe. - let cursors = introspect_expression_for_cursors(program, select, expr).unwrap_or_default(); - if cursors.is_empty() { - HARDCODED_CURSOR_LEFT_TABLE - } else { - *cursors.iter().max().unwrap() - } -} - -fn evaluate_conditions( - program: &mut ProgramBuilder, - select: &Select, -) -> Result> { - let join_constraints = select - .src_tables - .iter() - .map(|v| v.join_info.clone()) - .filter_map(|v| v.map(|v| (v.constraint.clone(), v.operator))) - .collect::>(); - // TODO: only supports one JOIN; -> add support for multiple JOINs, e.g. SELECT * FROM a JOIN b ON a.id = b.id JOIN c ON b.id = c.id - if join_constraints.len() > 1 { - anyhow::bail!("Parse error: multiple JOINs not supported"); - } - - let join_maybe = join_constraints.first(); - - let parsed_where_maybe = select.where_clause.as_ref().map(|where_clause| Where { - constraint_expr: where_clause.clone(), - no_match_jump_label: program.allocate_label(), - no_match_target_cursor: get_no_match_target_cursor(program, select, &where_clause), - }); - - let parsed_join_maybe = join_maybe - .map(|(constraint, _)| { - if let Some(ast::JoinConstraint::On(expr)) = constraint { - Some(Join { - constraint_expr: expr.clone(), - no_match_jump_label: program.allocate_label(), - no_match_target_cursor: get_no_match_target_cursor(program, select, expr), - }) - } else { - None - } - }) - .flatten(); - - let constraint_maybe = match (parsed_where_maybe, parsed_join_maybe) { - (None, None) => None, - (Some(where_clause), None) => Some(QueryConstraint::Inner(Inner { - where_clause: Some(where_clause), - join_clause: None, - })), - (where_clause, Some(join_clause)) => { - let (_, op) = join_maybe.unwrap(); - match op { - JoinOperator::TypedJoin { natural, join_type } => { - if *natural { - todo!("Natural join not supported"); - } - // default to inner join when no join type is specified - let join_type = join_type.unwrap_or(ast::JoinType::Inner); - match join_type { - ast::JoinType::Inner | ast::JoinType::Cross => { - // cross join with a condition is an inner join - Some(QueryConstraint::Inner(Inner { - where_clause, - join_clause: Some(join_clause), - })) - } - ast::JoinType::LeftOuter | ast::JoinType::Left => { - let left_join_match_flag = program.alloc_register(); - let left_join_match_flag_hit_marker = program.allocate_label(); - let left_join_found_match_next_row_label = program.allocate_label(); - - Some(QueryConstraint::Left(Left { - where_clause, - join_clause: Some(join_clause), - found_match_next_row_label: left_join_found_match_next_row_label, - match_flag: left_join_match_flag, - match_flag_hit_marker: left_join_match_flag_hit_marker, - left_cursor: HARDCODED_CURSOR_LEFT_TABLE, // FIXME: hardcoded - right_cursor: HARDCODED_CURSOR_RIGHT_TABLE, // FIXME: hardcoded - })) - } - ast::JoinType::RightOuter | ast::JoinType::Right => { - todo!(); - } - ast::JoinType::FullOuter | ast::JoinType::Full => { - todo!(); - } - } - } - JoinOperator::Comma => { - todo!(); - } - } - } - }; - - Ok(constraint_maybe) -} - -fn translate_conditions( - program: &mut ProgramBuilder, - select: &Select, - conditions: Option, -) -> Result> { - match conditions.as_ref() { - Some(QueryConstraint::Left(Left { - where_clause, - join_clause, - match_flag, - match_flag_hit_marker, - .. - })) => { - if let Some(where_clause) = where_clause { - translate_condition_expr( - program, - select, - &where_clause.constraint_expr, - where_clause.no_match_jump_label, - false, - )?; - } - if let Some(join_clause) = join_clause { - translate_condition_expr( - program, - select, - &join_clause.constraint_expr, - join_clause.no_match_jump_label, - false, - )?; - } - // Set match flag to 1 if we hit the marker (i.e. jump didn't happen to no_match_label as a result of the condition) - program.emit_insn(Insn::Integer { - value: 1, - dest: *match_flag, - }); - program.defer_label_resolution(*match_flag_hit_marker, (program.offset() - 1) as usize); - } - Some(QueryConstraint::Inner(inner_join)) => { - if let Some(where_clause) = &inner_join.where_clause { - translate_condition_expr( - program, - select, - &where_clause.constraint_expr, - where_clause.no_match_jump_label, - false, - )?; - } - if let Some(join_clause) = &inner_join.join_clause { - translate_condition_expr( - program, - select, - &join_clause.constraint_expr, - join_clause.no_match_jump_label, - false, - )?; - } - } - None => {} - } - - Ok(conditions) -} - -#[derive(Debug)] -struct Where { - constraint_expr: ast::Expr, - no_match_jump_label: BranchOffset, - no_match_target_cursor: usize, -} - -#[derive(Debug)] -struct Join { - constraint_expr: ast::Expr, - no_match_jump_label: BranchOffset, - no_match_target_cursor: usize, -} - -#[derive(Debug)] -struct Left { - where_clause: Option, - join_clause: Option, - match_flag: usize, - match_flag_hit_marker: BranchOffset, - found_match_next_row_label: BranchOffset, - left_cursor: usize, - right_cursor: usize, -} - -#[derive(Debug)] -struct Inner { - where_clause: Option, - join_clause: Option, -} - -enum QueryConstraint { - Left(Left), - Inner(Inner), -} - fn translate_tables_begin( program: &mut ProgramBuilder, select: &mut Select, @@ -815,730 +409,6 @@ fn translate_table_star(table: &SrcTable, program: &mut ProgramBuilder, target_r } } -fn analyze_columns<'a>( - columns: &'a Vec, - joins: &Vec, -) -> Vec> { - let mut column_information_list = Vec::with_capacity(columns.len()); - for column in columns { - let mut info = ColumnInfo::new(); - if let ast::ResultColumn::Star = column { - info.columns_to_allocate = 0; - for join in joins { - info.columns_to_allocate += join.table.columns().len(); - } - } else { - info.columns_to_allocate = 1; - analyze_column(column, &mut info); - } - column_information_list.push(info); - } - column_information_list -} - -/// Analyze a column expression. -/// -/// This function will walk all columns and find information about: -/// * Aggregation functions. -fn analyze_column<'a>(column: &'a ast::ResultColumn, column_info_out: &mut ColumnInfo<'a>) { - match column { - ast::ResultColumn::Expr(expr, _) => analyze_expr(expr, column_info_out), - ast::ResultColumn::Star => {} - ast::ResultColumn::TableStar(_) => {} - } -} - -fn analyze_expr<'a>(expr: &'a Expr, column_info_out: &mut ColumnInfo<'a>) { - match expr { - ast::Expr::FunctionCall { - name, - distinctness: _, - args, - filter_over: _, - } => { - let func_type = match normalize_ident(name.0.as_str()).as_str().parse() { - Ok(func) => Some(func), - Err(_) => None, - }; - if func_type.is_none() { - let args = args.as_ref().unwrap(); - if !args.is_empty() { - analyze_expr(args.first().unwrap(), column_info_out); - } - } else { - column_info_out.func = func_type; - // TODO(pere): use lifetimes for args? Arenas would be lovely here :( - column_info_out.args = args; - } - } - ast::Expr::FunctionCallStar { .. } => todo!(), - _ => {} - } -} - -fn translate_condition_expr( - program: &mut ProgramBuilder, - select: &Select, - expr: &ast::Expr, - target_jump: BranchOffset, - jump_if_true: bool, // if true jump to target on op == true, if false invert op -) -> Result<()> { - match expr { - ast::Expr::Between { .. } => todo!(), - ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { - if jump_if_true { - let label = program.allocate_label(); - let _ = translate_condition_expr(program, select, lhs, label, false); - let _ = translate_condition_expr(program, select, rhs, target_jump, true); - program.resolve_label(label, program.offset()); - } else { - let _ = translate_condition_expr(program, select, lhs, target_jump, false); - let _ = translate_condition_expr(program, select, rhs, target_jump, false); - } - } - ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { - if jump_if_true { - let _ = translate_condition_expr(program, select, lhs, target_jump, true); - let _ = translate_condition_expr(program, select, rhs, target_jump, true); - } else { - let label = program.allocate_label(); - let _ = translate_condition_expr(program, select, lhs, label, true); - let _ = translate_condition_expr(program, select, rhs, target_jump, false); - program.resolve_label(label, program.offset()); - } - } - ast::Expr::Binary(lhs, op, rhs) => { - let lhs_reg = program.alloc_register(); - let rhs_reg = program.alloc_register(); - let _ = translate_expr(program, select, lhs, lhs_reg); - match lhs.as_ref() { - ast::Expr::Literal(_) => program.mark_last_insn_constant(), - _ => {} - } - let _ = translate_expr(program, select, rhs, rhs_reg); - match rhs.as_ref() { - ast::Expr::Literal(_) => program.mark_last_insn_constant(), - _ => {} - } - match op { - ast::Operator::Greater => { - if jump_if_true { - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Le { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } - } - ast::Operator::GreaterEquals => { - if jump_if_true { - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Lt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } - } - ast::Operator::Less => { - if jump_if_true { - program.emit_insn_with_label_dependency( - Insn::Lt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Ge { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } - } - ast::Operator::LessEquals => { - if jump_if_true { - program.emit_insn_with_label_dependency( - Insn::Le { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Gt { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } - } - ast::Operator::Equals => { - if jump_if_true { - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } - } - ast::Operator::NotEquals => { - if jump_if_true { - program.emit_insn_with_label_dependency( - Insn::Ne { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::Eq { - lhs: lhs_reg, - rhs: rhs_reg, - target_pc: target_jump, - }, - target_jump, - ) - } - } - ast::Operator::Is => todo!(), - ast::Operator::IsNot => todo!(), - _ => { - todo!("op {:?} not implemented", op); - } - } - } - ast::Expr::Literal(lit) => match lit { - ast::Literal::Numeric(val) => { - let maybe_int = val.parse::(); - if let Ok(int_value) = maybe_int { - let reg = program.alloc_register(); - program.emit_insn(Insn::Integer { - value: int_value, - dest: reg, - }); - if target_jump < 0 { - program.add_label_dependency(target_jump, program.offset()); - } - program.emit_insn(Insn::IfNot { - reg, - target_pc: target_jump, - null_reg: reg, - }); - } else { - anyhow::bail!("Parse error: unsupported literal type in condition"); - } - } - _ => todo!(), - }, - ast::Expr::InList { lhs, not, rhs } => {} - ast::Expr::Like { - lhs, - not, - op, - rhs, - escape, - } => { - let cur_reg = program.alloc_register(); - assert!(match rhs.as_ref() { - ast::Expr::Literal(_) => true, - _ => false, - }); - match op { - ast::LikeOperator::Like => { - let pattern_reg = program.alloc_register(); - let column_reg = program.alloc_register(); - // LIKE(pattern, column). We should translate the pattern first before the column - let _ = translate_expr(program, select, rhs, pattern_reg)?; - program.mark_last_insn_constant(); - let _ = translate_expr(program, select, lhs, column_reg)?; - program.emit_insn(Insn::Function { - func: SingleRowFunc::Like, - start_reg: pattern_reg, - dest: cur_reg, - }); - } - ast::LikeOperator::Glob => todo!(), - ast::LikeOperator::Match => todo!(), - ast::LikeOperator::Regexp => todo!(), - } - if jump_if_true ^ *not { - program.emit_insn_with_label_dependency( - Insn::If { - reg: cur_reg, - target_pc: target_jump, - null_reg: cur_reg, - }, - target_jump, - ) - } else { - program.emit_insn_with_label_dependency( - Insn::IfNot { - reg: cur_reg, - target_pc: target_jump, - null_reg: cur_reg, - }, - target_jump, - ) - } - } - _ => todo!("op {:?} not implemented", expr), - } - Ok(()) -} - -fn wrap_eval_jump_expr( - program: &mut ProgramBuilder, - insn: Insn, - target_register: usize, - if_true_label: BranchOffset, -) { - program.emit_insn(Insn::Integer { - value: 1, // emit True by default - dest: target_register, - }); - program.emit_insn_with_label_dependency(insn, if_true_label); - program.emit_insn(Insn::Integer { - value: 0, // emit False if we reach this point (no jump) - dest: target_register, - }); - program.preassign_label_to_next_insn(if_true_label); -} - -fn translate_expr( - program: &mut ProgramBuilder, - select: &Select, - expr: &ast::Expr, - target_register: usize, -) -> Result { - match expr { - ast::Expr::Between { .. } => todo!(), - ast::Expr::Binary(e1, op, e2) => { - let e1_reg = program.alloc_register(); - let e2_reg = program.alloc_register(); - let _ = translate_expr(program, select, e1, e1_reg)?; - let _ = translate_expr(program, select, e2, e2_reg)?; - - match op { - ast::Operator::NotEquals => { - let if_true_label = program.allocate_label(); - wrap_eval_jump_expr( - program, - Insn::Ne { - lhs: e1_reg, - rhs: e2_reg, - target_pc: if_true_label, - }, - target_register, - if_true_label, - ); - } - ast::Operator::Equals => { - let if_true_label = program.allocate_label(); - wrap_eval_jump_expr( - program, - Insn::Eq { - lhs: e1_reg, - rhs: e2_reg, - target_pc: if_true_label, - }, - target_register, - if_true_label, - ); - } - ast::Operator::Less => { - let if_true_label = program.allocate_label(); - wrap_eval_jump_expr( - program, - Insn::Lt { - lhs: e1_reg, - rhs: e2_reg, - target_pc: if_true_label, - }, - target_register, - if_true_label, - ); - } - ast::Operator::LessEquals => { - let if_true_label = program.allocate_label(); - wrap_eval_jump_expr( - program, - Insn::Le { - lhs: e1_reg, - rhs: e2_reg, - target_pc: if_true_label, - }, - target_register, - if_true_label, - ); - } - ast::Operator::Greater => { - let if_true_label = program.allocate_label(); - wrap_eval_jump_expr( - program, - Insn::Gt { - lhs: e1_reg, - rhs: e2_reg, - target_pc: if_true_label, - }, - target_register, - if_true_label, - ); - } - ast::Operator::GreaterEquals => { - let if_true_label = program.allocate_label(); - wrap_eval_jump_expr( - program, - Insn::Ge { - lhs: e1_reg, - rhs: e2_reg, - target_pc: if_true_label, - }, - target_register, - if_true_label, - ); - } - ast::Operator::Add => { - program.emit_insn(Insn::Add { - lhs: e1_reg, - rhs: e2_reg, - dest: target_register, - }); - } - other_unimplemented => todo!("{:?}", other_unimplemented), - } - Ok(target_register) - } - ast::Expr::Case { .. } => todo!(), - ast::Expr::Cast { .. } => todo!(), - ast::Expr::Collate(_, _) => todo!(), - ast::Expr::DoublyQualified(_, _, _) => todo!(), - ast::Expr::Exists(_) => todo!(), - ast::Expr::FunctionCall { - name, - distinctness: _, - args, - filter_over: _, - } => { - let func_type: Option = match normalize_ident(name.0.as_str()).as_str().parse() { - Ok(func) => Some(func), - Err(_) => None, - }; - match func_type { - Some(Func::Agg(_)) => { - anyhow::bail!("Parse error: aggregation function in non-aggregation context") - } - Some(Func::SingleRow(srf)) => { - match srf { - SingleRowFunc::Coalesce => { - let args = if let Some(args) = args { - if args.len() < 2 { - anyhow::bail!( - "Parse error: coalesce function with less than 2 arguments" - ); - } - args - } else { - anyhow::bail!("Parse error: coalesce function with no arguments"); - }; - - // coalesce function is implemented as a series of not null checks - // whenever a not null check succeeds, we jump to the end of the series - let label_coalesce_end = program.allocate_label(); - for (index, arg) in args.iter().enumerate() { - let reg = translate_expr(program, select, arg, target_register)?; - if index < args.len() - 1 { - program.emit_insn_with_label_dependency( - Insn::NotNull { - reg, - target_pc: label_coalesce_end, - }, - label_coalesce_end, - ); - } - } - program.preassign_label_to_next_insn(label_coalesce_end); - - Ok(target_register) - } - SingleRowFunc::Like => { - let args = if let Some(args) = args { - if args.len() < 2 { - anyhow::bail!( - "Parse error: like function with less than 2 arguments" - ); - } - args - } else { - anyhow::bail!("Parse error: like function with no arguments"); - }; - for arg in args { - let reg = program.alloc_register(); - let _ = translate_expr(program, select, arg, reg)?; - match arg { - ast::Expr::Literal(_) => program.mark_last_insn_constant(), - _ => {} - } - } - program.emit_insn(Insn::Function { - start_reg: target_register + 1, - dest: target_register, - func: SingleRowFunc::Like, - }); - Ok(target_register) - } - SingleRowFunc::Abs => { - let args = if let Some(args) = args { - if args.len() != 1 { - anyhow::bail!( - "Parse error: abs function with not exactly 1 argument" - ); - } - args - } else { - anyhow::bail!("Parse error: abs function with no arguments"); - }; - - let regs = program.alloc_register(); - let _ = translate_expr(program, select, &args[0], regs)?; - program.emit_insn(Insn::Function { - start_reg: regs, - dest: target_register, - func: SingleRowFunc::Abs, - }); - - Ok(target_register) - } - } - } - None => { - anyhow::bail!("Parse error: unknown function {}", name.0); - } - } - } - ast::Expr::FunctionCallStar { .. } => todo!(), - ast::Expr::Id(ident) => { - // let (idx, col) = table.unwrap().get_column(&ident.0).unwrap(); - let (idx, col, cursor_id) = resolve_ident_table(program, &ident.0, select)?; - if col.primary_key { - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Column { - column: idx, - dest: target_register, - cursor_id, - }); - } - maybe_apply_affinity(col, target_register, program); - Ok(target_register) - } - ast::Expr::InList { .. } => todo!(), - ast::Expr::InSelect { .. } => todo!(), - ast::Expr::InTable { .. } => todo!(), - ast::Expr::IsNull(_) => todo!(), - ast::Expr::Like { .. } => todo!(), - ast::Expr::Literal(lit) => match lit { - ast::Literal::Numeric(val) => { - let maybe_int = val.parse::(); - if let Ok(int_value) = maybe_int { - program.emit_insn(Insn::Integer { - value: int_value, - dest: target_register, - }); - } else { - // must be a float - program.emit_insn(Insn::Real { - value: val.parse().unwrap(), - dest: target_register, - }); - } - Ok(target_register) - } - ast::Literal::String(s) => { - program.emit_insn(Insn::String8 { - value: s[1..s.len() - 1].to_string(), - dest: target_register, - }); - Ok(target_register) - } - ast::Literal::Blob(_) => todo!(), - ast::Literal::Keyword(_) => todo!(), - ast::Literal::Null => { - program.emit_insn(Insn::Null { - dest: target_register, - }); - Ok(target_register) - } - ast::Literal::CurrentDate => todo!(), - ast::Literal::CurrentTime => todo!(), - ast::Literal::CurrentTimestamp => todo!(), - }, - ast::Expr::Name(_) => todo!(), - ast::Expr::NotNull(_) => todo!(), - ast::Expr::Parenthesized(_) => todo!(), - ast::Expr::Qualified(tbl, ident) => { - let (idx, col, cursor_id) = resolve_ident_qualified(program, &tbl.0, &ident.0, select)?; - if col.primary_key { - program.emit_insn(Insn::RowId { - cursor_id, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Column { - column: idx, - dest: target_register, - cursor_id, - }); - } - maybe_apply_affinity(col, target_register, program); - Ok(target_register) - } - ast::Expr::Raise(_, _) => todo!(), - ast::Expr::Subquery(_) => todo!(), - ast::Expr::Unary(op, expr) => match (op, expr.as_ref()) { - (UnaryOperator::Negative, ast::Expr::Literal(ast::Literal::Numeric(numeric_value))) => { - let maybe_int = numeric_value.parse::(); - if let Ok(value) = maybe_int { - program.emit_insn(Insn::Integer { - value: -value, - dest: target_register, - }); - } else { - program.emit_insn(Insn::Real { - value: -numeric_value.parse::()?, - dest: target_register, - }); - } - Ok(target_register) - } - _ => todo!(), - }, - ast::Expr::Variable(_) => todo!(), - } -} - -fn resolve_ident_qualified<'a>( - program: &ProgramBuilder, - table_name: &String, - ident: &String, - select: &'a Select, -) -> Result<(usize, &'a Column, usize)> { - for join in &select.src_tables { - match join.table { - Table::BTree(ref table) => { - let table_identifier = match join.alias { - Some(alias) => alias.clone(), - None => table.name.to_string(), - }; - if table_identifier == *table_name { - let res = table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident); - if res.is_some() { - let (idx, col) = res.unwrap(); - let cursor_id = program.resolve_cursor_id(&table_identifier); - return Ok((idx, col, cursor_id)); - } - } - } - Table::Pseudo(_) => todo!(), - } - } - anyhow::bail!( - "Parse error: column with qualified name {}.{} not found", - table_name, - ident - ); -} - -fn resolve_ident_table<'a>( - program: &ProgramBuilder, - ident: &String, - select: &'a Select, -) -> Result<(usize, &'a Column, usize)> { - let mut found = Vec::new(); - for join in &select.src_tables { - match join.table { - Table::BTree(ref table) => { - let table_identifier = match join.alias { - Some(alias) => alias.clone(), - None => table.name.to_string(), - }; - let res = table - .columns - .iter() - .enumerate() - .find(|(_, col)| col.name == *ident); - if res.is_some() { - let (idx, col) = res.unwrap(); - let cursor_id = program.resolve_cursor_id(&table_identifier); - found.push((idx, col, cursor_id)); - } - } - Table::Pseudo(_) => todo!(), - } - } - if found.len() == 1 { - return Ok(found[0]); - } - if found.is_empty() { - anyhow::bail!("Parse error: column with name {} not found", ident.as_str()); - } - - anyhow::bail!("Parse error: ambiguous column name {}", ident.as_str()); -} - fn translate_aggregation( program: &mut ProgramBuilder, select: &Select, @@ -1831,11 +701,3 @@ fn update_pragma(name: &str, value: i64, header: Rc>, pa _ => todo!(), } } - -fn maybe_apply_affinity(col: &Column, target_register: usize, program: &mut ProgramBuilder) { - if col.ty == crate::schema::Type::Real { - program.emit_insn(Insn::RealAffinity { - register: target_register, - }) - } -} diff --git a/core/vdbe.rs b/core/vdbe.rs index b5cc046af..1f0c2f993 100644 --- a/core/vdbe.rs +++ b/core/vdbe.rs @@ -1832,7 +1832,7 @@ fn exec_abs(reg: &OwnedValue) -> Option { // Implements LIKE pattern matching. fn exec_like(pattern: &str, text: &str) -> bool { - let re = Regex::new(&format!("{}", pattern.replace("%", ".*").replace("_", "."))).unwrap(); + let re = Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap(); re.is_match(text) } @@ -1850,8 +1850,10 @@ fn exec_if(reg: &OwnedValue, null_reg: &OwnedValue, not: bool) -> bool { } } +#[cfg(test)] mod tests { - use super::*; + use super::{exec_abs, exec_if, exec_like, OwnedValue}; + use std::rc::Rc; #[test] fn test_abs() { @@ -1884,27 +1886,27 @@ mod tests { fn test_exec_if() { let reg = OwnedValue::Integer(0); let null_reg = OwnedValue::Integer(0); - assert_eq!(exec_if(®, &null_reg, false), false); - assert_eq!(exec_if(®, &null_reg, true), true); + assert!(!exec_if(®, &null_reg, false)); + assert!(exec_if(®, &null_reg, true)); let reg = OwnedValue::Integer(1); let null_reg = OwnedValue::Integer(0); - assert_eq!(exec_if(®, &null_reg, false), true); - assert_eq!(exec_if(®, &null_reg, true), false); + assert!(exec_if(®, &null_reg, false)); + assert!(!exec_if(®, &null_reg, true)); let reg = OwnedValue::Null; let null_reg = OwnedValue::Integer(0); - assert_eq!(exec_if(®, &null_reg, false), false); - assert_eq!(exec_if(®, &null_reg, true), false); + assert!(!exec_if(®, &null_reg, false)); + assert!(!exec_if(®, &null_reg, true)); let reg = OwnedValue::Null; let null_reg = OwnedValue::Integer(1); - assert_eq!(exec_if(®, &null_reg, false), true); - assert_eq!(exec_if(®, &null_reg, true), true); + assert!(exec_if(®, &null_reg, false)); + assert!(exec_if(®, &null_reg, true)); let reg = OwnedValue::Null; let null_reg = OwnedValue::Null; - assert_eq!(exec_if(®, &null_reg, false), false); - assert_eq!(exec_if(®, &null_reg, true), false); + assert!(!exec_if(®, &null_reg, false)); + assert!(!exec_if(®, &null_reg, true)); } } diff --git a/core/where_clause.rs b/core/where_clause.rs new file mode 100644 index 000000000..225d83246 --- /dev/null +++ b/core/where_clause.rs @@ -0,0 +1,528 @@ +use anyhow::Result; +use sqlite3_parser::ast::{self, JoinOperator}; + +use crate::{ + expr::{resolve_ident_qualified, resolve_ident_table, translate_expr}, + function::SingleRowFunc, + select::Select, + vdbe::{BranchOffset, Insn, ProgramBuilder}, +}; + +const HARDCODED_CURSOR_LEFT_TABLE: usize = 0; +const HARDCODED_CURSOR_RIGHT_TABLE: usize = 1; + +#[derive(Debug)] +pub struct Where { + pub constraint_expr: ast::Expr, + pub no_match_jump_label: BranchOffset, + pub no_match_target_cursor: usize, +} + +#[derive(Debug)] +pub struct Join { + pub constraint_expr: ast::Expr, + pub no_match_jump_label: BranchOffset, + pub no_match_target_cursor: usize, +} + +#[derive(Debug)] +pub struct Left { + pub where_clause: Option, + pub join_clause: Option, + pub match_flag: usize, + pub match_flag_hit_marker: BranchOffset, + pub found_match_next_row_label: BranchOffset, + pub left_cursor: usize, + pub right_cursor: usize, +} + +#[derive(Debug)] +pub struct Inner { + pub where_clause: Option, + pub join_clause: Option, +} + +pub enum QueryConstraint { + Left(Left), + Inner(Inner), +} + +pub fn translate_where( + select: &Select, + program: &mut ProgramBuilder, +) -> Result> { + if let Some(w) = &select.where_clause { + let label = program.allocate_label(); + translate_condition_expr(program, select, w, label, false)?; + Ok(Some(label)) + } else { + Ok(None) + } +} + +pub fn evaluate_conditions( + program: &mut ProgramBuilder, + select: &Select, +) -> Result> { + let join_constraints = select + .src_tables + .iter() + .map(|v| v.join_info) + .filter_map(|v| v.map(|v| (v.constraint.clone(), v.operator))) + .collect::>(); + // TODO: only supports one JOIN; -> add support for multiple JOINs, e.g. SELECT * FROM a JOIN b ON a.id = b.id JOIN c ON b.id = c.id + if join_constraints.len() > 1 { + anyhow::bail!("Parse error: multiple JOINs not supported"); + } + + let join_maybe = join_constraints.first(); + + let parsed_where_maybe = select.where_clause.as_ref().map(|where_clause| Where { + constraint_expr: where_clause.clone(), + no_match_jump_label: program.allocate_label(), + no_match_target_cursor: get_no_match_target_cursor(program, select, where_clause), + }); + + let parsed_join_maybe = join_maybe.and_then(|(constraint, _)| { + if let Some(ast::JoinConstraint::On(expr)) = constraint { + Some(Join { + constraint_expr: expr.clone(), + no_match_jump_label: program.allocate_label(), + no_match_target_cursor: get_no_match_target_cursor(program, select, expr), + }) + } else { + None + } + }); + + let constraint_maybe = match (parsed_where_maybe, parsed_join_maybe) { + (None, None) => None, + (Some(where_clause), None) => Some(QueryConstraint::Inner(Inner { + where_clause: Some(where_clause), + join_clause: None, + })), + (where_clause, Some(join_clause)) => { + let (_, op) = join_maybe.unwrap(); + match op { + JoinOperator::TypedJoin { natural, join_type } => { + if *natural { + todo!("Natural join not supported"); + } + // default to inner join when no join type is specified + let join_type = join_type.unwrap_or(ast::JoinType::Inner); + match join_type { + ast::JoinType::Inner | ast::JoinType::Cross => { + // cross join with a condition is an inner join + Some(QueryConstraint::Inner(Inner { + where_clause, + join_clause: Some(join_clause), + })) + } + ast::JoinType::LeftOuter | ast::JoinType::Left => { + let left_join_match_flag = program.alloc_register(); + let left_join_match_flag_hit_marker = program.allocate_label(); + let left_join_found_match_next_row_label = program.allocate_label(); + + Some(QueryConstraint::Left(Left { + where_clause, + join_clause: Some(join_clause), + found_match_next_row_label: left_join_found_match_next_row_label, + match_flag: left_join_match_flag, + match_flag_hit_marker: left_join_match_flag_hit_marker, + left_cursor: HARDCODED_CURSOR_LEFT_TABLE, // FIXME: hardcoded + right_cursor: HARDCODED_CURSOR_RIGHT_TABLE, // FIXME: hardcoded + })) + } + ast::JoinType::RightOuter | ast::JoinType::Right => { + todo!(); + } + ast::JoinType::FullOuter | ast::JoinType::Full => { + todo!(); + } + } + } + JoinOperator::Comma => { + todo!(); + } + } + } + }; + + Ok(constraint_maybe) +} + +pub fn translate_conditions( + program: &mut ProgramBuilder, + select: &Select, + conditions: Option, +) -> Result> { + match conditions.as_ref() { + Some(QueryConstraint::Left(Left { + where_clause, + join_clause, + match_flag, + match_flag_hit_marker, + .. + })) => { + if let Some(where_clause) = where_clause { + translate_condition_expr( + program, + select, + &where_clause.constraint_expr, + where_clause.no_match_jump_label, + false, + )?; + } + if let Some(join_clause) = join_clause { + translate_condition_expr( + program, + select, + &join_clause.constraint_expr, + join_clause.no_match_jump_label, + false, + )?; + } + // Set match flag to 1 if we hit the marker (i.e. jump didn't happen to no_match_label as a result of the condition) + program.emit_insn(Insn::Integer { + value: 1, + dest: *match_flag, + }); + program.defer_label_resolution(*match_flag_hit_marker, (program.offset() - 1) as usize); + } + Some(QueryConstraint::Inner(inner_join)) => { + if let Some(where_clause) = &inner_join.where_clause { + translate_condition_expr( + program, + select, + &where_clause.constraint_expr, + where_clause.no_match_jump_label, + false, + )?; + } + if let Some(join_clause) = &inner_join.join_clause { + translate_condition_expr( + program, + select, + &join_clause.constraint_expr, + join_clause.no_match_jump_label, + false, + )?; + } + } + None => {} + } + + Ok(conditions) +} + +fn translate_condition_expr( + program: &mut ProgramBuilder, + select: &Select, + expr: &ast::Expr, + target_jump: BranchOffset, + jump_if_true: bool, // if true jump to target on op == true, if false invert op +) -> Result<()> { + match expr { + ast::Expr::Between { .. } => todo!(), + ast::Expr::Binary(lhs, ast::Operator::And, rhs) => { + if jump_if_true { + let label = program.allocate_label(); + let _ = translate_condition_expr(program, select, lhs, label, false); + let _ = translate_condition_expr(program, select, rhs, target_jump, true); + program.resolve_label(label, program.offset()); + } else { + let _ = translate_condition_expr(program, select, lhs, target_jump, false); + let _ = translate_condition_expr(program, select, rhs, target_jump, false); + } + } + ast::Expr::Binary(lhs, ast::Operator::Or, rhs) => { + if jump_if_true { + let _ = translate_condition_expr(program, select, lhs, target_jump, true); + let _ = translate_condition_expr(program, select, rhs, target_jump, true); + } else { + let label = program.allocate_label(); + let _ = translate_condition_expr(program, select, lhs, label, true); + let _ = translate_condition_expr(program, select, rhs, target_jump, false); + program.resolve_label(label, program.offset()); + } + } + ast::Expr::Binary(lhs, op, rhs) => { + let lhs_reg = program.alloc_register(); + let rhs_reg = program.alloc_register(); + let _ = translate_expr(program, select, lhs, lhs_reg); + match lhs.as_ref() { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} + } + let _ = translate_expr(program, select, rhs, rhs_reg); + match rhs.as_ref() { + ast::Expr::Literal(_) => program.mark_last_insn_constant(), + _ => {} + } + match op { + ast::Operator::Greater => { + if jump_if_true { + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Le { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } + } + ast::Operator::GreaterEquals => { + if jump_if_true { + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Lt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } + } + ast::Operator::Less => { + if jump_if_true { + program.emit_insn_with_label_dependency( + Insn::Lt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Ge { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } + } + ast::Operator::LessEquals => { + if jump_if_true { + program.emit_insn_with_label_dependency( + Insn::Le { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Gt { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } + } + ast::Operator::Equals => { + if jump_if_true { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } + } + ast::Operator::NotEquals => { + if jump_if_true { + program.emit_insn_with_label_dependency( + Insn::Ne { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::Eq { + lhs: lhs_reg, + rhs: rhs_reg, + target_pc: target_jump, + }, + target_jump, + ) + } + } + ast::Operator::Is => todo!(), + ast::Operator::IsNot => todo!(), + _ => { + todo!("op {:?} not implemented", op); + } + } + } + ast::Expr::Literal(lit) => match lit { + ast::Literal::Numeric(val) => { + let maybe_int = val.parse::(); + if let Ok(int_value) = maybe_int { + let reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: int_value, + dest: reg, + }); + if target_jump < 0 { + program.add_label_dependency(target_jump, program.offset()); + } + program.emit_insn(Insn::IfNot { + reg, + target_pc: target_jump, + null_reg: reg, + }); + } else { + anyhow::bail!("Parse error: unsupported literal type in condition"); + } + } + _ => todo!(), + }, + ast::Expr::InList { lhs, not, rhs } => {} + ast::Expr::Like { + lhs, + not, + op, + rhs, + escape, + } => { + let cur_reg = program.alloc_register(); + assert!(match rhs.as_ref() { + ast::Expr::Literal(_) => true, + _ => false, + }); + match op { + ast::LikeOperator::Like => { + let pattern_reg = program.alloc_register(); + let column_reg = program.alloc_register(); + // LIKE(pattern, column). We should translate the pattern first before the column + let _ = translate_expr(program, select, rhs, pattern_reg)?; + program.mark_last_insn_constant(); + let _ = translate_expr(program, select, lhs, column_reg)?; + program.emit_insn(Insn::Function { + func: SingleRowFunc::Like, + start_reg: pattern_reg, + dest: cur_reg, + }); + } + ast::LikeOperator::Glob => todo!(), + ast::LikeOperator::Match => todo!(), + ast::LikeOperator::Regexp => todo!(), + } + if jump_if_true ^ *not { + program.emit_insn_with_label_dependency( + Insn::If { + reg: cur_reg, + target_pc: target_jump, + null_reg: cur_reg, + }, + target_jump, + ) + } else { + program.emit_insn_with_label_dependency( + Insn::IfNot { + reg: cur_reg, + target_pc: target_jump, + null_reg: cur_reg, + }, + target_jump, + ) + } + } + _ => todo!("op {:?} not implemented", expr), + } + Ok(()) +} + +fn introspect_expression_for_cursors( + program: &ProgramBuilder, + select: &Select, + where_expr: &ast::Expr, +) -> Result> { + let mut cursors = vec![]; + match where_expr { + ast::Expr::Binary(e1, _, e2) => { + cursors.extend(introspect_expression_for_cursors(program, select, e1)?); + cursors.extend(introspect_expression_for_cursors(program, select, e2)?); + } + ast::Expr::Id(ident) => { + let (_, _, cursor_id) = resolve_ident_table(program, &ident.0, select)?; + cursors.push(cursor_id); + } + ast::Expr::Qualified(tbl, ident) => { + let (_, _, cursor_id) = resolve_ident_qualified(program, &tbl.0, &ident.0, select)?; + cursors.push(cursor_id); + } + ast::Expr::Literal(_) => {} + ast::Expr::Like { + lhs, + not, + op, + rhs, + escape, + } => { + cursors.extend(introspect_expression_for_cursors(program, select, lhs)?); + cursors.extend(introspect_expression_for_cursors(program, select, rhs)?); + } + other => { + anyhow::bail!("Parse error: unsupported expression: {:?}", other); + } + } + + Ok(cursors) +} + +fn get_no_match_target_cursor( + program: &ProgramBuilder, + select: &Select, + expr: &ast::Expr, +) -> usize { + // This is the hackiest part of the code. We are finding the cursor that should be advanced to the next row + // when the condition is not met. This is done by introspecting the expression and finding the innermost cursor that is + // used in the expression. This is a very naive approach and will not work in all cases. + // Thankfully though it might be possible to just refine the logic contained here to make it work in all cases. Maybe. + let cursors = introspect_expression_for_cursors(program, select, expr).unwrap_or_default(); + if cursors.is_empty() { + HARDCODED_CURSOR_LEFT_TABLE + } else { + *cursors.iter().max().unwrap() + } +}