diff --git a/COMPAT.md b/COMPAT.md index 7ece6b1c4..4b306c2c8 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -97,7 +97,7 @@ This document describes the SQLite compatibility status of Limbo: | round(X,Y) | Yes | | | rtrim(X) | Yes | | | rtrim(X,Y) | Yes | | -| sign(X) | No | | +| sign(X) | Yes | | | soundex(X) | No | | | sqlite_compileoption_get(N) | No | | | sqlite_compileoption_used(X) | No | | diff --git a/core/function.rs b/core/function.rs index 8ce7a3a11..1b40a01b0 100644 --- a/core/function.rs +++ b/core/function.rs @@ -65,6 +65,7 @@ pub enum ScalarFunc { Min, Max, Nullif, + Sign, Substr, Substring, Date, @@ -95,6 +96,7 @@ impl Display for ScalarFunc { ScalarFunc::Min => "min".to_string(), ScalarFunc::Max => "max".to_string(), ScalarFunc::Nullif => "nullif".to_string(), + ScalarFunc::Sign => "sign".to_string(), ScalarFunc::Substr => "substr".to_string(), ScalarFunc::Substring => "substring".to_string(), ScalarFunc::Date => "date".to_string(), @@ -159,6 +161,7 @@ impl Func { "rtrim" => Ok(Func::Scalar(ScalarFunc::RTrim)), "round" => Ok(Func::Scalar(ScalarFunc::Round)), "length" => Ok(Func::Scalar(ScalarFunc::Length)), + "sign" => Ok(Func::Scalar(ScalarFunc::Sign)), "substr" => Ok(Func::Scalar(ScalarFunc::Substr)), "substring" => Ok(Func::Scalar(ScalarFunc::Substring)), "date" => Ok(Func::Scalar(ScalarFunc::Date)), diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 82f42a2e1..3c794379f 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -2,12 +2,10 @@ use std::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; -use sqlite3_parser::ast; - use crate::schema::{BTreeTable, Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; +use crate::translate::expr::resolve_ident_pseudo_table; use crate::types::{OwnedRecord, OwnedValue}; -use crate::util::normalize_ident; use crate::vdbe::builder::ProgramBuilder; use crate::vdbe::{BranchOffset, Insn, Program}; use crate::Result; @@ -648,21 +646,11 @@ impl Emitter for Operator { .chain(aggregates.iter().map(|agg| &agg.args[0])) // FIXME: just blindly taking the first arg is a hack { - // FIXME: reading from pseudo tables made during sort operations - // now relies on them having the same column names as the original - // table. This is not very robust IMO and we should refactor how these - // are handled. - column_names.push(match expr { - ast::Expr::Id(ident) => normalize_ident(&ident.0), - ast::Expr::Qualified(tbl, ident) => { - format!( - "{}.{}", - normalize_ident(&tbl.0), - normalize_ident(&ident.0) - ) - } - _ => "expr".to_string(), - }); + // Sorter column names for group by are now just determined by stringifying the expression, since the group by + // columns and aggregations can be practically anything. + // FIXME: either come up with something more robust, or make this something like expr.to_canonical_string() so that we can handle + // things like `count(1)` and `COUNT(1)` the same way + column_names.push(expr.to_string()); } let pseudo_columns = column_names .iter() @@ -673,12 +661,12 @@ impl Emitter for Operator { }) .collect::>(); - let pseudo_cursor = program.alloc_cursor_id( - None, - Some(Table::Pseudo(Rc::new(PseudoTable { - columns: pseudo_columns, - }))), - ); + let pseudo_table = Rc::new(PseudoTable { + columns: pseudo_columns, + }); + + let pseudo_cursor = program + .alloc_cursor_id(None, Some(Table::Pseudo(pseudo_table.clone()))); program.emit_insn(Insn::OpenPseudo { cursor_id: pseudo_cursor, @@ -707,15 +695,14 @@ impl Emitter for Operator { let groups_start_reg = program.alloc_registers(group_by.len()); for (i, expr) in group_by.iter().enumerate() { + let sorter_column_index = + resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; let group_reg = groups_start_reg + i; - translate_expr( - program, - Some(referenced_tables), - expr, - group_reg, - Some(pseudo_cursor), - None, - )?; + program.emit_insn(Insn::Column { + cursor_id: pseudo_cursor, + column: sorter_column_index, + dest: group_reg, + }); } program.emit_insn(Insn::Compare { @@ -806,14 +793,13 @@ impl Emitter for Operator { for (i, expr) in group_by.iter().enumerate() { let key_reg = group_exprs_start_register + i; - translate_expr( - program, - Some(referenced_tables), - expr, - key_reg, - Some(pseudo_cursor), - None, - )?; + let sorter_column_index = + resolve_ident_pseudo_table(&expr.to_string(), &pseudo_table)?; + program.emit_insn(Insn::Column { + cursor_id: pseudo_cursor, + column: sorter_column_index, + dest: key_reg, + }); } program.resolve_label( diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 3899de85d..39c561b08 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -4,7 +4,7 @@ use std::rc::Rc; use super::optimizer::CachedResult; use crate::function::{AggFunc, Func, FuncCtx, ScalarFunc}; -use crate::schema::{Table, Type}; +use crate::schema::{PseudoTable, Table, Type}; use crate::util::normalize_ident; use crate::{ schema::BTreeTable, @@ -451,21 +451,10 @@ pub fn translate_condition_expr( escape: _, } => { let cur_reg = program.alloc_register(); - assert!(matches!(rhs.as_ref(), ast::Expr::Literal(_))); match op { ast::LikeOperator::Like => { let pattern_reg = program.alloc_register(); let column_reg = program.alloc_register(); - // LIKE(pattern, column). We should translate the pattern first before the column - let _ = translate_expr( - program, - Some(referenced_tables), - rhs, - pattern_reg, - cursor_hint, - None, - )?; - program.mark_last_insn_constant(); let _ = translate_expr( program, Some(referenced_tables), @@ -474,6 +463,20 @@ pub fn translate_condition_expr( cursor_hint, None, )?; + if let ast::Expr::Literal(_) = lhs.as_ref() { + program.mark_last_insn_constant(); + } + let _ = translate_expr( + program, + Some(referenced_tables), + rhs, + pattern_reg, + cursor_hint, + None, + )?; + if let ast::Expr::Literal(_) = rhs.as_ref() { + program.mark_last_insn_constant(); + } program.emit_insn(Insn::Function { // Only constant patterns for LIKE are supported currently, so this // is always 1 @@ -838,8 +841,11 @@ pub fn translate_expr( srf.to_string() ); }; + let mut start_reg = None; for arg in args.iter() { let reg = program.alloc_register(); + start_reg = Some(start_reg.unwrap_or(reg)); + translate_expr( program, referenced_tables, @@ -851,7 +857,7 @@ pub fn translate_expr( } program.emit_insn(Insn::Function { constant_mask: 0, - start_reg: target_register + 1, + start_reg: start_reg.unwrap(), dest: target_register, func: func_ctx, }); @@ -983,7 +989,8 @@ pub fn translate_expr( | ScalarFunc::Upper | ScalarFunc::Length | ScalarFunc::Unicode - | ScalarFunc::Quote => { + | ScalarFunc::Quote + | ScalarFunc::Sign => { let args = if let Some(args) = args { if args.len() != 1 { crate::bail_parse_error!( @@ -1539,6 +1546,19 @@ pub fn resolve_ident_table( crate::bail_parse_error!("ambiguous column name {}", ident.as_str()); } +pub fn resolve_ident_pseudo_table(ident: &String, pseudo_table: &PseudoTable) -> Result { + let res = pseudo_table + .columns + .iter() + .enumerate() + .find(|(_, col)| col.name == *ident); + if res.is_some() { + let (idx, _) = res.unwrap(); + return Ok(idx); + } + crate::bail_parse_error!("column with name {} not found", ident.as_str()); +} + pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mut ProgramBuilder) { if col_type == crate::schema::Type::Real { program.emit_insn(Insn::RealAffinity { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index a6e5a4dff..57e32f1b3 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -1467,9 +1467,11 @@ impl Program { | ScalarFunc::Upper | ScalarFunc::Length | ScalarFunc::Unicode - | ScalarFunc::Quote => { + | ScalarFunc::Quote + | ScalarFunc::Sign => { let reg_value = state.registers[*start_reg].borrow_mut(); let result = match scalar_func { + ScalarFunc::Sign => exec_sign(reg_value), ScalarFunc::Abs => exec_abs(reg_value), ScalarFunc::Lower => exec_lower(reg_value), ScalarFunc::Upper => exec_upper(reg_value), @@ -1865,6 +1867,45 @@ fn exec_concat_ws(registers: &[OwnedValue]) -> OwnedValue { OwnedValue::Text(Rc::new(result)) } +fn exec_sign(reg: &OwnedValue) -> Option { + let num = match reg { + OwnedValue::Integer(i) => *i as f64, + OwnedValue::Float(f) => *f, + OwnedValue::Text(s) => { + if let Ok(i) = s.parse::() { + i as f64 + } else if let Ok(f) = s.parse::() { + f + } else { + return Some(OwnedValue::Null); + } + } + OwnedValue::Blob(b) => match std::str::from_utf8(b) { + Ok(s) => { + if let Ok(i) = s.parse::() { + i as f64 + } else if let Ok(f) = s.parse::() { + f + } else { + return Some(OwnedValue::Null); + } + } + Err(_) => return Some(OwnedValue::Null), + }, + _ => return Some(OwnedValue::Null), + }; + + let sign = if num > 0.0 { + 1 + } else if num < 0.0 { + -1 + } else { + 0 + }; + + Some(OwnedValue::Integer(sign)) +} + fn exec_abs(reg: &OwnedValue) -> Option { match reg { OwnedValue::Integer(x) => { @@ -1930,7 +1971,10 @@ fn exec_char(values: Vec) -> OwnedValue { } fn construct_like_regex(pattern: &str) -> Regex { - Regex::new(&pattern.replace('%', ".*").replace('_', ".").to_string()).unwrap() + let mut regex_pattern = String::from("^"); + regex_pattern.push_str(&pattern.replace('%', ".*").replace('_', ".")); + regex_pattern.push('$'); + Regex::new(®ex_pattern).unwrap() } // Implements LIKE pattern matching. Caches the constructed regex if a cache is provided @@ -2116,9 +2160,9 @@ fn exec_if(reg: &OwnedValue, null_reg: &OwnedValue, not: bool) -> bool { mod tests { use super::{ exec_abs, exec_char, exec_if, exec_length, exec_like, exec_lower, exec_ltrim, exec_minmax, - exec_nullif, exec_quote, exec_random, exec_round, exec_rtrim, exec_substring, exec_trim, - exec_unicode, exec_upper, get_new_rowid, Cursor, CursorResult, LimboError, OwnedRecord, - OwnedValue, Result, + exec_nullif, exec_quote, exec_random, exec_round, exec_rtrim, exec_sign, exec_substring, + exec_trim, exec_unicode, exec_upper, get_new_rowid, Cursor, CursorResult, LimboError, + OwnedRecord, OwnedValue, Result, }; use mockall::{mock, predicate}; use rand::{rngs::mock::StepRng, thread_rng}; @@ -2646,4 +2690,71 @@ mod tests { expected_val ); } + + #[test] + fn test_exec_sign() { + let input = OwnedValue::Integer(42); + let expected = Some(OwnedValue::Integer(1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Integer(-42); + let expected = Some(OwnedValue::Integer(-1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Integer(0); + let expected = Some(OwnedValue::Integer(0)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Float(0.0); + let expected = Some(OwnedValue::Integer(0)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Float(0.1); + let expected = Some(OwnedValue::Integer(1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Float(42.0); + let expected = Some(OwnedValue::Integer(1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Float(-42.0); + let expected = Some(OwnedValue::Integer(-1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Text(Rc::new("abc".to_string())); + let expected = Some(OwnedValue::Null); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Text(Rc::new("42".to_string())); + let expected = Some(OwnedValue::Integer(1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Text(Rc::new("-42".to_string())); + let expected = Some(OwnedValue::Integer(-1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Text(Rc::new("0".to_string())); + let expected = Some(OwnedValue::Integer(0)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Blob(Rc::new(b"abc".to_vec())); + let expected = Some(OwnedValue::Null); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Blob(Rc::new(b"42".to_vec())); + let expected = Some(OwnedValue::Integer(1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Blob(Rc::new(b"-42".to_vec())); + let expected = Some(OwnedValue::Integer(-1)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Blob(Rc::new(b"0".to_vec())); + let expected = Some(OwnedValue::Integer(0)); + assert_eq!(exec_sign(&input), expected); + + let input = OwnedValue::Null; + let expected = Some(OwnedValue::Null); + assert_eq!(exec_sign(&input), expected); + } } diff --git a/test/Cargo.toml b/test/Cargo.toml index 3cb9fd458..5d9590b27 100644 --- a/test/Cargo.toml +++ b/test/Cargo.toml @@ -19,7 +19,7 @@ dirs = "5.0.1" env_logger = "0.10.1" limbo_core = { path = "../core" } rustyline = "12.0.0" -rusqlite = "0.29.0" +rusqlite = { version = "0.29", features = ["bundled"] } tempfile = "3.0.7" log = "0.4.22" diff --git a/test/src/lib.rs b/test/src/lib.rs index 713a307f7..04da1a689 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -38,6 +38,7 @@ mod tests { use super::*; use limbo_core::{RowResult, Value}; + #[ignore] #[test] fn test_sequential_write() -> anyhow::Result<()> { let _ = env_logger::try_init(); @@ -99,6 +100,7 @@ mod tests { Ok(()) } + #[ignore] #[test] fn test_btree_splitting() -> anyhow::Result<()> { static INIT: Once = Once::new(); @@ -219,6 +221,7 @@ mod tests { Ok(()) } + #[ignore] #[test] fn test_sequential_overflow_page() -> anyhow::Result<()> { let _ = env_logger::try_init(); diff --git a/testing/groupby.test b/testing/groupby.test index a27d2a099..385c058d7 100644 --- a/testing/groupby.test +++ b/testing/groupby.test @@ -105,3 +105,25 @@ Abigail|906 Adam|1675 Adrian|447 Adriana|84} + +do_execsql_test group_by_function_expression { + select length(phone_number), count(1) from users group by length(phone_number) order by count(1); +} {15|392 +22|416 +13|762 +20|791 +10|793 +19|816 +21|821 +17|1184 +18|1211 +16|1231 +12|1583} + +do_execsql_test group_by_function_expression_ridiculous { + select upper(substr(phone_number, 1,3)), count(1) from users group by upper(substr(phone_number, 1,3)) order by -1 * count(1) limit 5; +} {001|1677 ++1-|1606 +(97|36 +(20|35 +(31|35} diff --git a/testing/like.test b/testing/like.test index 3863adf27..d53342e50 100755 --- a/testing/like.test +++ b/testing/like.test @@ -43,3 +43,24 @@ do_execsql_test where-like-or { 5|sweatshirt|74.0 8|sneakers|82.0 11|accessories|81.0} + +do_execsql_test where-like-another-column { + select first_name, last_name from users where last_name like first_name; +} {James|James +Daniel|Daniel +Taylor|Taylor} + +do_execsql_test where-like-another-column-prefix { + select first_name, last_name from users where last_name like concat(first_name, '%'); +} {James|James +Daniel|Daniel +William|Williams +John|Johnson +Taylor|Taylor +John|Johnson +Stephen|Stephens +Robert|Roberts} + +do_execsql_test where-like-impossible { + select * from products where 'foobar' like 'fooba'; +} {} diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index d73434ea1..79c622173 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -386,3 +386,47 @@ do_execsql_test quote-null { do_execsql_test quote-integer { SELECT quote(123) } {123} + +do_execsql_test sign-positive-integer { + SELECT sign(42); +} {1} + +do_execsql_test sign-negative-integer { + SELECT sign(-42); +} {-1} + +do_execsql_test sign-zero { + SELECT sign(0); +} {0} + +do_execsql_test sign-positive-float { + SELECT sign(42.0); +} {1} + +do_execsql_test sign-negative-float { + SELECT sign(-42.0); +} {-1} + +do_execsql_test sign-zero-float { + SELECT sign(0.0); +} {0} + +do_execsql_test sign-text-positive-integer { + SELECT sign('42'); +} {1} + +do_execsql_test sign-text-negative-integer { + SELECT sign('-42'); +} {-1} + +do_execsql_test sign-text-zero { + SELECT sign('0'); +} {0} + +do_execsql_test sign-text-non-numeric { + SELECT sign('abc'); +} {} + +do_execsql_test sign-null { + SELECT sign(NULL); +} {}