diff --git a/core/util.rs b/core/util.rs index 2ee09189d..7a258c6e3 100644 --- a/core/util.rs +++ b/core/util.rs @@ -1,8 +1,10 @@ +use core::num::IntErrorKind; use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Literal}; use std::{rc::Rc, sync::Arc}; use crate::{ schema::{self, Column, Schema, Type}, + types::OwnedValue, Result, Statement, StepResult, IO, }; @@ -380,6 +382,147 @@ pub fn columns_from_create_table_body(body: ast::CreateTableBody) -> Result>()) } +#[derive(Debug, PartialEq)] +/// Reference: +/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L798 +pub enum CastTextToIntResultCode { + NotInt = -1, + Success = 0, + ExcessSpace = 1, + TooLargeOrMalformed = 2, + #[allow(dead_code)] + SpecialCase = 3, +} + +pub fn text_to_integer(text: &str) -> (OwnedValue, CastTextToIntResultCode) { + let text = text.trim(); + if text.is_empty() { + return (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt); + } + let mut accum = String::new(); + let mut sign = false; + let mut has_digit = false; + let mut excess_space = false; + + let chars = text.chars(); + + for c in chars { + match c { + '0'..='9' => { + has_digit = true; + accum.push(c); + } + '+' | '-' if !has_digit && !sign => { + sign = true; + accum.push(c); + } + _ => { + excess_space = true; + break; + } + } + } + + match accum.parse::() { + Ok(num) => { + if excess_space { + return ( + OwnedValue::Integer(num), + CastTextToIntResultCode::ExcessSpace, + ); + } + + return (OwnedValue::Integer(num), CastTextToIntResultCode::Success); + } + Err(e) => match e.kind() { + IntErrorKind::NegOverflow | IntErrorKind::PosOverflow => ( + OwnedValue::Integer(0), + CastTextToIntResultCode::TooLargeOrMalformed, + ), + _ => (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt), + }, + } +} + +#[derive(Debug, PartialEq)] +/// Reference +/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L529 +pub enum CastTextToRealResultCode { + PureInt = 1, + HasDecimal = 2, + NotValid = 0, + NotValidButPrefix = -1, +} + +pub fn text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) { + let text = text.trim(); + if text.is_empty() { + return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid); + } + let mut accum = String::new(); + let mut has_decimal_separator = false; + let mut sign = false; + let mut exp_sign = false; + let mut has_exponent = false; + let mut has_digit = false; + let mut has_decimal_digit = false; + let mut excess_space = false; + + let chars = text.chars(); + + for c in chars { + match c { + '0'..='9' if !has_decimal_separator => { + has_digit = true; + accum.push(c); + } + '0'..='9' => { + has_decimal_digit = true; + accum.push(c); + } + '+' | '-' if !has_digit && !sign => { + sign = true; + accum.push(c); + } + '+' | '-' if has_exponent && !exp_sign => { + exp_sign = true; + accum.push(c); + } + '.' if !has_decimal_separator => { + has_decimal_separator = true; + accum.push(c); + } + 'E' | 'e' if !has_decimal_separator || has_decimal_digit => { + has_exponent = true; + accum.push(c); + } + _ => { + excess_space = true; + break; + } + } + } + + if let Ok(num) = accum.parse::() { + if !has_decimal_separator && !exp_sign && !has_exponent { + return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt); + } + + if excess_space { + // TODO see if this branch satisfies: not a valid number, but has a valid prefix which + // includes a decimal point and/or an eNNN clause + return ( + OwnedValue::Float(num), + CastTextToRealResultCode::NotValidButPrefix, + ); + } + + return (OwnedValue::Float(num), CastTextToRealResultCode::HasDecimal); + } + + return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid); +} + #[cfg(test)] pub mod tests { use super::*; @@ -635,4 +778,196 @@ pub mod tests { assert!(!check_ident_equivalency("\"foo\"", "[bar]")); assert!(!check_ident_equivalency("foo", "\"bar\"")); } + + #[test] + fn test_text_to_integer() { + let pairs = vec![ + ( + text_to_integer("1"), + (OwnedValue::Integer(1), CastTextToIntResultCode::Success), + ), + ( + text_to_integer("-1"), + (OwnedValue::Integer(-1), CastTextToIntResultCode::Success), + ), + ( + text_to_integer("10000000"), + ( + OwnedValue::Integer(10000000), + CastTextToIntResultCode::Success, + ), + ), + ( + text_to_integer("-10000000"), + ( + OwnedValue::Integer(-10000000), + CastTextToIntResultCode::Success, + ), + ), + ( + text_to_integer("xxx"), + (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt), + ), + ( + text_to_integer("123xxx"), + ( + OwnedValue::Integer(123), + CastTextToIntResultCode::ExcessSpace, + ), + ), + ( + text_to_integer("9223372036854775807"), + ( + OwnedValue::Integer(i64::MAX), + CastTextToIntResultCode::Success, + ), + ), + ( + text_to_integer("9223372036854775808"), + ( + OwnedValue::Integer(0), + CastTextToIntResultCode::TooLargeOrMalformed, + ), + ), + ( + text_to_integer("-9223372036854775808"), + ( + OwnedValue::Integer(i64::MIN), + CastTextToIntResultCode::Success, + ), + ), + ( + text_to_integer("-9223372036854775809"), + ( + OwnedValue::Integer(0), + CastTextToIntResultCode::TooLargeOrMalformed, + ), + ), + ]; + + for (left, right) in pairs { + assert_eq!(left, right); + } + } + + #[test] + fn test_text_to_real() { + let pairs = vec![ + ( + text_to_real("1"), + (OwnedValue::Float(1.0), CastTextToRealResultCode::PureInt), + ), + ( + text_to_real("-1"), + (OwnedValue::Float(-1.0), CastTextToRealResultCode::PureInt), + ), + ( + text_to_real("1.0"), + (OwnedValue::Float(1.0), CastTextToRealResultCode::HasDecimal), + ), + ( + text_to_real("-1.0"), + ( + OwnedValue::Float(-1.0), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("1e10"), + ( + OwnedValue::Float(1e10), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("-1e10"), + ( + OwnedValue::Float(-1e10), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("1e-10"), + ( + OwnedValue::Float(1e-10), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("-1e-10"), + ( + OwnedValue::Float(-1e-10), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("1.123e10"), + ( + OwnedValue::Float(1.123e10), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("-1.123e10"), + ( + OwnedValue::Float(-1.123e10), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("1.123e-10"), + ( + OwnedValue::Float(1.123e-10), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("-1.123e-10"), + ( + OwnedValue::Float(-1.123e-10), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("1-282584294928"), + (OwnedValue::Float(1.0), CastTextToRealResultCode::PureInt), + ), + ( + text_to_real("xxx"), + (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid), + ), + ( + text_to_real("1.7976931348623157e308"), + ( + OwnedValue::Float(f64::MAX), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("1.7976931348623157e309"), + ( + OwnedValue::Float(f64::INFINITY), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("-1.7976931348623157e308"), + ( + OwnedValue::Float(f64::MIN), + CastTextToRealResultCode::HasDecimal, + ), + ), + ( + text_to_real("-1.7976931348623157e309"), + ( + OwnedValue::Float(f64::NEG_INFINITY), + CastTextToRealResultCode::HasDecimal, + ), + ), + ]; + + for (left, right) in pairs { + assert_eq!(left, right); + } + } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 14214b0f0..25f7cb6ac 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -41,7 +41,10 @@ use crate::translate::plan::{ResultSetColumn, TableReference}; use crate::types::{ AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, Record, SeekKey, SeekOp, }; -use crate::util::parse_schema_rows; +use crate::util::{ + parse_schema_rows, text_to_integer, text_to_real, CastTextToIntResultCode, + CastTextToRealResultCode, +}; use crate::vdbe::builder::CursorType; use crate::vdbe::insn::Insn; use crate::vector::{vector32, vector64, vector_distance_cos, vector_extract}; @@ -403,28 +406,6 @@ macro_rules! must_be_btree_cursor { }}; } -/// Reference: -/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L798 -enum CastTextToIntResultCode { - NotInt = -1, - Success = 0, - ExcessSpace = 1, - #[allow(dead_code)] - TooLargeOrMalformed = 2, - #[allow(dead_code)] - SpecialCase = 3, -} - -/// Reference -/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L529 -enum CastTextToRealResultCode { - PureInt = 1, - HasDecimal = 2, - NotValid = 0, - #[allow(dead_code)] - NotValidButPrefix = -1, -} - #[derive(Debug)] pub struct Program { pub max_registers: usize, @@ -3652,35 +3633,7 @@ fn exec_replace(source: &OwnedValue, pattern: &OwnedValue, replacement: &OwnedVa /// because it is no part of the integer prefix. For example, "CAST('123e+5' AS INTEGER)" results in 123, not in 12300000. /// The CAST operator understands decimal integers only — conversion of hexadecimal integers stops at the "x" in the "0x" prefix of the hexadecimal integer string and thus result of the CAST is always zero. fn cast_text_to_integer(text: &str) -> (OwnedValue, CastTextToIntResultCode) { - let text = text.trim(); - if text.is_empty() { - return (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt); - } - if let Ok(i) = text.parse::() { - // Compare if the text value has more characters that the number of digits + the sign in the parsed int - if i.to_string().len() < text.len() { - // Means it was probably casted from a real or some malformed number. - return (OwnedValue::Integer(i), CastTextToIntResultCode::ExcessSpace); - } - - return (OwnedValue::Integer(i), CastTextToIntResultCode::Success); - } - // Try to find longest valid prefix that parses as an integer - // TODO: inefficient - let mut end_index = text.len().saturating_sub(1) as isize; - while end_index >= 0 { - if let Ok(i) = text[..=end_index as usize].parse::() { - // Compare if the text value has more characters that the number of digits + the sign in the parsed int - if i.to_string().len() < text.len() { - // Means it was probably casted from a real or some malformed number. - return (OwnedValue::Integer(i), CastTextToIntResultCode::ExcessSpace); - } - - return (OwnedValue::Integer(i), CastTextToIntResultCode::Success); - } - end_index -= 1; - } - return (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt); + text_to_integer(text) } /// When casting a TEXT value to REAL, the longest possible prefix of the value that can be interpreted @@ -3688,31 +3641,7 @@ fn cast_text_to_integer(text: &str) -> (OwnedValue, CastTextToIntResultCode) { /// the TEXT value are ignored when converging from TEXT to REAL. /// If there is no prefix that can be interpreted as a real number, the result of the conversion is 0.0. fn cast_text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) { - let trimmed = text.trim_start(); - if trimmed.is_empty() { - return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid); - } - if let Ok(num) = trimmed.parse::() { - if num.fract() == 0.0 { - return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt); - } - - return (OwnedValue::Float(num), CastTextToRealResultCode::HasDecimal); - } - // Try to find longest valid prefix that parses as a float - // TODO: inefficient - let mut end_index = trimmed.len().saturating_sub(1) as isize; - while end_index >= 0 { - if let Ok(num) = trimmed[..=end_index as usize].parse::() { - if num.fract() == 0.0 { - return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt); - } - - return (OwnedValue::Float(num), CastTextToRealResultCode::HasDecimal); - } - end_index -= 1; - } - return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid); + text_to_real(text) } /// NUMERIC Casting a TEXT or BLOB value into NUMERIC yields either an INTEGER or a REAL result. @@ -3747,6 +3676,7 @@ fn checked_cast_text_to_numeric(text: &str) -> std::result::Result OwnedValue { let (real_cast, rc_real) = cast_text_to_real(text); let (int_cast, rc_int) = cast_text_to_integer(text); + match (rc_real, rc_int) { ( CastTextToRealResultCode::NotValid,