diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 6b04ef462..7b037ffb6 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -1,6 +1,6 @@ use std::num::NonZero; -use super::{AggFunc, BranchOffset, CursorID, FuncCtx, PageIdx}; +use super::{cast_text_to_numeric, AggFunc, BranchOffset, CursorID, FuncCtx, PageIdx}; use crate::storage::wal::CheckpointMode; use crate::types::{OwnedValue, Record}; use limbo_macros::Description; @@ -688,15 +688,9 @@ pub enum Cookie { UserVersion = 6, } -fn cast_text_to_numerical(value: &str) -> OwnedValue { - if let Ok(x) = value.parse::() { - OwnedValue::Integer(x) - } else if let Ok(x) = value.parse::() { - OwnedValue::Float(x) - } else { - OwnedValue::Integer(0) - } -} +// fn cast_text_to_numeric(value: &str) -> OwnedValue { +// cast_text_to_numeric(value) +// } pub fn exec_add(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { if let OwnedValue::Agg(agg) = lhs { @@ -719,11 +713,11 @@ pub fn exec_add(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { | (OwnedValue::Integer(i), OwnedValue::Float(f)) => OwnedValue::Float(*f + *i as f64), (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_add( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_add(&cast_text_to_numerical(text.as_str()), other) + exec_add(&cast_text_to_numeric(text.as_str()), other) } _ => todo!(), } @@ -750,14 +744,14 @@ pub fn exec_subtract(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 - rhs), (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_subtract( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) => { - exec_subtract(&cast_text_to_numerical(text.as_str()), other) + exec_subtract(&cast_text_to_numeric(text.as_str()), other) } (other, OwnedValue::Text(text)) => { - exec_subtract(other, &cast_text_to_numerical(text.as_str())) + exec_subtract(other, &cast_text_to_numeric(text.as_str())) } _ => todo!(), } @@ -783,11 +777,11 @@ pub fn exec_multiply(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { | (OwnedValue::Float(f), OwnedValue::Integer(i)) => OwnedValue::Float(*i as f64 * { *f }), (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_multiply( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_multiply(&cast_text_to_numerical(text.as_str()), other) + exec_multiply(&cast_text_to_numeric(text.as_str()), other) } _ => todo!(), @@ -816,15 +810,11 @@ pub fn exec_divide(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { (OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 / rhs), (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_divide( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), - (OwnedValue::Text(text), other) => { - exec_divide(&cast_text_to_numerical(text.as_str()), other) - } - (other, OwnedValue::Text(text)) => { - exec_divide(other, &cast_text_to_numerical(text.as_str())) - } + (OwnedValue::Text(text), other) => exec_divide(&cast_text_to_numeric(text.as_str()), other), + (other, OwnedValue::Text(text)) => exec_divide(other, &cast_text_to_numeric(text.as_str())), _ => todo!(), } } @@ -849,11 +839,11 @@ pub fn exec_bit_and(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { (OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 & rh), (OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh & *rh as i64), (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_and( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_bit_and(&cast_text_to_numerical(text.as_str()), other) + exec_bit_and(&cast_text_to_numeric(text.as_str()), other) } _ => todo!(), } @@ -875,11 +865,11 @@ pub fn exec_bit_or(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { OwnedValue::Integer(*lh as i64 | *rh as i64) } (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_or( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_bit_or(&cast_text_to_numerical(text.as_str()), other) + exec_bit_or(&cast_text_to_numeric(text.as_str()), other) } _ => todo!(), } @@ -939,7 +929,7 @@ pub fn exec_bit_not(mut reg: &OwnedValue) -> OwnedValue { OwnedValue::Null => OwnedValue::Null, OwnedValue::Integer(i) => OwnedValue::Integer(!i), OwnedValue::Float(f) => OwnedValue::Integer(!(*f as i64)), - OwnedValue::Text(text) => exec_bit_not(&cast_text_to_numerical(text.as_str())), + OwnedValue::Text(text) => exec_bit_not(&cast_text_to_numeric(text.as_str())), _ => todo!(), } } @@ -966,14 +956,14 @@ pub fn exec_shift_left(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue OwnedValue::Integer(compute_shl(*lh as i64, *rh as i64)) } (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_left( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) => { - exec_shift_left(&cast_text_to_numerical(text.as_str()), other) + exec_shift_left(&cast_text_to_numeric(text.as_str()), other) } (other, OwnedValue::Text(text)) => { - exec_shift_left(other, &cast_text_to_numerical(text.as_str())) + exec_shift_left(other, &cast_text_to_numeric(text.as_str())) } _ => todo!(), } @@ -1005,14 +995,14 @@ pub fn exec_shift_right(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValu OwnedValue::Integer(compute_shr(*lh as i64, *rh as i64)) } (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_right( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) => { - exec_shift_right(&cast_text_to_numerical(text.as_str()), other) + exec_shift_right(&cast_text_to_numeric(text.as_str()), other) } (other, OwnedValue::Text(text)) => { - exec_shift_right(other, &cast_text_to_numerical(text.as_str())) + exec_shift_right(other, &cast_text_to_numeric(text.as_str())) } _ => todo!(), } @@ -1043,7 +1033,7 @@ pub fn exec_boolean_not(mut reg: &OwnedValue) -> OwnedValue { OwnedValue::Null => OwnedValue::Null, OwnedValue::Integer(i) => OwnedValue::Integer((*i == 0) as i64), OwnedValue::Float(f) => OwnedValue::Integer((*f == 0.0) as i64), - OwnedValue::Text(text) => exec_boolean_not(&cast_text_to_numerical(text.as_str())), + OwnedValue::Text(text) => exec_boolean_not(&cast_text_to_numeric(text.as_str())), _ => todo!(), } } @@ -1125,11 +1115,11 @@ pub fn exec_and(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { | (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0), (OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null, (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_and( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_and(&cast_text_to_numerical(text.as_str()), other) + exec_and(&cast_text_to_numeric(text.as_str()), other) } _ => OwnedValue::Integer(1), } @@ -1154,11 +1144,11 @@ pub fn exec_or(mut lhs: &OwnedValue, mut rhs: &OwnedValue) -> OwnedValue { | (OwnedValue::Float(0.0), OwnedValue::Float(0.0)) | (OwnedValue::Integer(0), OwnedValue::Integer(0)) => OwnedValue::Integer(0), (OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_or( - &cast_text_to_numerical(lhs.as_str()), - &cast_text_to_numerical(rhs.as_str()), + &cast_text_to_numeric(lhs.as_str()), + &cast_text_to_numeric(rhs.as_str()), ), (OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => { - exec_or(&cast_text_to_numerical(text.as_str()), other) + exec_or(&cast_text_to_numeric(text.as_str()), other) } _ => OwnedValue::Integer(1), } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index a07145dc9..5c16c7f09 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -403,6 +403,28 @@ macro_rules! must_be_btree_cursor { }}; } +/// Reference: +/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L798 +enum CastTextToIntResultCode { + NotInt = -1, + Success = 0, + ExcessSpace = 1, + #[allow(dead_code)] + TooLargeOrMalformed = 2, + #[allow(dead_code)] + SpecialCase = 3, +} + +/// Reference +/// https://github.com/sqlite/sqlite/blob/master/src/util.c#L529 +enum CastTextToRealResultCode { + PureInt = 1, + HasDecimal = 2, + NotValid = 0, + #[allow(dead_code)] + NotValidButPrefix = -1, +} + #[derive(Debug)] pub struct Program { pub max_registers: usize, @@ -3544,9 +3566,9 @@ fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { OwnedValue::Blob(b) => { // Convert BLOB to TEXT first let text = String::from_utf8_lossy(b); - cast_text_to_real(&text) + cast_text_to_real(&text).0 } - OwnedValue::Text(t) => cast_text_to_real(t.as_str()), + OwnedValue::Text(t) => cast_text_to_real(t.as_str()).0, OwnedValue::Integer(i) => OwnedValue::Float(*i as f64), OwnedValue::Float(f) => OwnedValue::Float(*f), _ => OwnedValue::Float(0.0), @@ -3555,9 +3577,9 @@ fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { OwnedValue::Blob(b) => { // Convert BLOB to TEXT first let text = String::from_utf8_lossy(b); - cast_text_to_integer(&text) + cast_text_to_integer(&text).0 } - OwnedValue::Text(t) => cast_text_to_integer(t.as_str()), + OwnedValue::Text(t) => cast_text_to_integer(t.as_str()).0, OwnedValue::Integer(i) => OwnedValue::Integer(*i), // A cast of a REAL value into an INTEGER results in the integer between the REAL value and zero // that is closest to the REAL value. If a REAL is greater than the greatest possible signed integer (+9223372036854775807) @@ -3629,48 +3651,68 @@ fn exec_replace(source: &OwnedValue, pattern: &OwnedValue, replacement: &OwnedVa /// When casting to INTEGER, if the text looks like a floating point value with an exponent, the exponent will be ignored /// because it is no part of the integer prefix. For example, "CAST('123e+5' AS INTEGER)" results in 123, not in 12300000. /// The CAST operator understands decimal integers only — conversion of hexadecimal integers stops at the "x" in the "0x" prefix of the hexadecimal integer string and thus result of the CAST is always zero. -fn cast_text_to_integer(text: &str) -> OwnedValue { +fn cast_text_to_integer(text: &str) -> (OwnedValue, CastTextToIntResultCode) { let text = text.trim(); if text.is_empty() { - return OwnedValue::Integer(0); + return (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt); } if let Ok(i) = text.parse::() { - return OwnedValue::Integer(i); + // Compare if the text value has more characters that the number of digits + the sign in the parsed int + if i.to_string().len() < text.len() { + // Means it was probably casted from a real or some malformed number. + return (OwnedValue::Integer(i), CastTextToIntResultCode::ExcessSpace); + } + + return (OwnedValue::Integer(i), CastTextToIntResultCode::Success); } // Try to find longest valid prefix that parses as an integer // TODO: inefficient let mut end_index = text.len().saturating_sub(1) as isize; while end_index >= 0 { if let Ok(i) = text[..=end_index as usize].parse::() { - return OwnedValue::Integer(i); + // Compare if the text value has more characters that the number of digits + the sign in the parsed int + if i.to_string().len() < text.len() { + // Means it was probably casted from a real or some malformed number. + return (OwnedValue::Integer(i), CastTextToIntResultCode::ExcessSpace); + } + + return (OwnedValue::Integer(i), CastTextToIntResultCode::Success); } end_index -= 1; } - OwnedValue::Integer(0) + return (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt); } /// When casting a TEXT value to REAL, the longest possible prefix of the value that can be interpreted /// as a real number is extracted from the TEXT value and the remainder ignored. Any leading spaces in /// the TEXT value are ignored when converging from TEXT to REAL. /// If there is no prefix that can be interpreted as a real number, the result of the conversion is 0.0. -fn cast_text_to_real(text: &str) -> OwnedValue { +fn cast_text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) { let trimmed = text.trim_start(); if trimmed.is_empty() { - return OwnedValue::Float(0.0); + return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid); } if let Ok(num) = trimmed.parse::() { - return OwnedValue::Float(num); + if num.fract() == 0.0 { + return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt); + } + + return (OwnedValue::Float(num), CastTextToRealResultCode::HasDecimal); } // Try to find longest valid prefix that parses as a float // TODO: inefficient let mut end_index = trimmed.len().saturating_sub(1) as isize; while end_index >= 0 { if let Ok(num) = trimmed[..=end_index as usize].parse::() { - return OwnedValue::Float(num); + if num.fract() == 0.0 { + return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt); + } + + return (OwnedValue::Float(num), CastTextToRealResultCode::HasDecimal); } end_index -= 1; } - OwnedValue::Float(0.0) + return (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid); } /// NUMERIC Casting a TEXT or BLOB value into NUMERIC yields either an INTEGER or a REAL result. @@ -3700,9 +3742,27 @@ fn checked_cast_text_to_numeric(text: &str) -> std::result::Result OwnedValue { - checked_cast_text_to_numeric(text).unwrap_or(OwnedValue::Integer(0)) + let (real_cast, rc_real) = cast_text_to_real(text); + let (int_cast, rc_int) = cast_text_to_integer(text); + match (rc_real, rc_int) { + ( + CastTextToRealResultCode::NotValid, + CastTextToIntResultCode::ExcessSpace + | CastTextToIntResultCode::Success + | CastTextToIntResultCode::NotInt, + ) => int_cast, + (CastTextToRealResultCode::NotValidButPrefix, _) => real_cast, + ( + CastTextToRealResultCode::NotValid, + CastTextToIntResultCode::TooLargeOrMalformed | CastTextToIntResultCode::SpecialCase, + ) => real_cast, + (CastTextToRealResultCode::PureInt, CastTextToIntResultCode::Success) => int_cast, + // CastTextToRealResultCode::NotValid => (), + _ => real_cast, + } } // Check if float can be losslessly converted to 51-bit integer