From 13639899a52a0d9de20343238462d645efa2df7a Mon Sep 17 00:00:00 2001 From: pedrocarlo Date: Thu, 20 Feb 2025 16:05:50 -0300 Subject: [PATCH] more adjustments to parser to handle edge cases --- core/util.rs | 407 ++++++++++++++++++++++++++++++----------------- core/vdbe/mod.rs | 5 +- 2 files changed, 264 insertions(+), 148 deletions(-) diff --git a/core/util.rs b/core/util.rs index 7a258c6e3..64c01e5e9 100644 --- a/core/util.rs +++ b/core/util.rs @@ -468,15 +468,16 @@ pub fn text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) { let mut has_decimal_digit = false; let mut excess_space = false; - let chars = text.chars(); + let mut chars = text.chars(); - for c in chars { + 'outer: while let Some(c) = chars.next() { match c { '0'..='9' if !has_decimal_separator => { has_digit = true; accum.push(c); } '0'..='9' => { + // This pattern is used for both decimal and exponent digits has_decimal_digit = true; accum.push(c); } @@ -484,17 +485,53 @@ pub fn text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) { sign = true; accum.push(c); } - '+' | '-' if has_exponent && !exp_sign => { - exp_sign = true; - accum.push(c); - } '.' if !has_decimal_separator => { - has_decimal_separator = true; - accum.push(c); + // Check if next char is a number + if let Some(ch) = chars.next() { + match ch { + '0'..='9' => { + has_decimal_separator = true; + accum.push(c); + accum.push(ch); + } + _ => { + excess_space = true; + break; + } + } + } else { + excess_space = true; + } } - 'E' | 'e' if !has_decimal_separator || has_decimal_digit => { - has_exponent = true; - accum.push(c); + 'E' | 'e' if !has_exponent && (!has_decimal_separator || has_decimal_digit) => { + // Lookahead if next char is a number or sign + let mut curr_sign = None; + loop { + if let Some(ch) = chars.next() { + match ch { + '0'..='9' => { + has_exponent = true; + accum.push(c); + if let Some(sign) = curr_sign { + exp_sign = true; + accum.push(sign); + } + accum.push(ch); + break; + } + '+' | '-' => { + curr_sign = Some(ch); + } + _ => { + excess_space = true; + break 'outer; + } + } + } else { + excess_space = true; + break 'outer; + } + } } _ => { excess_space = true; @@ -503,8 +540,18 @@ pub fn text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) { } } + dbg!( + &has_decimal_separator, + &sign, + &exp_sign, + &has_exponent, + &has_digit, + &has_decimal_digit, + &excess_space + ); + if let Ok(num) = accum.parse::() { - if !has_decimal_separator && !exp_sign && !has_exponent { + if !has_decimal_separator && !exp_sign && !has_exponent && !excess_space { return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt); } @@ -781,193 +828,261 @@ pub mod tests { #[test] fn test_text_to_integer() { - let pairs = vec![ + assert_eq!( + text_to_integer("1"), + (OwnedValue::Integer(1), CastTextToIntResultCode::Success), + ); + assert_eq!( + text_to_integer("-1"), + (OwnedValue::Integer(-1), CastTextToIntResultCode::Success), + ); + assert_eq!( + text_to_integer("10000000"), ( - text_to_integer("1"), - (OwnedValue::Integer(1), CastTextToIntResultCode::Success), + OwnedValue::Integer(10000000), + CastTextToIntResultCode::Success, ), + ); + assert_eq!( + text_to_integer("-10000000"), ( - text_to_integer("-1"), - (OwnedValue::Integer(-1), CastTextToIntResultCode::Success), + OwnedValue::Integer(-10000000), + CastTextToIntResultCode::Success, ), + ); + assert_eq!( + text_to_integer("xxx"), + (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt), + ); + assert_eq!( + text_to_integer("123xxx"), ( - text_to_integer("10000000"), - ( - OwnedValue::Integer(10000000), - CastTextToIntResultCode::Success, - ), + OwnedValue::Integer(123), + CastTextToIntResultCode::ExcessSpace, ), + ); + assert_eq!( + text_to_integer("9223372036854775807"), ( - text_to_integer("-10000000"), - ( - OwnedValue::Integer(-10000000), - CastTextToIntResultCode::Success, - ), + OwnedValue::Integer(i64::MAX), + CastTextToIntResultCode::Success, ), + ); + assert_eq!( + text_to_integer("9223372036854775808"), ( - text_to_integer("xxx"), - (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt), + OwnedValue::Integer(0), + CastTextToIntResultCode::TooLargeOrMalformed, ), + ); + assert_eq!( + text_to_integer("-9223372036854775808"), ( - text_to_integer("123xxx"), - ( - OwnedValue::Integer(123), - CastTextToIntResultCode::ExcessSpace, - ), + OwnedValue::Integer(i64::MIN), + CastTextToIntResultCode::Success, ), + ); + assert_eq!( + text_to_integer("-9223372036854775809"), ( - text_to_integer("9223372036854775807"), - ( - OwnedValue::Integer(i64::MAX), - CastTextToIntResultCode::Success, - ), + OwnedValue::Integer(0), + CastTextToIntResultCode::TooLargeOrMalformed, ), - ( - text_to_integer("9223372036854775808"), - ( - OwnedValue::Integer(0), - CastTextToIntResultCode::TooLargeOrMalformed, - ), - ), - ( - text_to_integer("-9223372036854775808"), - ( - OwnedValue::Integer(i64::MIN), - CastTextToIntResultCode::Success, - ), - ), - ( - text_to_integer("-9223372036854775809"), - ( - OwnedValue::Integer(0), - CastTextToIntResultCode::TooLargeOrMalformed, - ), - ), - ]; - - for (left, right) in pairs { - assert_eq!(left, right); - } + ); + assert_eq!( + text_to_integer("-"), + (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt,), + ); } #[test] fn test_text_to_real() { - let pairs = vec![ + assert_eq!( + text_to_real("1"), + (OwnedValue::Float(1.0), CastTextToRealResultCode::PureInt), + ); + assert_eq!( + text_to_real("-1"), + (OwnedValue::Float(-1.0), CastTextToRealResultCode::PureInt), + ); + assert_eq!( + text_to_real("1.0"), + (OwnedValue::Float(1.0), CastTextToRealResultCode::HasDecimal), + ); + assert_eq!( + text_to_real("-1.0"), ( - text_to_real("1"), - (OwnedValue::Float(1.0), CastTextToRealResultCode::PureInt), + OwnedValue::Float(-1.0), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("1e10"), ( - text_to_real("-1"), - (OwnedValue::Float(-1.0), CastTextToRealResultCode::PureInt), + OwnedValue::Float(1e10), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("-1e10"), ( - text_to_real("1.0"), - (OwnedValue::Float(1.0), CastTextToRealResultCode::HasDecimal), + OwnedValue::Float(-1e10), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("1e-10"), ( - text_to_real("-1.0"), - ( - OwnedValue::Float(-1.0), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(1e-10), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("-1e-10"), ( - text_to_real("1e10"), - ( - OwnedValue::Float(1e10), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(-1e-10), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("1.123e10"), ( - text_to_real("-1e10"), - ( - OwnedValue::Float(-1e10), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(1.123e10), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("-1.123e10"), ( - text_to_real("1e-10"), - ( - OwnedValue::Float(1e-10), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(-1.123e10), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("1.123e-10"), ( - text_to_real("-1e-10"), - ( - OwnedValue::Float(-1e-10), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(1.123e-10), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("-1.123e-10"), ( - text_to_real("1.123e10"), - ( - OwnedValue::Float(1.123e10), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(-1.123e-10), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("1-282584294928"), ( - text_to_real("-1.123e10"), - ( - OwnedValue::Float(-1.123e10), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(1.0), + CastTextToRealResultCode::NotValidButPrefix ), + ); + assert_eq!( + text_to_real("xxx"), + (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid), + ); + assert_eq!( + text_to_real("1.7976931348623157e308"), ( - text_to_real("1.123e-10"), - ( - OwnedValue::Float(1.123e-10), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(f64::MAX), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("1.7976931348623157e309"), ( - text_to_real("-1.123e-10"), - ( - OwnedValue::Float(-1.123e-10), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(f64::INFINITY), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("-1.7976931348623157e308"), ( - text_to_real("1-282584294928"), - (OwnedValue::Float(1.0), CastTextToRealResultCode::PureInt), + OwnedValue::Float(f64::MIN), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("-1.7976931348623157e309"), ( - text_to_real("xxx"), - (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid), + OwnedValue::Float(f64::NEG_INFINITY), + CastTextToRealResultCode::HasDecimal, ), + ); + assert_eq!( + text_to_real("1E"), ( - text_to_real("1.7976931348623157e308"), - ( - OwnedValue::Float(f64::MAX), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(1.0), + CastTextToRealResultCode::NotValidButPrefix, ), + ); + assert_eq!( + text_to_real("1EE"), ( - text_to_real("1.7976931348623157e309"), - ( - OwnedValue::Float(f64::INFINITY), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(1.0), + CastTextToRealResultCode::NotValidButPrefix, ), + ); + assert_eq!( + text_to_real("-1E"), ( - text_to_real("-1.7976931348623157e308"), - ( - OwnedValue::Float(f64::MIN), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(-1.0), + CastTextToRealResultCode::NotValidButPrefix, ), + ); + assert_eq!( + text_to_real("1."), ( - text_to_real("-1.7976931348623157e309"), - ( - OwnedValue::Float(f64::NEG_INFINITY), - CastTextToRealResultCode::HasDecimal, - ), + OwnedValue::Float(1.0), + CastTextToRealResultCode::NotValidButPrefix, ), - ]; - - for (left, right) in pairs { - assert_eq!(left, right); - } + ); + assert_eq!( + text_to_real("-1."), + ( + OwnedValue::Float(-1.0), + CastTextToRealResultCode::NotValidButPrefix, + ), + ); + assert_eq!( + text_to_real("1.23E"), + ( + OwnedValue::Float(1.23), + CastTextToRealResultCode::NotValidButPrefix, + ), + ); + assert_eq!( + text_to_real("1.23E-"), + ( + OwnedValue::Float(1.23), + CastTextToRealResultCode::NotValidButPrefix, + ), + ); + assert_eq!( + text_to_real("0"), + (OwnedValue::Float(0.0), CastTextToRealResultCode::PureInt,), + ); + assert_eq!( + text_to_real("-0"), + (OwnedValue::Float(-0.0), CastTextToRealResultCode::PureInt,), + ); + assert_eq!( + text_to_real("-0"), + (OwnedValue::Float(0.0), CastTextToRealResultCode::PureInt,), + ); + assert_eq!( + text_to_real("-0.0"), + (OwnedValue::Float(0.0), CastTextToRealResultCode::HasDecimal,), + ); + assert_eq!( + text_to_real("0.0"), + (OwnedValue::Float(0.0), CastTextToRealResultCode::HasDecimal,), + ); + assert_eq!( + text_to_real("-"), + (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid,), + ); } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 25f7cb6ac..84a98ab83 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -3676,7 +3676,7 @@ fn checked_cast_text_to_numeric(text: &str) -> std::result::Result OwnedValue { let (real_cast, rc_real) = cast_text_to_real(text); let (int_cast, rc_int) = cast_text_to_integer(text); - + dbg!(&real_cast, &rc_real, &int_cast, &rc_int); match (rc_real, rc_int) { ( CastTextToRealResultCode::NotValid, @@ -3684,12 +3684,13 @@ fn cast_text_to_numeric(text: &str) -> OwnedValue { | CastTextToIntResultCode::Success | CastTextToIntResultCode::NotInt, ) => int_cast, - (CastTextToRealResultCode::NotValidButPrefix, _) => real_cast, ( CastTextToRealResultCode::NotValid, CastTextToIntResultCode::TooLargeOrMalformed | CastTextToIntResultCode::SpecialCase, ) => real_cast, + (CastTextToRealResultCode::NotValidButPrefix, _) => real_cast, (CastTextToRealResultCode::PureInt, CastTextToIntResultCode::Success) => int_cast, + (CastTextToRealResultCode::HasDecimal, _) => real_cast, _ => real_cast, } }