From 13639899a52a0d9de20343238462d645efa2df7a Mon Sep 17 00:00:00 2001
From: pedrocarlo <pedro.muniz.carlo@gmail.com>
Date: Thu, 20 Feb 2025 16:05:50 -0300
Subject: [PATCH] more adjustments to parser to handle edge cases

---
 core/util.rs     | 407 ++++++++++++++++++++++++++++++-----------------
 core/vdbe/mod.rs |   5 +-
 2 files changed, 264 insertions(+), 148 deletions(-)
diff --git a/core/util.rs b/core/util.rs
index 7a258c6e3..64c01e5e9 100644
--- a/core/util.rs
+++ b/core/util.rs
@@ -468,15 +468,16 @@ pub fn text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) {
     let mut has_decimal_digit = false;
     let mut excess_space = false;
 
-    let chars = text.chars();
+    let mut chars = text.chars();
 
-    for c in chars {
+    'outer: while let Some(c) = chars.next() {
         match c {
             '0'..='9' if !has_decimal_separator => {
                 has_digit = true;
                 accum.push(c);
             }
             '0'..='9' => {
+                // This pattern is used for both decimal and exponent digits
                 has_decimal_digit = true;
                 accum.push(c);
             }
@@ -484,17 +485,53 @@ pub fn text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) {
                 sign = true;
                 accum.push(c);
             }
-            '+' | '-' if has_exponent && !exp_sign => {
-                exp_sign = true;
-                accum.push(c);
-            }
             '.' if !has_decimal_separator => {
-                has_decimal_separator = true;
-                accum.push(c);
+                // Check if next char is a number
+                if let Some(ch) = chars.next() {
+                    match ch {
+                        '0'..='9' => {
+                            has_decimal_separator = true;
+                            accum.push(c);
+                            accum.push(ch);
+                        }
+                        _ => {
+                            excess_space = true;
+                            break;
+                        }
+                    }
+                } else {
+                    excess_space = true;
+                }
             }
-            'E' | 'e' if !has_decimal_separator || has_decimal_digit => {
-                has_exponent = true;
-                accum.push(c);
+            'E' | 'e' if !has_exponent && (!has_decimal_separator || has_decimal_digit) => {
+                // Lookahead if next char is a number or sign
+                let mut curr_sign = None;
+                loop {
+                    if let Some(ch) = chars.next() {
+                        match ch {
+                            '0'..='9' => {
+                                has_exponent = true;
+                                accum.push(c);
+                                if let Some(sign) = curr_sign {
+                                    exp_sign = true;
+                                    accum.push(sign);
+                                }
+                                accum.push(ch);
+                                break;
+                            }
+                            '+' | '-' => {
+                                curr_sign = Some(ch);
+                            }
+                            _ => {
+                                excess_space = true;
+                                break 'outer;
+                            }
+                        }
+                    } else {
+                        excess_space = true;
+                        break 'outer;
+                    }
+                }
             }
             _ => {
                 excess_space = true;
@@ -503,8 +540,18 @@ pub fn text_to_real(text: &str) -> (OwnedValue, CastTextToRealResultCode) {
         }
     }
 
+    dbg!(
+        &has_decimal_separator,
+        &sign,
+        &exp_sign,
+        &has_exponent,
+        &has_digit,
+        &has_decimal_digit,
+        &excess_space
+    );
+
     if let Ok(num) = accum.parse::<f64>() {
-        if !has_decimal_separator && !exp_sign && !has_exponent {
+        if !has_decimal_separator && !exp_sign && !has_exponent && !excess_space {
             return (OwnedValue::Float(num), CastTextToRealResultCode::PureInt);
         }
 
@@ -781,193 +828,261 @@ pub mod tests {
 
     #[test]
     fn test_text_to_integer() {
-        let pairs = vec![
+        assert_eq!(
+            text_to_integer("1"),
+            (OwnedValue::Integer(1), CastTextToIntResultCode::Success),
+        );
+        assert_eq!(
+            text_to_integer("-1"),
+            (OwnedValue::Integer(-1), CastTextToIntResultCode::Success),
+        );
+        assert_eq!(
+            text_to_integer("10000000"),
             (
-                text_to_integer("1"),
-                (OwnedValue::Integer(1), CastTextToIntResultCode::Success),
+                OwnedValue::Integer(10000000),
+                CastTextToIntResultCode::Success,
             ),
+        );
+        assert_eq!(
+            text_to_integer("-10000000"),
             (
-                text_to_integer("-1"),
-                (OwnedValue::Integer(-1), CastTextToIntResultCode::Success),
+                OwnedValue::Integer(-10000000),
+                CastTextToIntResultCode::Success,
             ),
+        );
+        assert_eq!(
+            text_to_integer("xxx"),
+            (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt),
+        );
+        assert_eq!(
+            text_to_integer("123xxx"),
             (
-                text_to_integer("10000000"),
-                (
-                    OwnedValue::Integer(10000000),
-                    CastTextToIntResultCode::Success,
-                ),
+                OwnedValue::Integer(123),
+                CastTextToIntResultCode::ExcessSpace,
             ),
+        );
+        assert_eq!(
+            text_to_integer("9223372036854775807"),
             (
-                text_to_integer("-10000000"),
-                (
-                    OwnedValue::Integer(-10000000),
-                    CastTextToIntResultCode::Success,
-                ),
+                OwnedValue::Integer(i64::MAX),
+                CastTextToIntResultCode::Success,
             ),
+        );
+        assert_eq!(
+            text_to_integer("9223372036854775808"),
             (
-                text_to_integer("xxx"),
-                (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt),
+                OwnedValue::Integer(0),
+                CastTextToIntResultCode::TooLargeOrMalformed,
             ),
+        );
+        assert_eq!(
+            text_to_integer("-9223372036854775808"),
             (
-                text_to_integer("123xxx"),
-                (
-                    OwnedValue::Integer(123),
-                    CastTextToIntResultCode::ExcessSpace,
-                ),
+                OwnedValue::Integer(i64::MIN),
+                CastTextToIntResultCode::Success,
             ),
+        );
+        assert_eq!(
+            text_to_integer("-9223372036854775809"),
             (
-                text_to_integer("9223372036854775807"),
-                (
-                    OwnedValue::Integer(i64::MAX),
-                    CastTextToIntResultCode::Success,
-                ),
+                OwnedValue::Integer(0),
+                CastTextToIntResultCode::TooLargeOrMalformed,
             ),
-            (
-                text_to_integer("9223372036854775808"),
-                (
-                    OwnedValue::Integer(0),
-                    CastTextToIntResultCode::TooLargeOrMalformed,
-                ),
-            ),
-            (
-                text_to_integer("-9223372036854775808"),
-                (
-                    OwnedValue::Integer(i64::MIN),
-                    CastTextToIntResultCode::Success,
-                ),
-            ),
-            (
-                text_to_integer("-9223372036854775809"),
-                (
-                    OwnedValue::Integer(0),
-                    CastTextToIntResultCode::TooLargeOrMalformed,
-                ),
-            ),
-        ];
-
-        for (left, right) in pairs {
-            assert_eq!(left, right);
-        }
+        );
+        assert_eq!(
+            text_to_integer("-"),
+            (OwnedValue::Integer(0), CastTextToIntResultCode::NotInt,),
+        );
     }
 
     #[test]
     fn test_text_to_real() {
-        let pairs = vec![
+        assert_eq!(
+            text_to_real("1"),
+            (OwnedValue::Float(1.0), CastTextToRealResultCode::PureInt),
+        );
+        assert_eq!(
+            text_to_real("-1"),
+            (OwnedValue::Float(-1.0), CastTextToRealResultCode::PureInt),
+        );
+        assert_eq!(
+            text_to_real("1.0"),
+            (OwnedValue::Float(1.0), CastTextToRealResultCode::HasDecimal),
+        );
+        assert_eq!(
+            text_to_real("-1.0"),
             (
-                text_to_real("1"),
-                (OwnedValue::Float(1.0), CastTextToRealResultCode::PureInt),
+                OwnedValue::Float(-1.0),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("1e10"),
             (
-                text_to_real("-1"),
-                (OwnedValue::Float(-1.0), CastTextToRealResultCode::PureInt),
+                OwnedValue::Float(1e10),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("-1e10"),
             (
-                text_to_real("1.0"),
-                (OwnedValue::Float(1.0), CastTextToRealResultCode::HasDecimal),
+                OwnedValue::Float(-1e10),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("1e-10"),
             (
-                text_to_real("-1.0"),
-                (
-                    OwnedValue::Float(-1.0),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(1e-10),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("-1e-10"),
             (
-                text_to_real("1e10"),
-                (
-                    OwnedValue::Float(1e10),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(-1e-10),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("1.123e10"),
             (
-                text_to_real("-1e10"),
-                (
-                    OwnedValue::Float(-1e10),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(1.123e10),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("-1.123e10"),
             (
-                text_to_real("1e-10"),
-                (
-                    OwnedValue::Float(1e-10),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(-1.123e10),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("1.123e-10"),
             (
-                text_to_real("-1e-10"),
-                (
-                    OwnedValue::Float(-1e-10),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(1.123e-10),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("-1.123e-10"),
             (
-                text_to_real("1.123e10"),
-                (
-                    OwnedValue::Float(1.123e10),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(-1.123e-10),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("1-282584294928"),
             (
-                text_to_real("-1.123e10"),
-                (
-                    OwnedValue::Float(-1.123e10),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(1.0),
+                CastTextToRealResultCode::NotValidButPrefix
             ),
+        );
+        assert_eq!(
+            text_to_real("xxx"),
+            (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid),
+        );
+        assert_eq!(
+            text_to_real("1.7976931348623157e308"),
             (
-                text_to_real("1.123e-10"),
-                (
-                    OwnedValue::Float(1.123e-10),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(f64::MAX),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("1.7976931348623157e309"),
             (
-                text_to_real("-1.123e-10"),
-                (
-                    OwnedValue::Float(-1.123e-10),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(f64::INFINITY),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("-1.7976931348623157e308"),
             (
-                text_to_real("1-282584294928"),
-                (OwnedValue::Float(1.0), CastTextToRealResultCode::PureInt),
+                OwnedValue::Float(f64::MIN),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("-1.7976931348623157e309"),
             (
-                text_to_real("xxx"),
-                (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid),
+                OwnedValue::Float(f64::NEG_INFINITY),
+                CastTextToRealResultCode::HasDecimal,
             ),
+        );
+        assert_eq!(
+            text_to_real("1E"),
             (
-                text_to_real("1.7976931348623157e308"),
-                (
-                    OwnedValue::Float(f64::MAX),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(1.0),
+                CastTextToRealResultCode::NotValidButPrefix,
             ),
+        );
+        assert_eq!(
+            text_to_real("1EE"),
             (
-                text_to_real("1.7976931348623157e309"),
-                (
-                    OwnedValue::Float(f64::INFINITY),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(1.0),
+                CastTextToRealResultCode::NotValidButPrefix,
             ),
+        );
+        assert_eq!(
+            text_to_real("-1E"),
             (
-                text_to_real("-1.7976931348623157e308"),
-                (
-                    OwnedValue::Float(f64::MIN),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(-1.0),
+                CastTextToRealResultCode::NotValidButPrefix,
             ),
+        );
+        assert_eq!(
+            text_to_real("1."),
             (
-                text_to_real("-1.7976931348623157e309"),
-                (
-                    OwnedValue::Float(f64::NEG_INFINITY),
-                    CastTextToRealResultCode::HasDecimal,
-                ),
+                OwnedValue::Float(1.0),
+                CastTextToRealResultCode::NotValidButPrefix,
             ),
-        ];
-
-        for (left, right) in pairs {
-            assert_eq!(left, right);
-        }
+        );
+        assert_eq!(
+            text_to_real("-1."),
+            (
+                OwnedValue::Float(-1.0),
+                CastTextToRealResultCode::NotValidButPrefix,
+            ),
+        );
+        assert_eq!(
+            text_to_real("1.23E"),
+            (
+                OwnedValue::Float(1.23),
+                CastTextToRealResultCode::NotValidButPrefix,
+            ),
+        );
+        assert_eq!(
+            text_to_real("1.23E-"),
+            (
+                OwnedValue::Float(1.23),
+                CastTextToRealResultCode::NotValidButPrefix,
+            ),
+        );
+        assert_eq!(
+            text_to_real("0"),
+            (OwnedValue::Float(0.0), CastTextToRealResultCode::PureInt,),
+        );
+        assert_eq!(
+            text_to_real("-0"),
+            (OwnedValue::Float(-0.0), CastTextToRealResultCode::PureInt,),
+        );
+        assert_eq!(
+            text_to_real("-0"),
+            (OwnedValue::Float(0.0), CastTextToRealResultCode::PureInt,),
+        );
+        assert_eq!(
+            text_to_real("-0.0"),
+            (OwnedValue::Float(0.0), CastTextToRealResultCode::HasDecimal,),
+        );
+        assert_eq!(
+            text_to_real("0.0"),
+            (OwnedValue::Float(0.0), CastTextToRealResultCode::HasDecimal,),
+        );
+        assert_eq!(
+            text_to_real("-"),
+            (OwnedValue::Float(0.0), CastTextToRealResultCode::NotValid,),
+        );
     }
 }
diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs
index 25f7cb6ac..84a98ab83 100644
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -3676,7 +3676,7 @@ fn checked_cast_text_to_numeric(text: &str) -> std::result::Result<OwnedValue, (
 fn cast_text_to_numeric(text: &str) -> OwnedValue {
     let (real_cast, rc_real) = cast_text_to_real(text);
     let (int_cast, rc_int) = cast_text_to_integer(text);
-
+    dbg!(&real_cast, &rc_real, &int_cast, &rc_int);
     match (rc_real, rc_int) {
         (
             CastTextToRealResultCode::NotValid,
@@ -3684,12 +3684,13 @@ fn cast_text_to_numeric(text: &str) -> OwnedValue {
             | CastTextToIntResultCode::Success
             | CastTextToIntResultCode::NotInt,
         ) => int_cast,
-        (CastTextToRealResultCode::NotValidButPrefix, _) => real_cast,
         (
             CastTextToRealResultCode::NotValid,
             CastTextToIntResultCode::TooLargeOrMalformed | CastTextToIntResultCode::SpecialCase,
         ) => real_cast,
+        (CastTextToRealResultCode::NotValidButPrefix, _) => real_cast,
         (CastTextToRealResultCode::PureInt, CastTextToIntResultCode::Success) => int_cast,
+        (CastTextToRealResultCode::HasDecimal, _) => real_cast,
         _ => real_cast,
     }
 }