fix: float formatting and float comparison

2026-01-07 10:14:21 +01:00 · 2025-09-04 20:11:07 -03:00
parent 2ea2be6f85
commit 73e901010c
4 changed files with 206 additions and 152 deletions
--- a/core/numeric/mod.rs
+++ b/core/numeric/mod.rs
@@ -1,3 +1,5 @@
+use std::str;
+
 use crate::Value;

 pub mod nonnan;
@@ -352,6 +354,16 @@ const VERTICAL_TAB: char = '\u{b}';
 #[derive(Debug, Clone, Copy)]
 struct DoubleDouble(f64, f64);

+impl DoubleDouble {
+    pub const E100: Self = DoubleDouble(1.0e+100, -1.590_289_110_975_991_8e83);
+    pub const E10: Self = DoubleDouble(1.0e+10, 0.0);
+    pub const E1: Self = DoubleDouble(1.0e+01, 0.0);
+
+    pub const NEG_E100: Self = DoubleDouble(1.0e-100, -1.999_189_980_260_288_3e-117);
+    pub const NEG_E10: Self = DoubleDouble(1.0e-10, -3.643_219_731_549_774e-27);
+    pub const NEG_E1: Self = DoubleDouble(1.0e-01, -5.551_115_123_125_783e-18);
+}
+
 impl From<u64> for DoubleDouble {
    fn from(value: u64) -> Self {
        let r = value as f64;
@@ -371,6 +383,16 @@ impl From<u64> for DoubleDouble {
    }
 }

+impl From<DoubleDouble> for u64 {
+    fn from(value: DoubleDouble) -> Self {
+        if value.1 < 0.0 {
+            value.0 as u64 - value.1.abs() as u64
+        } else {
+            value.0 as u64 + value.1 as u64
+        }
+    }
+}
+
 impl From<DoubleDouble> for f64 {
    fn from(DoubleDouble(a, aa): DoubleDouble) -> Self {
        a + aa
@@ -489,6 +511,10 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
    }

    if input.next_if(|ch| matches!(ch, '.')).is_some() {
+        if matches!(input.peek(), Some('e' | 'E')) {
+            return None;
+        }
+
        if had_digits || input.peek().is_some_and(char::is_ascii_digit) {
            is_fractional = true
        }
@@ -539,28 +565,28 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
    if exponent > 0 {
        while exponent >= 100 {
            exponent -= 100;
-            result *= DoubleDouble(1.0e+100, -1.590_289_110_975_991_8e83);
+            result *= DoubleDouble::E100;
        }
        while exponent >= 10 {
            exponent -= 10;
-            result *= DoubleDouble(1.0e+10, 0.0);
+            result *= DoubleDouble::E10;
        }
        while exponent >= 1 {
            exponent -= 1;
-            result *= DoubleDouble(1.0e+01, 0.0);
+            result *= DoubleDouble::E1;
        }
    } else {
        while exponent <= -100 {
            exponent += 100;
-            result *= DoubleDouble(1.0e-100, -1.999_189_980_260_288_3e-117);
+            result *= DoubleDouble::NEG_E100;
        }
        while exponent <= -10 {
            exponent += 10;
-            result *= DoubleDouble(1.0e-10, -3.643_219_731_549_774e-27);
+            result *= DoubleDouble::NEG_E10;
        }
        while exponent <= -1 {
            exponent += 1;
-            result *= DoubleDouble(1.0e-01, -5.551_115_123_125_783e-18);
+            result *= DoubleDouble::NEG_E1;
        }
    }

@@ -573,3 +599,128 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
        StrToF64::Decimal(result)
    })
 }
+
+pub fn format_float(v: f64) -> String {
+    if v.is_nan() {
+        return "NaN".to_string();
+    }
+
+    if v.is_infinite() {
+        return if v.is_sign_negative() { "-Inf" } else { "Inf" }.to_string();
+    }
+
+    if v == 0.0 {
+        return "0.0".to_string();
+    }
+
+    let negative = v < 0.0;
+    let mut d = DoubleDouble(v.abs(), 0.0);
+    let mut exp = 0;
+
+    if d.0 > 9.223372036854774784e+18 {
+        while d.0 > 9.223372036854774784e+118 {
+            exp += 100;
+            d *= DoubleDouble::NEG_E100;
+        }
+        while d.0 > 9.223372036854774784e+28 {
+            exp += 10;
+            d *= DoubleDouble::NEG_E10;
+        }
+        while d.0 > 9.223372036854774784e+18 {
+            exp += 1;
+            d *= DoubleDouble::NEG_E1;
+        }
+    } else {
+        while d.0 < 9.223372036854774784e-83 {
+            exp -= 100;
+            d *= DoubleDouble::E100;
+        }
+        while d.0 < 9.223372036854774784e+07 {
+            exp -= 10;
+            d *= DoubleDouble::E10;
+        }
+        while d.0 < 9.22337203685477478e+17 {
+            exp -= 1;
+            d *= DoubleDouble::E1;
+        }
+    }
+
+    let v = u64::from(d);
+
+    let mut digits = v.to_string().into_bytes();
+
+    let precision = 15;
+
+    let mut decimal_pos = digits.len() as i32 + exp;
+
+    'out: {
+        if digits.len() > precision {
+            let round_up = digits[precision] >= b'5';
+            digits.truncate(precision);
+
+            if round_up {
+                for i in (0..precision).rev() {
+                    if digits[i] < b'9' {
+                        digits[i] += 1;
+                        break 'out;
+                    }
+                    digits[i] = b'0';
+                }
+
+                digits.insert(0, b'1');
+                decimal_pos += 1;
+            }
+        }
+    }
+
+    while digits.len() > 1 && digits[digits.len() - 1] == b'0' {
+        digits.pop();
+    }
+
+    let exp = decimal_pos - 1;
+
+    if (-4..=14).contains(&exp) {
+        format!(
+            "{}{}.{}{}",
+            negative.then_some("-").unwrap_or_default(),
+            if decimal_pos > 0 {
+                let zeroes = (decimal_pos - digits.len() as i32).max(0) as usize;
+                let digits = digits.get(0..(decimal_pos.min(digits.len() as i32) as usize)).unwrap();
+                (unsafe { str::from_utf8_unchecked(digits) }).to_owned() + &"0".repeat(zeroes)
+            } else {
+                "0".to_string()
+            },
+            "0".repeat(decimal_pos.min(0).abs() as usize),
+            digits
+                .get((decimal_pos.max(0) as usize)..)
+                .filter(|v| !v.is_empty())
+                .map(|v| unsafe { str::from_utf8_unchecked(v) })
+                .unwrap_or("0")
+        )
+    } else {
+        format!(
+            "{}{}.{}e{}{:0width$}",
+            negative.then_some("-").unwrap_or_default(),
+            digits.get(0).cloned().unwrap_or(b'0') as char,
+            digits
+                .get(1..)
+                .filter(|v| !v.is_empty())
+                .map(|v| unsafe { str::from_utf8_unchecked(v) })
+                .unwrap_or("0"),
+            if exp.is_positive() { "+" } else { "-" },
+            exp.abs(),
+            width = if exp > 100 { 3 } else { 2 }
+        )
+    }
+}
+
+#[test]
+fn test_decode_float() {
+    assert_eq!(format_float(9.93e-322), "9.93071948140905e-322");
+    assert_eq!(format_float(9.93), "9.93");
+    assert_eq!(format_float(0.093), "0.093");
+    assert_eq!(format_float(-0.093), "-0.093");
+    assert_eq!(format_float(0.0), "0.0");
+    assert_eq!(format_float(4.94e-322), "4.94065645841247e-322");
+    assert_eq!(format_float(-20228007.0), "-20228007.0");
+}
--- a/core/types.rs
+++ b/core/types.rs
@@ -5,6 +5,7 @@ use turso_parser::ast::SortOrder;

 use crate::error::LimboError;
 use crate::ext::{ExtValue, ExtValueType};
+use crate::numeric::format_float;
 use crate::pseudo::PseudoCursor;
 use crate::schema::Index;
 use crate::storage::btree::BTreeCursor;
@@ -17,7 +18,6 @@ use crate::vtab::VirtualTableCursor;
 use crate::{turso_assert, Completion, CompletionError, Result, IO};
 use std::fmt::{Debug, Display};

-const MAX_REAL_SIZE: u8 = 15;

 /// SQLite by default uses 2000 as maximum numbers in a row.
 /// It controlld by the constant called SQLITE_MAX_COLUMN
@@ -390,6 +390,13 @@ impl Value {
            Value::Blob(b) => out.extend_from_slice(b),
        };
    }
+
+    pub fn cast_text(&self) -> Option<String> {
+        Some(match self {
+            Value::Null => return None,
+            v => v.to_string(),
+        })
+    }
 }

 #[derive(Debug, Clone, PartialEq)]
@@ -425,108 +432,7 @@ impl Display for Value {
            Self::Integer(i) => {
                write!(f, "{i}")
            }
-            Self::Float(fl) => {
-                let fl = *fl;
-                if fl == f64::INFINITY {
-                    return write!(f, "Inf");
-                }
-                if fl == f64::NEG_INFINITY {
-                    return write!(f, "-Inf");
-                }
-                if fl.is_nan() {
-                    return write!(f, "");
-                }
-                // handle negative 0
-                if fl == -0.0 {
-                    return write!(f, "{:.1}", fl.abs());
-                }
-
-                // handle scientific notation without trailing zeros
-                if (fl.abs() < 1e-4 || fl.abs() >= 1e15) && fl != 0.0 {
-                    let sci_notation = format!("{fl:.14e}");
-                    let parts: Vec<&str> = sci_notation.split('e').collect();
-
-                    if parts.len() == 2 {
-                        let mantissa = parts[0];
-                        let exponent = parts[1];
-
-                        let decimal_parts: Vec<&str> = mantissa.split('.').collect();
-                        if decimal_parts.len() == 2 {
-                            let whole = decimal_parts[0];
-                            // 1.{this part}
-                            let mut fraction = String::from(decimal_parts[1]);
-
-                            //removing trailing 0 from fraction
-                            while fraction.ends_with('0') {
-                                fraction.pop();
-                            }
-
-                            let trimmed_mantissa = if fraction.is_empty() {
-                                whole.to_string()
-                            } else {
-                                format!("{whole}.{fraction}")
-                            };
-                            let (prefix, exponent) =
-                                if let Some(stripped_exponent) = exponent.strip_prefix('-') {
-                                    ("-0", &stripped_exponent[1..])
-                                } else {
-                                    ("+", exponent)
-                                };
-                            return write!(f, "{trimmed_mantissa}e{prefix}{exponent}");
-                        }
-                    }
-
-                    // fallback
-                    return write!(f, "{sci_notation}");
-                }
-
-                // handle floating point max size is 15.
-                // If left > right && right + left > 15 go to sci notation
-                // If right > left && right + left > 15 truncate left so right + left == 15
-                let rounded = fl.round();
-                if (fl - rounded).abs() < 1e-14 {
-                    // if we very close to integer trim decimal part to 1 digit
-                    if rounded == rounded as i64 as f64 {
-                        return write!(f, "{fl:.1}");
-                    }
-                }
-
-                let fl_str = format!("{fl}");
-                let splitted = fl_str.split('.').collect::<Vec<&str>>();
-                // fallback
-                if splitted.len() != 2 {
-                    return write!(f, "{fl:.14e}");
-                }
-
-                let first_part = if fl < 0.0 {
-                    // remove -
-                    &splitted[0][1..]
-                } else {
-                    splitted[0]
-                };
-
-                let second = splitted[1];
-
-                // We want more precision for smaller numbers. in SQLite case we want 15 non zero digits in 0 < number < 1
-                // leading zeroes added to max real size. But if float < 1e-4 we go to scientific notation
-                let leading_zeros = second.chars().take_while(|c| c == &'0').count();
-                let reminder = if first_part != "0" {
-                    MAX_REAL_SIZE as isize - first_part.len() as isize
-                } else {
-                    MAX_REAL_SIZE as isize + leading_zeros as isize
-                };
-                // float that have integer part > 15 converted to sci notation
-                if reminder < 0 {
-                    return write!(f, "{fl:.14e}");
-                }
-                // trim decimal part to reminder or self len so total digits is 15;
-                let mut fl = format!("{:.*}", second.len().min(reminder as usize), fl);
-                // if decimal part ends with 0 we trim it
-                while fl.ends_with('0') {
-                    fl.pop();
-                }
-                write!(f, "{fl}")
-            }
+            Self::Float(fl) => f.write_str(&format_float(*fl)),
            Self::Text(s) => {
                write!(f, "{}", s.as_str())
            }
@@ -761,11 +667,8 @@ impl PartialEq<Value> for Value {
    fn eq(&self, other: &Value) -> bool {
        match (self, other) {
            (Self::Integer(int_left), Self::Integer(int_right)) => int_left == int_right,
-            (Self::Integer(int_left), Self::Float(float_right)) => {
-                (*int_left as f64) == (*float_right)
-            }
-            (Self::Float(float_left), Self::Integer(int_right)) => {
-                float_left == (&(*int_right as f64))
+            (Self::Integer(int), Self::Float(float)) | (Self::Float(float), Self::Integer(int))=> {
+                int_float_cmp(*int, *float).is_eq()
            }
            (Self::Float(float_left), Self::Float(float_right)) => float_left == float_right,
            (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => false,
@@ -780,17 +683,32 @@ impl PartialEq<Value> for Value {
    }
 }

+fn int_float_cmp(int: i64, float: f64) -> std::cmp::Ordering {
+    if float.is_nan() {
+        return std::cmp::Ordering::Greater;
+    }
+
+    if float < -9223372036854775808.0 {
+        return std::cmp::Ordering::Greater;
+    }
+
+    if float >= 9223372036854775808.0 {
+        return std::cmp::Ordering::Less;
+    }
+
+    match int.cmp(&(float as i64)) {
+        std::cmp::Ordering::Equal => (int as f64).total_cmp(&float),
+        cmp => cmp,
+    }
+}
+
 #[allow(clippy::non_canonical_partial_ord_impl)]
 impl PartialOrd<Value> for Value {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        match (self, other) {
            (Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right),
-            (Self::Integer(int_left), Self::Float(float_right)) => {
-                (*int_left as f64).partial_cmp(float_right)
-            }
-            (Self::Float(float_left), Self::Integer(int_right)) => {
-                float_left.partial_cmp(&(*int_right as f64))
-            }
+            (Self::Float(float), Self::Integer(int)) => Some(int_float_cmp(*int, *float).reverse()),
+            (Self::Integer(int), Self::Float(float)) => Some(int_float_cmp(*int, *float)),
            (Self::Float(float_left), Self::Float(float_right)) => {
                float_left.partial_cmp(float_right)
            }
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -8146,39 +8146,23 @@ impl Value {
    }

    pub fn exec_concat(&self, rhs: &Value) -> Value {
-        match (self, rhs) {
-            (Value::Text(lhs_text), Value::Text(rhs_text)) => {
-                Value::build_text(lhs_text.as_str().to_string() + rhs_text.as_str())
-            }
-            (Value::Text(lhs_text), Value::Integer(rhs_int)) => {
-                Value::build_text(lhs_text.as_str().to_string() + &rhs_int.to_string())
-            }
-            (Value::Text(lhs_text), Value::Float(rhs_float)) => {
-                Value::build_text(lhs_text.as_str().to_string() + &rhs_float.to_string())
-            }
-            (Value::Integer(lhs_int), Value::Text(rhs_text)) => {
-                Value::build_text(lhs_int.to_string() + rhs_text.as_str())
-            }
-            (Value::Integer(lhs_int), Value::Integer(rhs_int)) => {
-                Value::build_text(lhs_int.to_string() + &rhs_int.to_string())
-            }
-            (Value::Integer(lhs_int), Value::Float(rhs_float)) => {
-                Value::build_text(lhs_int.to_string() + &rhs_float.to_string())
-            }
-            (Value::Float(lhs_float), Value::Text(rhs_text)) => {
-                Value::build_text(lhs_float.to_string() + rhs_text.as_str())
-            }
-            (Value::Float(lhs_float), Value::Integer(rhs_int)) => {
-                Value::build_text(lhs_float.to_string() + &rhs_int.to_string())
-            }
-            (Value::Float(lhs_float), Value::Float(rhs_float)) => {
-                Value::build_text(lhs_float.to_string() + &rhs_float.to_string())
-            }
-            (Value::Null, _) | (_, Value::Null) => Value::Null,
-            (Value::Blob(_), _) | (_, Value::Blob(_)) => {
-                todo!("TODO: Handle Blob conversion to String")
-            }
+        if let (Value::Blob(lhs), Value::Blob(rhs)) = (self, rhs) {
+            return Value::build_text(String::from_utf8_lossy(dbg!(&[
+                lhs.as_slice(),
+                rhs.as_slice()
+            ]
+            .concat())));
        }
+
+        let Some(lhs) = self.cast_text() else {
+            return Value::Null;
+        };
+
+        let Some(rhs) = rhs.cast_text() else {
+            return Value::Null;
+        };
+
+        Value::build_text(lhs + &rhs)
    }

    pub fn exec_and(&self, rhs: &Value) -> Value {