From 73e901010c81dd331e91c9e9614ab2ea0461d17a Mon Sep 17 00:00:00 2001
From: "Levy A."
Date: Thu, 4 Sep 2025 20:11:07 -0300
Subject: [PATCH] fix: float formatting and float comparison

---
 core/numeric/mod.rs  | 170 +++++++++++++++++++++++++++++++++++++++++--
 core/types.rs        | 154 +++++++++-----------------------------
 core/vdbe/execute.rs |  49 +++++--------
 parser/src/parser.rs |   1 +
 4 files changed, 222 insertions(+), 152 deletions(-)

diff --git a/core/numeric/mod.rs b/core/numeric/mod.rs
index 7f7beac10..c08f8aabb 100644
--- a/core/numeric/mod.rs
+++ b/core/numeric/mod.rs
@@ -1,3 +1,5 @@
+use std::str;
+
 use crate::Value;
 
 pub mod nonnan;
@@ -352,6 +354,16 @@ const VERTICAL_TAB: char = '\u{b}';
 #[derive(Debug, Clone, Copy)]
 struct DoubleDouble(f64, f64);
 
+impl DoubleDouble {
+    pub const E100: Self = DoubleDouble(1.0e+100, -1.590_289_110_975_991_8e83);
+    pub const E10: Self = DoubleDouble(1.0e+10, 0.0);
+    pub const E1: Self = DoubleDouble(1.0e+01, 0.0);
+
+    pub const NEG_E100: Self = DoubleDouble(1.0e-100, -1.999_189_980_260_288_3e-117);
+    pub const NEG_E10: Self = DoubleDouble(1.0e-10, -3.643_219_731_549_774e-27);
+    pub const NEG_E1: Self = DoubleDouble(1.0e-01, -5.551_115_123_125_783e-18);
+}
+
 impl From<u64> for DoubleDouble {
     fn from(value: u64) -> Self {
         let r = value as f64;
@@ -371,6 +383,16 @@ impl From<u64> for DoubleDouble {
     }
 }
 
+impl From<DoubleDouble> for u64 {
+    fn from(value: DoubleDouble) -> Self {
+        if value.1 < 0.0 {
+            value.0 as u64 - value.1.abs() as u64
+        } else {
+            value.0 as u64 + value.1 as u64
+        }
+    }
+}
+
 impl From<DoubleDouble> for f64 {
     fn from(DoubleDouble(a, aa): DoubleDouble) -> Self {
         a + aa
@@ -489,6 +511,10 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
     }
 
     if input.next_if(|ch| matches!(ch, '.')).is_some() {
+        if matches!(input.peek(), Some('e' | 'E')) {
+            return None;
+        }
+
         if had_digits || input.peek().is_some_and(char::is_ascii_digit) {
             is_fractional = true
         }
@@ -539,28 +565,28 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
     if exponent > 0 {
         while exponent >= 100 {
             exponent -= 100;
-            result *= DoubleDouble(1.0e+100, -1.590_289_110_975_991_8e83);
+            result *= DoubleDouble::E100;
         }
         while exponent >= 10 {
             exponent -= 10;
-            result *= DoubleDouble(1.0e+10, 0.0);
+            result *= DoubleDouble::E10;
         }
         while exponent >= 1 {
             exponent -= 1;
-            result *= DoubleDouble(1.0e+01, 0.0);
+            result *= DoubleDouble::E1;
         }
     } else {
         while exponent <= -100 {
             exponent += 100;
-            result *= DoubleDouble(1.0e-100, -1.999_189_980_260_288_3e-117);
+            result *= DoubleDouble::NEG_E100;
         }
         while exponent <= -10 {
             exponent += 10;
-            result *= DoubleDouble(1.0e-10, -3.643_219_731_549_774e-27);
+            result *= DoubleDouble::NEG_E10;
         }
         while exponent <= -1 {
             exponent += 1;
-            result *= DoubleDouble(1.0e-01, -5.551_115_123_125_783e-18);
+            result *= DoubleDouble::NEG_E1;
         }
     }
 
@@ -573,3 +599,135 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
         StrToF64::Decimal(result)
     })
 }
+
+/// Formats `v` as text using at most 15 significant decimal digits.
+///
+/// NaN is rendered as `NaN`, infinities as `Inf` / `-Inf` and zero (either sign) as `0.0`.
+/// Decimal exponents in `-4..=14` use plain notation, anything else scientific notation.
+pub fn format_float(v: f64) -> String {
+    if v.is_nan() {
+        return "NaN".to_string();
+    }
+
+    if v.is_infinite() {
+        return if v.is_sign_negative() { "-Inf" } else { "Inf" }.to_string();
+    }
+
+    if v == 0.0 {
+        return "0.0".to_string();
+    }
+
+    let negative = v < 0.0;
+    let mut d = DoubleDouble(v.abs(), 0.0);
+    let mut exp = 0;
+
+    if d.0 > 9.223372036854774784e+18 {
+        while d.0 > 9.223372036854774784e+118 {
+            exp += 100;
+            d *= DoubleDouble::NEG_E100;
+        }
+        while d.0 > 9.223372036854774784e+28 {
+            exp += 10;
+            d *= DoubleDouble::NEG_E10;
+        }
+        while d.0 > 9.223372036854774784e+18 {
+            exp += 1;
+            d *= DoubleDouble::NEG_E1;
+        }
+    } else {
+        while d.0 < 9.223372036854774784e-83 {
+            exp -= 100;
+            d *= DoubleDouble::E100;
+        }
+        while d.0 < 9.223372036854774784e+07 {
+            exp -= 10;
+            d *= DoubleDouble::E10;
+        }
+        while d.0 < 9.22337203685477478e+17 {
+            exp -= 1;
+            d *= DoubleDouble::E1;
+        }
+    }
+
+    let v = u64::from(d);
+
+    let mut digits = v.to_string().into_bytes();
+
+    let precision = 15;
+
+    let mut decimal_pos = digits.len() as i32 + exp;
+
+    'out: {
+        if digits.len() > precision {
+            let round_up = digits[precision] >= b'5';
+            digits.truncate(precision);
+
+            if round_up {
+                for i in (0..precision).rev() {
+                    if digits[i] < b'9' {
+                        digits[i] += 1;
+                        break 'out;
+                    }
+                    digits[i] = b'0';
+                }
+
+                digits.insert(0, b'1');
+                decimal_pos += 1;
+            }
+        }
+    }
+
+    while digits.len() > 1 && digits[digits.len() - 1] == b'0' {
+        digits.pop();
+    }
+
+    let exp = decimal_pos - 1;
+
+    if (-4..=14).contains(&exp) {
+        format!(
+            "{}{}.{}{}",
+            negative.then_some("-").unwrap_or_default(),
+            if decimal_pos > 0 {
+                let zeroes = (decimal_pos - digits.len() as i32).max(0) as usize;
+                let digits = digits.get(0..(decimal_pos.min(digits.len() as i32) as usize)).unwrap();
+                // SAFETY: `digits` only holds ASCII digits, so every sub-slice is valid UTF-8.
+                (unsafe { str::from_utf8_unchecked(digits) }).to_owned() + &"0".repeat(zeroes)
+            } else {
+                "0".to_string()
+            },
+            "0".repeat(decimal_pos.min(0).abs() as usize),
+            digits
+                .get((decimal_pos.max(0) as usize)..)
+                .filter(|v| !v.is_empty())
+                // SAFETY: `digits` only holds ASCII digits, so every sub-slice is valid UTF-8.
+                .map(|v| unsafe { str::from_utf8_unchecked(v) })
+                .unwrap_or("0")
+        )
+    } else {
+        format!(
+            "{}{}.{}e{}{:0width$}",
+            negative.then_some("-").unwrap_or_default(),
+            digits.first().copied().unwrap_or(b'0') as char,
+            digits
+                .get(1..)
+                .filter(|v| !v.is_empty())
+                // SAFETY: `digits` only holds ASCII digits, so every sub-slice is valid UTF-8.
+                .map(|v| unsafe { str::from_utf8_unchecked(v) })
+                .unwrap_or("0"),
+            if exp.is_positive() { "+" } else { "-" },
+            exp.abs(),
+            width = if exp > 100 { 3 } else { 2 }
+        )
+    }
+}
+
+#[test]
+fn test_decode_float() {
+    assert_eq!(format_float(9.93e-322), "9.93071948140905e-322");
+    assert_eq!(format_float(9.93), "9.93");
+    assert_eq!(format_float(0.093), "0.093");
+    assert_eq!(format_float(-0.093), "-0.093");
+    assert_eq!(format_float(0.0), "0.0");
+    assert_eq!(format_float(4.94e-322), "4.94065645841247e-322");
+    assert_eq!(format_float(-20228007.0), "-20228007.0");
+}
diff --git a/core/types.rs b/core/types.rs
index 25ad4655e..55979748f 100644
--- a/core/types.rs
+++ b/core/types.rs
@@ -5,6 +5,7 @@ use turso_parser::ast::SortOrder;
 
 use crate::error::LimboError;
 use crate::ext::{ExtValue, ExtValueType};
+use crate::numeric::format_float;
 use crate::pseudo::PseudoCursor;
 use crate::schema::Index;
 use crate::storage::btree::BTreeCursor;
@@ -17,7 +18,6 @@ use crate::vtab::VirtualTableCursor;
 use crate::{turso_assert, Completion, CompletionError, Result, IO};
 use std::fmt::{Debug, Display};
 
-const MAX_REAL_SIZE: u8 = 15;
 
 /// SQLite by default uses 2000 as maximum numbers in a row.
 /// It controlld by the constant called SQLITE_MAX_COLUMN
@@ -390,6 +390,13 @@ impl Value {
             Value::Blob(b) => out.extend_from_slice(b),
         };
     }
+
+    pub fn cast_text(&self) -> Option<String> {
+        Some(match self {
+            Value::Null => return None,
+            v => v.to_string(),
+        })
+    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -425,108 +432,7 @@ impl Display for Value {
             Self::Integer(i) => {
                 write!(f, "{i}")
             }
-            Self::Float(fl) => {
-                let fl = *fl;
-                if fl == f64::INFINITY {
-                    return write!(f, "Inf");
-                }
-                if fl == f64::NEG_INFINITY {
-                    return write!(f, "-Inf");
-                }
-                if fl.is_nan() {
-                    return write!(f, "");
-                }
-                // handle negative 0
-                if fl == -0.0 {
-                    return write!(f, "{:.1}", fl.abs());
-                }
-
-                // handle scientific notation without trailing zeros
-                if (fl.abs() < 1e-4 || fl.abs() >= 1e15) && fl != 0.0 {
-                    let sci_notation = format!("{fl:.14e}");
-                    let parts: Vec<&str> = sci_notation.split('e').collect();
-
-                    if parts.len() == 2 {
-                        let mantissa = parts[0];
-                        let exponent = parts[1];
-
-                        let decimal_parts: Vec<&str> = mantissa.split('.').collect();
-                        if decimal_parts.len() == 2 {
-                            let whole = decimal_parts[0];
-                            // 1.{this part}
-                            let mut fraction = String::from(decimal_parts[1]);
-
-                            //removing trailing 0 from fraction
-                            while fraction.ends_with('0') {
-                                fraction.pop();
-                            }
-
-                            let trimmed_mantissa = if fraction.is_empty() {
-                                whole.to_string()
-                            } else {
-                                format!("{whole}.{fraction}")
-                            };
-                            let (prefix, exponent) =
-                                if let Some(stripped_exponent) = exponent.strip_prefix('-') {
-                                    ("-0", &stripped_exponent[1..])
-                                } else {
-                                    ("+", exponent)
-                                };
-                            return write!(f, "{trimmed_mantissa}e{prefix}{exponent}");
-                        }
-                    }
-
-                    // fallback
-                    return write!(f, "{sci_notation}");
-                }
-
-                // handle floating point max size is 15.
-                // If left > right && right + left > 15 go to sci notation
-                // If right > left && right + left > 15 truncate left so right + left == 15
-                let rounded = fl.round();
-                if (fl - rounded).abs() < 1e-14 {
-                    // if we very close to integer trim decimal part to 1 digit
-                    if rounded == rounded as i64 as f64 {
-                        return write!(f, "{fl:.1}");
-                    }
-                }
-
-                let fl_str = format!("{fl}");
-                let splitted = fl_str.split('.').collect::<Vec<&str>>();
-                // fallback
-                if splitted.len() != 2 {
-                    return write!(f, "{fl:.14e}");
-                }
-
-                let first_part = if fl < 0.0 {
-                    // remove -
-                    &splitted[0][1..]
-                } else {
-                    splitted[0]
-                };
-
-                let second = splitted[1];
-
-                // We want more precision for smaller numbers. in SQLite case we want 15 non zero digits in 0 < number < 1
-                // leading zeroes added to max real size. But if float < 1e-4 we go to scientific notation
-                let leading_zeros = second.chars().take_while(|c| c == &'0').count();
-                let reminder = if first_part != "0" {
-                    MAX_REAL_SIZE as isize - first_part.len() as isize
-                } else {
-                    MAX_REAL_SIZE as isize + leading_zeros as isize
-                };
-                // float that have integer part > 15 converted to sci notation
-                if reminder < 0 {
-                    return write!(f, "{fl:.14e}");
-                }
-                // trim decimal part to reminder or self len so total digits is 15;
-                let mut fl = format!("{:.*}", second.len().min(reminder as usize), fl);
-                // if decimal part ends with 0 we trim it
-                while fl.ends_with('0') {
-                    fl.pop();
-                }
-                write!(f, "{fl}")
-            }
+            Self::Float(fl) => f.write_str(&format_float(*fl)),
             Self::Text(s) => {
                 write!(f, "{}", s.as_str())
             }
@@ -761,11 +667,8 @@ impl PartialEq<Value> for Value {
     fn eq(&self, other: &Value) -> bool {
         match (self, other) {
             (Self::Integer(int_left), Self::Integer(int_right)) => int_left == int_right,
-            (Self::Integer(int_left), Self::Float(float_right)) => {
-                (*int_left as f64) == (*float_right)
-            }
-            (Self::Float(float_left), Self::Integer(int_right)) => {
-                float_left == (&(*int_right as f64))
+            (Self::Integer(int), Self::Float(float)) | (Self::Float(float), Self::Integer(int)) => {
+                int_float_cmp(*int, *float).is_eq()
             }
             (Self::Float(float_left), Self::Float(float_right)) => float_left == float_right,
             (Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => false,
@@ -780,17 +683,40 @@ impl PartialEq<Value> for Value {
     }
 }
 
+/// Returns the ordering of `int` relative to `float`.
+///
+/// The float is truncated and compared as an integer first; only a tie falls back to a
+/// floating-point comparison. NaN and floats below the `i64` range order before every integer.
+fn int_float_cmp(int: i64, float: f64) -> std::cmp::Ordering {
+    if float.is_nan() {
+        return std::cmp::Ordering::Greater;
+    }
+
+    if float < -9223372036854775808.0 {
+        return std::cmp::Ordering::Greater;
+    }
+
+    if float >= 9223372036854775808.0 {
+        return std::cmp::Ordering::Less;
+    }
+
+    match int.cmp(&(float as i64)) {
+        // The integer parts tie, so the fractional part of `float` decides. Use the IEEE
+        // comparison (not `total_cmp`) so `0` and `-0.0` are equal; NaN was rejected above.
+        std::cmp::Ordering::Equal => (int as f64)
+            .partial_cmp(&float)
+            .unwrap_or(std::cmp::Ordering::Equal),
+        cmp => cmp,
+    }
+}
+
 #[allow(clippy::non_canonical_partial_ord_impl)]
 impl PartialOrd<Value> for Value {
     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
         match (self, other) {
             (Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right),
-            (Self::Integer(int_left), Self::Float(float_right)) => {
-                (*int_left as f64).partial_cmp(float_right)
-            }
-            (Self::Float(float_left), Self::Integer(int_right)) => {
-                float_left.partial_cmp(&(*int_right as f64))
-            }
+            (Self::Float(float), Self::Integer(int)) => Some(int_float_cmp(*int, *float).reverse()),
+            (Self::Integer(int), Self::Float(float)) => Some(int_float_cmp(*int, *float)),
             (Self::Float(float_left), Self::Float(float_right)) => {
                 float_left.partial_cmp(float_right)
             }
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 7a989d0c5..60dd736fc 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -8146,39 +8146,24 @@ impl Value {
     }
 
     pub fn exec_concat(&self, rhs: &Value) -> Value {
-        match (self, rhs) {
-            (Value::Text(lhs_text), Value::Text(rhs_text)) => {
-                Value::build_text(lhs_text.as_str().to_string() + rhs_text.as_str())
-            }
-            (Value::Text(lhs_text), Value::Integer(rhs_int)) => {
-                Value::build_text(lhs_text.as_str().to_string() + &rhs_int.to_string())
-            }
-            (Value::Text(lhs_text), Value::Float(rhs_float)) => {
-                Value::build_text(lhs_text.as_str().to_string() + &rhs_float.to_string())
-            }
-            (Value::Integer(lhs_int), Value::Text(rhs_text)) => {
-                Value::build_text(lhs_int.to_string() + rhs_text.as_str())
-            }
-            (Value::Integer(lhs_int), Value::Integer(rhs_int)) => {
-                Value::build_text(lhs_int.to_string() + &rhs_int.to_string())
-            }
-            (Value::Integer(lhs_int), Value::Float(rhs_float)) => {
-                Value::build_text(lhs_int.to_string() + &rhs_float.to_string())
-            }
-            (Value::Float(lhs_float), Value::Text(rhs_text)) => {
-                Value::build_text(lhs_float.to_string() + rhs_text.as_str())
-            }
-            (Value::Float(lhs_float), Value::Integer(rhs_int)) => {
-                Value::build_text(lhs_float.to_string() + &rhs_int.to_string())
-            }
-            (Value::Float(lhs_float), Value::Float(rhs_float)) => {
-                Value::build_text(lhs_float.to_string() + &rhs_float.to_string())
-            }
-            (Value::Null, _) | (_, Value::Null) => Value::Null,
-            (Value::Blob(_), _) | (_, Value::Blob(_)) => {
-                todo!("TODO: Handle Blob conversion to String")
-            }
+        // Two blobs are joined byte-wise first and only then decoded (lossily) as UTF-8.
+        if let (Value::Blob(lhs), Value::Blob(rhs)) = (self, rhs) {
+            return Value::build_text(String::from_utf8_lossy(
+                &[lhs.as_slice(), rhs.as_slice()].concat(),
+            ));
         }
+
+        // Every other combination concatenates the text form of both operands; a NULL on
+        // either side makes the result NULL.
+        let Some(lhs) = self.cast_text() else {
+            return Value::Null;
+        };
+
+        let Some(rhs) = rhs.cast_text() else {
+            return Value::Null;
+        };
+
+        Value::build_text(lhs + &rhs)
     }
 
     pub fn exec_and(&self, rhs: &Value) -> Value {
diff --git a/parser/src/parser.rs b/parser/src/parser.rs
index cfb10f28e..157d388f3 100644
--- a/parser/src/parser.rs
+++ b/parser/src/parser.rs
@@ -15,6 +15,7 @@ use crate::lexer::{Lexer, Token};
 use crate::token::TokenType::{self, *};
 use crate::Result;
 use turso_macros::match_ignore_ascii_case;
+use std::str;
 
 macro_rules! peek_expect {
     ( $parser:expr, $( $x:ident ),* $(,)?) => {