fix: float formatting and float comparison

This commit is contained in:
Levy A.
2025-09-04 20:11:07 -03:00
parent 2ea2be6f85
commit 73e901010c
4 changed files with 206 additions and 152 deletions

View File

@@ -1,3 +1,5 @@
use std::str;
use crate::Value;
pub mod nonnan;
@@ -352,6 +354,16 @@ const VERTICAL_TAB: char = '\u{b}';
/// A floating-point value stored as two `f64` components.
///
/// Converting to `f64` adds the two components together, so the second field acts as a
/// small correction to the first.
#[derive(Debug, Clone, Copy)]
struct DoubleDouble(f64, f64);

/// Powers of ten used when scaling a value up or down by decimal exponents.
///
/// NOTE(review): the second component of each constant is presumably the rounding error of
/// the first one with respect to the exact power of ten — confirm against the reference tables.
impl DoubleDouble {
    /// 10^100.
    pub const E100: Self = Self(1e100, -1.590_289_110_975_991_8e83);
    /// 10^10.
    pub const E10: Self = Self(1e10, 0.0);
    /// 10^1.
    pub const E1: Self = Self(10.0, 0.0);
    /// 10^-100.
    pub const NEG_E100: Self = Self(1e-100, -1.999_189_980_260_288_3e-117);
    /// 10^-10.
    pub const NEG_E10: Self = Self(1e-10, -3.643_219_731_549_774e-27);
    /// 10^-1.
    pub const NEG_E1: Self = Self(0.1, -5.551_115_123_125_783e-18);
}
impl From<u64> for DoubleDouble {
fn from(value: u64) -> Self {
let r = value as f64;
@@ -371,6 +383,16 @@ impl From<u64> for DoubleDouble {
}
}
impl From<DoubleDouble> for u64 {
fn from(value: DoubleDouble) -> Self {
if value.1 < 0.0 {
value.0 as u64 - value.1.abs() as u64
} else {
value.0 as u64 + value.1 as u64
}
}
}
impl From<DoubleDouble> for f64 {
fn from(DoubleDouble(a, aa): DoubleDouble) -> Self {
a + aa
@@ -489,6 +511,10 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
}
if input.next_if(|ch| matches!(ch, '.')).is_some() {
if matches!(input.peek(), Some('e' | 'E')) {
return None;
}
if had_digits || input.peek().is_some_and(char::is_ascii_digit) {
is_fractional = true
}
@@ -539,28 +565,28 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
if exponent > 0 {
while exponent >= 100 {
exponent -= 100;
result *= DoubleDouble(1.0e+100, -1.590_289_110_975_991_8e83);
result *= DoubleDouble::E100;
}
while exponent >= 10 {
exponent -= 10;
result *= DoubleDouble(1.0e+10, 0.0);
result *= DoubleDouble::E10;
}
while exponent >= 1 {
exponent -= 1;
result *= DoubleDouble(1.0e+01, 0.0);
result *= DoubleDouble::E1;
}
} else {
while exponent <= -100 {
exponent += 100;
result *= DoubleDouble(1.0e-100, -1.999_189_980_260_288_3e-117);
result *= DoubleDouble::NEG_E100;
}
while exponent <= -10 {
exponent += 10;
result *= DoubleDouble(1.0e-10, -3.643_219_731_549_774e-27);
result *= DoubleDouble::NEG_E10;
}
while exponent <= -1 {
exponent += 1;
result *= DoubleDouble(1.0e-01, -5.551_115_123_125_783e-18);
result *= DoubleDouble::NEG_E1;
}
}
@@ -573,3 +599,128 @@ pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
StrToF64::Decimal(result)
})
}
pub fn format_float(v: f64) -> String {
if v.is_nan() {
return "NaN".to_string();
}
if v.is_infinite() {
return if v.is_sign_negative() { "-Inf" } else { "Inf" }.to_string();
}
if v == 0.0 {
return "0.0".to_string();
}
let negative = v < 0.0;
let mut d = DoubleDouble(v.abs(), 0.0);
let mut exp = 0;
if d.0 > 9.223372036854774784e+18 {
while d.0 > 9.223372036854774784e+118 {
exp += 100;
d *= DoubleDouble::NEG_E100;
}
while d.0 > 9.223372036854774784e+28 {
exp += 10;
d *= DoubleDouble::NEG_E10;
}
while d.0 > 9.223372036854774784e+18 {
exp += 1;
d *= DoubleDouble::NEG_E1;
}
} else {
while d.0 < 9.223372036854774784e-83 {
exp -= 100;
d *= DoubleDouble::E100;
}
while d.0 < 9.223372036854774784e+07 {
exp -= 10;
d *= DoubleDouble::E10;
}
while d.0 < 9.22337203685477478e+17 {
exp -= 1;
d *= DoubleDouble::E1;
}
}
let v = u64::from(d);
let mut digits = v.to_string().into_bytes();
let precision = 15;
let mut decimal_pos = digits.len() as i32 + exp;
'out: {
if digits.len() > precision {
let round_up = digits[precision] >= b'5';
digits.truncate(precision);
if round_up {
for i in (0..precision).rev() {
if digits[i] < b'9' {
digits[i] += 1;
break 'out;
}
digits[i] = b'0';
}
digits.insert(0, b'1');
decimal_pos += 1;
}
}
}
while digits.len() > 1 && digits[digits.len() - 1] == b'0' {
digits.pop();
}
let exp = decimal_pos - 1;
if (-4..=14).contains(&exp) {
format!(
"{}{}.{}{}",
negative.then_some("-").unwrap_or_default(),
if decimal_pos > 0 {
let zeroes = (decimal_pos - digits.len() as i32).max(0) as usize;
let digits = digits.get(0..(decimal_pos.min(digits.len() as i32) as usize)).unwrap();
(unsafe { str::from_utf8_unchecked(digits) }).to_owned() + &"0".repeat(zeroes)
} else {
"0".to_string()
},
"0".repeat(decimal_pos.min(0).abs() as usize),
digits
.get((decimal_pos.max(0) as usize)..)
.filter(|v| !v.is_empty())
.map(|v| unsafe { str::from_utf8_unchecked(v) })
.unwrap_or("0")
)
} else {
format!(
"{}{}.{}e{}{:0width$}",
negative.then_some("-").unwrap_or_default(),
digits.get(0).cloned().unwrap_or(b'0') as char,
digits
.get(1..)
.filter(|v| !v.is_empty())
.map(|v| unsafe { str::from_utf8_unchecked(v) })
.unwrap_or("0"),
if exp.is_positive() { "+" } else { "-" },
exp.abs(),
width = if exp > 100 { 3 } else { 2 }
)
}
}
/// Checks `format_float` against a table of inputs and their expected text.
#[test]
fn test_decode_float() {
    let cases = [
        (9.93e-322, "9.93071948140905e-322"),
        (9.93, "9.93"),
        (0.093, "0.093"),
        (-0.093, "-0.093"),
        (0.0, "0.0"),
        (4.94e-322, "4.94065645841247e-322"),
        (-20228007.0, "-20228007.0"),
    ];
    for (input, expected) in cases {
        assert_eq!(format_float(input), expected);
    }
}

View File

@@ -5,6 +5,7 @@ use turso_parser::ast::SortOrder;
use crate::error::LimboError;
use crate::ext::{ExtValue, ExtValueType};
use crate::numeric::format_float;
use crate::pseudo::PseudoCursor;
use crate::schema::Index;
use crate::storage::btree::BTreeCursor;
@@ -17,7 +18,6 @@ use crate::vtab::VirtualTableCursor;
use crate::{turso_assert, Completion, CompletionError, Result, IO};
use std::fmt::{Debug, Display};
const MAX_REAL_SIZE: u8 = 15;
/// SQLite uses 2000 as the default maximum number of columns in a row.
/// It is controlled by the constant called SQLITE_MAX_COLUMN
@@ -390,6 +390,13 @@ impl Value {
Value::Blob(b) => out.extend_from_slice(b),
};
}
/// Returns the textual form of this value, or `None` when the value is `Null`.
///
/// Every non-null value is rendered through its `to_string()` implementation.
pub fn cast_text(&self) -> Option<String> {
    if matches!(self, Value::Null) {
        return None;
    }
    Some(self.to_string())
}
}
#[derive(Debug, Clone, PartialEq)]
@@ -425,108 +432,7 @@ impl Display for Value {
Self::Integer(i) => {
write!(f, "{i}")
}
Self::Float(fl) => {
let fl = *fl;
if fl == f64::INFINITY {
return write!(f, "Inf");
}
if fl == f64::NEG_INFINITY {
return write!(f, "-Inf");
}
if fl.is_nan() {
return write!(f, "");
}
// handle negative 0
if fl == -0.0 {
return write!(f, "{:.1}", fl.abs());
}
// handle scientific notation without trailing zeros
if (fl.abs() < 1e-4 || fl.abs() >= 1e15) && fl != 0.0 {
let sci_notation = format!("{fl:.14e}");
let parts: Vec<&str> = sci_notation.split('e').collect();
if parts.len() == 2 {
let mantissa = parts[0];
let exponent = parts[1];
let decimal_parts: Vec<&str> = mantissa.split('.').collect();
if decimal_parts.len() == 2 {
let whole = decimal_parts[0];
// 1.{this part}
let mut fraction = String::from(decimal_parts[1]);
//removing trailing 0 from fraction
while fraction.ends_with('0') {
fraction.pop();
}
let trimmed_mantissa = if fraction.is_empty() {
whole.to_string()
} else {
format!("{whole}.{fraction}")
};
let (prefix, exponent) =
if let Some(stripped_exponent) = exponent.strip_prefix('-') {
("-0", &stripped_exponent[1..])
} else {
("+", exponent)
};
return write!(f, "{trimmed_mantissa}e{prefix}{exponent}");
}
}
// fallback
return write!(f, "{sci_notation}");
}
// handle floating point max size is 15.
// If left > right && right + left > 15 go to sci notation
// If right > left && right + left > 15 truncate left so right + left == 15
let rounded = fl.round();
if (fl - rounded).abs() < 1e-14 {
// if we very close to integer trim decimal part to 1 digit
if rounded == rounded as i64 as f64 {
return write!(f, "{fl:.1}");
}
}
let fl_str = format!("{fl}");
let splitted = fl_str.split('.').collect::<Vec<&str>>();
// fallback
if splitted.len() != 2 {
return write!(f, "{fl:.14e}");
}
let first_part = if fl < 0.0 {
// remove -
&splitted[0][1..]
} else {
splitted[0]
};
let second = splitted[1];
// We want more precision for smaller numbers. in SQLite case we want 15 non zero digits in 0 < number < 1
// leading zeroes added to max real size. But if float < 1e-4 we go to scientific notation
let leading_zeros = second.chars().take_while(|c| c == &'0').count();
let reminder = if first_part != "0" {
MAX_REAL_SIZE as isize - first_part.len() as isize
} else {
MAX_REAL_SIZE as isize + leading_zeros as isize
};
// float that have integer part > 15 converted to sci notation
if reminder < 0 {
return write!(f, "{fl:.14e}");
}
// trim decimal part to reminder or self len so total digits is 15;
let mut fl = format!("{:.*}", second.len().min(reminder as usize), fl);
// if decimal part ends with 0 we trim it
while fl.ends_with('0') {
fl.pop();
}
write!(f, "{fl}")
}
Self::Float(fl) => f.write_str(&format_float(*fl)),
Self::Text(s) => {
write!(f, "{}", s.as_str())
}
@@ -761,11 +667,8 @@ impl PartialEq<Value> for Value {
fn eq(&self, other: &Value) -> bool {
match (self, other) {
(Self::Integer(int_left), Self::Integer(int_right)) => int_left == int_right,
(Self::Integer(int_left), Self::Float(float_right)) => {
(*int_left as f64) == (*float_right)
}
(Self::Float(float_left), Self::Integer(int_right)) => {
float_left == (&(*int_right as f64))
(Self::Integer(int), Self::Float(float)) | (Self::Float(float), Self::Integer(int))=> {
int_float_cmp(*int, *float).is_eq()
}
(Self::Float(float_left), Self::Float(float_right)) => float_left == float_right,
(Self::Integer(_) | Self::Float(_), Self::Text(_) | Self::Blob(_)) => false,
@@ -780,17 +683,32 @@ impl PartialEq<Value> for Value {
}
}
/// Orders an integer relative to a float without first rounding the integer to `f64`.
///
/// Returns the ordering of `int` with respect to `float`:
///
/// * NaN is treated as smaller than every integer.
/// * Floats outside the `i64` range (including the infinities) are decided by range alone.
/// * Otherwise the float is truncated toward zero and compared as an integer; only when the
///   integer parts tie is the fractional part consulted.
///
/// The tie-break uses ordinary IEEE comparison, so `0` and `-0.0` compare as equal.
fn int_float_cmp(int: i64, float: f64) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    if float.is_nan() {
        return Ordering::Greater;
    }
    // -2^63 is the smallest i64; anything below it is smaller than every integer.
    if float < -9223372036854775808.0 {
        return Ordering::Greater;
    }
    // 2^63 is one past the largest i64; anything at or above it is larger than every integer.
    if float >= 9223372036854775808.0 {
        return Ordering::Less;
    }
    // In range, so the truncating cast is exact for the integer part.
    int.cmp(&(float as i64)).then_with(|| {
        // The integer parts agree, hence `int as f64` is exact here. `partial_cmp` only
        // fails for NaN, which was ruled out above; unlike `total_cmp` it does not
        // distinguish -0.0 from +0.0.
        (int as f64).partial_cmp(&float).unwrap_or(Ordering::Equal)
    })
}
#[allow(clippy::non_canonical_partial_ord_impl)]
impl PartialOrd<Value> for Value {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
match (self, other) {
(Self::Integer(int_left), Self::Integer(int_right)) => int_left.partial_cmp(int_right),
(Self::Integer(int_left), Self::Float(float_right)) => {
(*int_left as f64).partial_cmp(float_right)
}
(Self::Float(float_left), Self::Integer(int_right)) => {
float_left.partial_cmp(&(*int_right as f64))
}
(Self::Float(float), Self::Integer(int)) => Some(int_float_cmp(*int, *float).reverse()),
(Self::Integer(int), Self::Float(float)) => Some(int_float_cmp(*int, *float)),
(Self::Float(float_left), Self::Float(float_right)) => {
float_left.partial_cmp(float_right)
}

View File

@@ -8146,39 +8146,23 @@ impl Value {
}
pub fn exec_concat(&self, rhs: &Value) -> Value {
match (self, rhs) {
(Value::Text(lhs_text), Value::Text(rhs_text)) => {
Value::build_text(lhs_text.as_str().to_string() + rhs_text.as_str())
}
(Value::Text(lhs_text), Value::Integer(rhs_int)) => {
Value::build_text(lhs_text.as_str().to_string() + &rhs_int.to_string())
}
(Value::Text(lhs_text), Value::Float(rhs_float)) => {
Value::build_text(lhs_text.as_str().to_string() + &rhs_float.to_string())
}
(Value::Integer(lhs_int), Value::Text(rhs_text)) => {
Value::build_text(lhs_int.to_string() + rhs_text.as_str())
}
(Value::Integer(lhs_int), Value::Integer(rhs_int)) => {
Value::build_text(lhs_int.to_string() + &rhs_int.to_string())
}
(Value::Integer(lhs_int), Value::Float(rhs_float)) => {
Value::build_text(lhs_int.to_string() + &rhs_float.to_string())
}
(Value::Float(lhs_float), Value::Text(rhs_text)) => {
Value::build_text(lhs_float.to_string() + rhs_text.as_str())
}
(Value::Float(lhs_float), Value::Integer(rhs_int)) => {
Value::build_text(lhs_float.to_string() + &rhs_int.to_string())
}
(Value::Float(lhs_float), Value::Float(rhs_float)) => {
Value::build_text(lhs_float.to_string() + &rhs_float.to_string())
}
(Value::Null, _) | (_, Value::Null) => Value::Null,
(Value::Blob(_), _) | (_, Value::Blob(_)) => {
todo!("TODO: Handle Blob conversion to String")
}
if let (Value::Blob(lhs), Value::Blob(rhs)) = (self, rhs) {
return Value::build_text(String::from_utf8_lossy(dbg!(&[
lhs.as_slice(),
rhs.as_slice()
]
.concat())));
}
let Some(lhs) = self.cast_text() else {
return Value::Null;
};
let Some(rhs) = rhs.cast_text() else {
return Value::Null;
};
Value::build_text(lhs + &rhs)
}
pub fn exec_and(&self, rhs: &Value) -> Value {