Merge 'Numeric Types Overhaul' from Levy A.

### Summary
  - Sqlite compatible string to float conversion
    - Accompanied with the new `cast_real` fuzz target
  - `NonNan` wrapper type over `f64`
    - Now we can guarantee that every operation that can result in a NaN
must be handled explicitly
  - `Numeric` and `NullableInteger` types that encapsulate all numeric
and bitwise operations
    - This is now guaranteed to be 100% compatible with sqlite with the
`expression` fuzz target (with the exception of the commented out
operation that will be implemented in a later PR)
One thing that might be reworked here is the heavy use of traits and
operator overloading, but looks reasonable to me.

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #1386
This commit is contained in:
Jussi Saurio
2025-04-23 18:34:32 +03:00
9 changed files with 770 additions and 376 deletions

View File

@@ -27,6 +27,7 @@ series = ["limbo_series/static"]
ipaddr = ["limbo_ipaddr/static"]
completion = ["limbo_completion/static"]
testvfs = ["limbo_ext_tests/static"]
fuzz = []
[target.'cfg(target_os = "linux")'.dependencies]
io-uring = { version = "0.6.1", optional = true }

View File

@@ -20,6 +20,12 @@ mod util;
mod vdbe;
mod vector;
#[cfg(feature = "fuzz")]
pub mod numeric;
#[cfg(not(feature = "fuzz"))]
mod numeric;
#[cfg(not(target_family = "wasm"))]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

575
core/numeric.rs Normal file
View File

@@ -0,0 +1,575 @@
use crate::OwnedValue;
mod nonnan;
use nonnan::NonNan;
// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available
/// Left shift that tolerates any shift amount instead of overflowing.
trait SaturatingShl {
    /// Shifts `self` left by `rhs` bits; once `rhs` covers the whole width the result is `0`.
    fn saturating_shl(self, rhs: u32) -> Self;
}

impl SaturatingShl for i64 {
    fn saturating_shl(self, rhs: u32) -> Self {
        // `checked_shl` yields `None` exactly when `rhs >= i64::BITS`.
        self.checked_shl(rhs).unwrap_or(0)
    }
}
// TODO: Remove when https://github.com/rust-lang/libs-team/issues/230 is available
/// Arithmetic right shift that tolerates any shift amount instead of overflowing.
trait SaturatingShr {
    /// Shifts `self` right by `rhs` bits, keeping the sign. Once `rhs` covers the whole width
    /// the result is `0` for non-negative values and `-1` for negative ones.
    fn saturating_shr(self, rhs: u32) -> Self;
}

impl SaturatingShr for i64 {
    fn saturating_shr(self, rhs: u32) -> Self {
        // `checked_shr` yields `None` exactly when `rhs >= i64::BITS`.
        match self.checked_shr(rhs) {
            Some(shifted) => shifted,
            None if self < 0 => -1,
            None => 0,
        }
    }
}
/// A value after numeric coercion: SQL `NULL`, a 64-bit integer, or a float that is never NaN.
#[derive(Debug, Clone, Copy)]
pub enum Numeric {
    Null,
    Integer(i64),
    Float(NonNan),
}

impl Numeric {
    /// Interprets the value as a boolean.
    ///
    /// Returns `None` for `Null`, `Some(false)` for an integer or float equal to zero and
    /// `Some(true)` for everything else.
    pub fn try_into_bool(&self) -> Option<bool> {
        let truthy = match *self {
            Numeric::Null => return None,
            Numeric::Integer(int) => int != 0,
            Numeric::Float(real) => real != 0.0,
        };
        Some(truthy)
    }
}
/// Narrows a [`Numeric`] to an integer, keeping `Null` as `Null`.
impl From<Numeric> for NullableInteger {
    fn from(value: Numeric) -> Self {
        let integer = match value {
            Numeric::Null => return NullableInteger::Null,
            Numeric::Integer(int) => int,
            // A float-to-integer `as` cast truncates toward zero and saturates at the i64 bounds.
            Numeric::Float(real) => f64::from(real) as i64,
        };
        NullableInteger::Integer(integer)
    }
}
/// Converts an arithmetic result back into the matching `OwnedValue` variant.
impl From<Numeric> for OwnedValue {
    fn from(value: Numeric) -> Self {
        match value {
            Numeric::Float(real) => OwnedValue::Float(f64::from(real)),
            Numeric::Integer(int) => OwnedValue::Integer(int),
            Numeric::Null => OwnedValue::Null,
        }
    }
}
/// Coerces text to a number.
///
/// Text that `str_to_f64` rejects becomes `Integer(0)`. Text reported as `Fractional` always
/// becomes a float. Text reported as `Decimal` becomes an integer when the float value equals
/// the result of `str_to_i64`, and a float otherwise.
impl<T: AsRef<str>> From<T> for Numeric {
    fn from(value: T) -> Self {
        let text = value.as_ref();

        let Some(parsed) = str_to_f64(text) else {
            return Self::Integer(0);
        };

        match parsed {
            StrToF64::Fractional(real) => Self::Float(real),
            StrToF64::Decimal(real) => {
                let integer = str_to_i64(text).unwrap_or(0);
                if real == integer as f64 {
                    Self::Integer(integer)
                } else {
                    Self::Float(real)
                }
            }
        }
    }
}
impl From<OwnedValue> for Numeric {
fn from(value: OwnedValue) -> Self {
Self::from(&value)
}
}
/// Coerces any `OwnedValue` to a [`Numeric`].
///
/// A NaN float becomes `Null`. Text is parsed as a number, and blobs are decoded as
/// (lossy) UTF-8 and then parsed the same way as text.
impl From<&OwnedValue> for Numeric {
    fn from(value: &OwnedValue) -> Self {
        match value {
            OwnedValue::Null => Self::Null,
            OwnedValue::Integer(int) => Self::Integer(*int),
            OwnedValue::Float(real) => NonNan::new(*real).map_or(Self::Null, Self::Float),
            OwnedValue::Text(text) => Self::from(text.as_str()),
            OwnedValue::Blob(blob) => {
                let decoded = String::from_utf8_lossy(blob.as_slice());
                Self::from(&decoded)
            }
        }
    }
}
/// Addition: `Null` is contagious, integer overflow falls back to float arithmetic, and a
/// float result that would be NaN becomes `Null`.
impl std::ops::Add for Numeric {
    type Output = Self;

    fn add(self, rhs: Self) -> Self::Output {
        match (self, rhs) {
            (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null,
            (Numeric::Float(a), Numeric::Float(b)) => (a + b).map_or(Numeric::Null, Numeric::Float),
            (Numeric::Integer(a), Numeric::Integer(b)) => {
                if let Some(sum) = a.checked_add(b) {
                    Numeric::Integer(sum)
                } else {
                    // The sum does not fit in an i64: redo it with floats.
                    Numeric::Float(a.into()) + Numeric::Float(b.into())
                }
            }
            (real @ Numeric::Float(_), Numeric::Integer(int))
            | (Numeric::Integer(int), real @ Numeric::Float(_)) => {
                real + Numeric::Float(int.into())
            }
        }
    }
}
/// Subtraction: `Null` is contagious, integer overflow falls back to float arithmetic, and a
/// float result that would be NaN becomes `Null`.
impl std::ops::Sub for Numeric {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self::Output {
        match (self, rhs) {
            (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null,
            (Numeric::Integer(a), Numeric::Integer(b)) => {
                if let Some(difference) = a.checked_sub(b) {
                    Numeric::Integer(difference)
                } else {
                    // The difference does not fit in an i64: redo it with floats.
                    Numeric::Float(a.into()) - Numeric::Float(b.into())
                }
            }
            (Numeric::Float(a), Numeric::Float(b)) => (a - b).map_or(Numeric::Null, Numeric::Float),
            // Mixed operands: promote the integer side, keeping operand order.
            (real @ Numeric::Float(_), Numeric::Integer(int)) => real - Numeric::Float(int.into()),
            (Numeric::Integer(int), real @ Numeric::Float(_)) => Numeric::Float(int.into()) - real,
        }
    }
}
/// Multiplication: `Null` is contagious, integer overflow falls back to float arithmetic, and
/// a float result that would be NaN becomes `Null`.
impl std::ops::Mul for Numeric {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self::Output {
        match (self, rhs) {
            (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null,
            (Numeric::Integer(a), Numeric::Integer(b)) => {
                if let Some(product) = a.checked_mul(b) {
                    Numeric::Integer(product)
                } else {
                    // The product does not fit in an i64: redo it with floats.
                    Numeric::Float(a.into()) * Numeric::Float(b.into())
                }
            }
            (Numeric::Float(a), Numeric::Float(b)) => (a * b).map_or(Numeric::Null, Numeric::Float),
            (real @ Numeric::Float(_), Numeric::Integer(int))
            | (Numeric::Integer(int), real @ Numeric::Float(_)) => {
                real * Numeric::Float(int.into())
            }
        }
    }
}
/// Division: `Null` is contagious, dividing by zero yields `Null`, and an integer division
/// that `checked_div` rejects is redone with floats.
impl std::ops::Div for Numeric {
    type Output = Self;

    fn div(self, rhs: Self) -> Self::Output {
        match (self, rhs) {
            (Numeric::Null, _) | (_, Numeric::Null) => Numeric::Null,
            (Numeric::Float(dividend), Numeric::Float(divisor)) => {
                if divisor == 0.0 {
                    Numeric::Null
                } else {
                    (dividend / divisor).map_or(Numeric::Null, Numeric::Float)
                }
            }
            (Numeric::Integer(dividend), Numeric::Integer(divisor)) => {
                if let Some(quotient) = dividend.checked_div(divisor) {
                    Numeric::Integer(quotient)
                } else {
                    // Zero divisor or `i64::MIN / -1`: let the float path decide the result.
                    Numeric::Float(dividend.into()) / Numeric::Float(divisor.into())
                }
            }
            // Mixed operands: promote the integer side, keeping operand order.
            (real @ Numeric::Float(_), Numeric::Integer(int)) => real / Numeric::Float(int.into()),
            (Numeric::Integer(int), real @ Numeric::Float(_)) => Numeric::Float(int.into()) / real,
        }
    }
}
/// Negation: `Null` stays `Null`; an integer whose negation overflows is negated as a float.
impl std::ops::Neg for Numeric {
    type Output = Self;

    fn neg(self) -> Self::Output {
        match self {
            Numeric::Null => Numeric::Null,
            Numeric::Float(real) => Numeric::Float(-real),
            Numeric::Integer(int) => {
                if let Some(negated) = int.checked_neg() {
                    Numeric::Integer(negated)
                } else {
                    -Numeric::Float(int.into())
                }
            }
        }
    }
}
/// An integer operand for bitwise and remainder operations: SQL `NULL` or a 64-bit integer.
///
/// Derives `Clone`/`Copy` like its sibling `Numeric`, plus `PartialEq`/`Eq` so results can be
/// compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NullableInteger {
    Null,
    Integer(i64),
}
/// Converts an integer result back into the matching `OwnedValue` variant.
impl From<NullableInteger> for OwnedValue {
    fn from(value: NullableInteger) -> Self {
        match value {
            NullableInteger::Integer(int) => OwnedValue::Integer(int),
            NullableInteger::Null => OwnedValue::Null,
        }
    }
}
/// Coerces text to an integer via `str_to_i64`, using `0` when it reports no value.
impl<T: AsRef<str>> From<T> for NullableInteger {
    fn from(value: T) -> Self {
        let parsed = str_to_i64(value.as_ref());
        Self::Integer(parsed.unwrap_or(0))
    }
}
impl From<OwnedValue> for NullableInteger {
fn from(value: OwnedValue) -> Self {
Self::from(&value)
}
}
/// Coerces any `OwnedValue` to a [`NullableInteger`].
///
/// Floats are cast with `as i64`, text is parsed as an integer, and blobs are decoded as
/// (lossy) UTF-8 and then parsed the same way as text.
impl From<&OwnedValue> for NullableInteger {
    fn from(value: &OwnedValue) -> Self {
        match value {
            OwnedValue::Null => Self::Null,
            OwnedValue::Integer(int) => Self::Integer(*int),
            OwnedValue::Float(real) => Self::Integer(*real as i64),
            OwnedValue::Text(text) => Self::from(text.as_str()),
            OwnedValue::Blob(blob) => {
                let decoded = String::from_utf8_lossy(blob.as_slice());
                Self::from(decoded)
            }
        }
    }
}
/// Bitwise complement; `Null` stays `Null`.
impl std::ops::Not for NullableInteger {
    type Output = Self;

    fn not(self) -> Self::Output {
        if let NullableInteger::Integer(bits) = self {
            NullableInteger::Integer(!bits)
        } else {
            NullableInteger::Null
        }
    }
}
/// Bitwise AND; the result is `Null` when either operand is `Null`.
impl std::ops::BitAnd for NullableInteger {
    type Output = Self;

    fn bitand(self, rhs: Self) -> Self::Output {
        let (NullableInteger::Integer(left), NullableInteger::Integer(right)) = (self, rhs) else {
            return NullableInteger::Null;
        };
        NullableInteger::Integer(left & right)
    }
}
/// Bitwise OR; the result is `Null` when either operand is `Null`.
impl std::ops::BitOr for NullableInteger {
    type Output = Self;

    fn bitor(self, rhs: Self) -> Self::Output {
        let (NullableInteger::Integer(left), NullableInteger::Integer(right)) = (self, rhs) else {
            return NullableInteger::Null;
        };
        NullableInteger::Integer(left | right)
    }
}
/// Left shift; a non-positive shift amount shifts right by its magnitude instead, and the
/// result is `Null` when either operand is `Null`.
impl std::ops::Shl for NullableInteger {
    type Output = Self;

    fn shl(self, rhs: Self) -> Self::Output {
        let (NullableInteger::Integer(value), NullableInteger::Integer(amount)) = (self, rhs) else {
            return NullableInteger::Null;
        };

        // Magnitudes that do not fit in a u32 are clamped; the saturating shifts treat
        // anything at or beyond the bit width the same way.
        let magnitude: u32 = amount.saturating_abs().try_into().unwrap_or(u32::MAX);

        NullableInteger::Integer(if amount > 0 {
            value.saturating_shl(magnitude)
        } else {
            value.saturating_shr(magnitude)
        })
    }
}
/// Right shift (sign-preserving); a non-positive shift amount shifts left by its magnitude
/// instead, and the result is `Null` when either operand is `Null`.
impl std::ops::Shr for NullableInteger {
    type Output = Self;

    fn shr(self, rhs: Self) -> Self::Output {
        let (NullableInteger::Integer(value), NullableInteger::Integer(amount)) = (self, rhs) else {
            return NullableInteger::Null;
        };

        // Magnitudes that do not fit in a u32 are clamped; the saturating shifts treat
        // anything at or beyond the bit width the same way.
        let magnitude: u32 = amount.saturating_abs().try_into().unwrap_or(u32::MAX);

        NullableInteger::Integer(if amount > 0 {
            value.saturating_shr(magnitude)
        } else {
            value.saturating_shl(magnitude)
        })
    }
}
/// Integer remainder; the result is `Null` when either operand is `Null` or the divisor is 0.
impl std::ops::Rem for NullableInteger {
    type Output = Self;

    fn rem(self, rhs: Self) -> Self::Output {
        let (NullableInteger::Integer(dividend), NullableInteger::Integer(divisor)) = (self, rhs)
        else {
            return NullableInteger::Null;
        };

        match divisor {
            0 => NullableInteger::Null,
            // `i64::MIN % -1` overflows, so a divisor of -1 is computed as a divisor of 1.
            -1 => NullableInteger::Integer(dividend % 1),
            _ => NullableInteger::Integer(dividend % divisor),
        }
    }
}
/// Largest `u64` that is exactly representable as an `f64` (`2^64 - 2^11`).
const MAX_EXACT: u64 = u64::MAX << 11;

const VERTICAL_TAB: char = '\u{b}';

/// Encapsulates Dekker's arithmetic for higher precision. This is spiritually the same as using a
/// f128 for arithmetic, but cross platform and compatible with sqlite.
///
/// The first field is the leading value and the second a correction term; the represented
/// number is their sum.
#[derive(Debug, Clone, Copy)]
struct DoubleDouble(f64, f64);

impl From<u64> for DoubleDouble {
    /// Splits `value` into the nearest `f64` plus the signed rounding error of that conversion.
    fn from(value: u64) -> Self {
        let r = value as f64;

        // The error term is only computed while `r` is at most MAX_EXACT; for anything larger
        // it is taken to be zero.
        let rr = if r <= MAX_EXACT as f64 {
            let round_tripped = r as u64;
            // Error term is the signed distance between the value and its round-tripped form.
            let distance = value.abs_diff(round_tripped) as f64;
            if value >= round_tripped {
                distance
            } else {
                -distance
            }
        } else {
            0.0
        };

        DoubleDouble(r, rr)
    }
}

impl From<DoubleDouble> for f64 {
    /// Collapses the pair back into a single `f64` by adding the correction term.
    fn from(DoubleDouble(a, aa): DoubleDouble) -> Self {
        a + aa
    }
}

impl std::ops::Mul for DoubleDouble {
    type Output = Self;

    /// Double-Double multiplication. (self.0, self.1) *= (rhs.0, rhs.1)
    ///
    /// Reference:
    ///   T. J. Dekker, "A Floating-Point Technique for Extending the Available Precision".
    ///   1971-07-26.
    ///
    fn mul(self, rhs: Self) -> Self::Output {
        // TODO: Better variable naming

        // Clearing the low 26 bits splits each leading value into a head (`h*`) and the
        // remaining tail (`t*`).
        let mask = u64::MAX << 26;

        let hx = f64::from_bits(self.0.to_bits() & mask);
        let tx = self.0 - hx;

        let hy = f64::from_bits(rhs.0.to_bits() & mask);
        let ty = rhs.0 - hy;

        // The order of the floating-point operations below is significant; do not reassociate.
        let p = hx * hy;
        let q = hx * ty + tx * hy;

        let c = p + q;
        let cc = p - c + q + tx * ty;
        // Fold in the cross terms that involve the operands' correction terms.
        let cc = self.0 * rhs.1 + self.1 * rhs.0 + cc;

        let r = c + cc;
        let rr = (c - r) + cc;

        DoubleDouble(r, rr)
    }
}

impl std::ops::MulAssign for DoubleDouble {
    fn mul_assign(&mut self, rhs: Self) {
        // `DoubleDouble` is `Copy`, so dereference instead of cloning.
        *self = *self * rhs;
    }
}
/// Parses the longest leading integer of `input`.
///
/// ASCII whitespace and vertical tabs are trimmed from both ends first. An optional `+`/`-`
/// followed by ASCII digits is then parsed; anything after the digits is ignored. Text with
/// no leading digits yields `Some(0)`, and values outside the `i64` range clamp to
/// `i64::MAX` / `i64::MIN`. As written, every path returns `Some`.
pub fn str_to_i64(input: impl AsRef<str>) -> Option<i64> {
    let input = input
        .as_ref()
        .trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB);

    // `char_indices` yields byte offsets, which is what the slice below needs.
    let mut chars = input.char_indices().peekable();
    chars.next_if(|(_, ch)| matches!(ch, '+' | '-'));

    let Some((last_digit, _)) = chars.take_while(|(_, ch)| ch.is_ascii_digit()).last() else {
        return Some(0);
    };

    // Everything up to and including `last_digit` is ASCII, so the range ends on a char
    // boundary and contains at least one digit.
    match input[..=last_digit].parse::<i64>() {
        Ok(value) => Some(value),
        Err(err) => match err.kind() {
            std::num::IntErrorKind::PosOverflow => Some(i64::MAX),
            std::num::IntErrorKind::NegOverflow => Some(i64::MIN),
            std::num::IntErrorKind::Empty => unreachable!(),
            _ => Some(0),
        },
    }
}
/// Result of `str_to_f64`: the parsed value tagged with how the text was written.
pub enum StrToF64 {
/// The text had a decimal point with a digit on at least one side of it, or an exponent
/// marker followed (after an optional sign) by a digit.
Fractional(NonNan),
/// The text had neither of the above, i.e. it was not written as a fractional number.
Decimal(NonNan),
}
/// Parses the longest leading floating point number of `input`.
///
/// ASCII whitespace and vertical tabs are trimmed from both ends first. The accepted shape is
/// an optional sign, integer digits, an optional `.` with fraction digits and an optional
/// `e`/`E` exponent with its own optional sign. Characters after the number are never looked at.
///
/// Returns `None` only when the first character after the optional sign is `e` or `E`.
/// Otherwise the value is returned as `StrToF64::Fractional` when the text was written with a
/// fraction or exponent (see `is_fractional` below) and as `StrToF64::Decimal` when it was not;
/// text with no digits at all therefore comes back as `Decimal` zero.
pub fn str_to_f64(input: impl AsRef<str>) -> Option<StrToF64> {
let mut input = input
.as_ref()
.trim_matches(|ch: char| ch.is_ascii_whitespace() || ch == VERTICAL_TAB)
.chars()
.peekable();
// Optional leading sign of the whole number.
let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) {
Some('-') => -1.0,
_ => 1.0,
};
let mut had_digits = false;
let mut is_fractional = false;
// An exponent marker with nothing in front of it is not a number.
if matches!(input.peek(), Some('e' | 'E')) {
return None;
}
let mut significant: u64 = 0;
// Copy as many significant digits as we can
while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) {
had_digits = true;
match significant
.checked_mul(10)
.and_then(|v| v.checked_add(digit as u64))
{
Some(new) => significant = new,
// The next digit would overflow the u64: leave it unconsumed for the loop below.
None => break,
}
input.next();
}
let mut exponent = 0;
// Increment the exponent for every non significant digit we skipped
while input.next_if(char::is_ascii_digit).is_some() {
exponent += 1
}
if input.next_if(|ch| matches!(ch, '.')).is_some() {
// A lone "." with no digit on either side does not make the text fractional.
if had_digits || input.peek().is_some_and(char::is_ascii_digit) {
is_fractional = true
}
// Fraction digits extend `significant` while there is room; each one kept lowers the
// exponent by one. Digits that no longer fit are consumed and dropped.
while let Some(digit) = input.peek().and_then(|ch| ch.to_digit(10)) {
if significant < (u64::MAX - 9) / 10 {
significant = significant * 10 + digit as u64;
exponent -= 1;
}
input.next();
}
};
if input.next_if(|ch| matches!(ch, 'e' | 'E')).is_some() {
// Sign of the exponent (shadows the sign of the number inside this block only).
let sign = match input.next_if(|ch| matches!(ch, '-' | '+')) {
Some('-') => -1,
_ => 1,
};
if input.peek().is_some_and(char::is_ascii_digit) {
is_fractional = true
}
// Accumulate the exponent digits; once the running value has reached 1000 or more it
// is pinned to 1000 for every further digit.
let e = input.map_while(|ch| ch.to_digit(10)).fold(0, |acc, digit| {
if acc < 1000 {
acc * 10 + digit as i32
} else {
1000
}
});
exponent += sign * e;
};
// Move powers of ten from the exponent into the significand while it stays below
// MAX_EXACT / 10.
while exponent.is_positive() && significant < MAX_EXACT / 10 {
significant *= 10;
exponent -= 1;
}
// Strip trailing zeros from the significand to bring a negative exponent closer to zero.
while exponent.is_negative() && significant % 10 == 0 {
significant /= 10;
exponent += 1;
}
let mut result = DoubleDouble::from(significant);
// Apply the remaining exponent in steps of 10^±100, 10^±10 and 10^±1 using double-double
// multiplication.
// NOTE(review): the second component of each constant is presumably the rounding error of
// the first — confirm against the SQLite implementation.
if exponent > 0 {
while exponent >= 100 {
exponent -= 100;
result *= DoubleDouble(1.0e+100, -1.5902891109759918046e+83);
}
while exponent >= 10 {
exponent -= 10;
result *= DoubleDouble(1.0e+10, 0.0);
}
while exponent >= 1 {
exponent -= 1;
result *= DoubleDouble(1.0e+01, 0.0);
}
} else {
while exponent <= -100 {
exponent += 100;
result *= DoubleDouble(1.0e-100, -1.99918998026028836196e-117);
}
while exponent <= -10 {
exponent += 10;
result *= DoubleDouble(1.0e-10, -3.6432197315497741579e-27);
}
while exponent <= -1 {
exponent += 1;
result *= DoubleDouble(1.0e-01, -5.5511151231257827021e-18);
}
}
// If collapsing the double-double produced NaN, fall back to an infinity with the number's sign.
let result = NonNan::new(f64::from(result) * sign)
.unwrap_or_else(|| NonNan::new(sign * f64::INFINITY).unwrap());
Some(if is_fractional {
StrToF64::Fractional(result)
} else {
StrToF64::Decimal(result)
})
}

105
core/numeric/nonnan.rs Normal file
View File

@@ -0,0 +1,105 @@
/// An `f64` that is guaranteed not to be NaN.
///
/// Arithmetic that could produce a NaN returns `Option<NonNan>`, so callers have to decide
/// what a NaN result means.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct NonNan(f64);

impl NonNan {
    /// Wraps `value`, or returns `None` when it is NaN.
    pub fn new(value: f64) -> Option<Self> {
        (!value.is_nan()).then_some(Self(value))
    }
}

// Comparisons against plain `f64` values, in both operand orders.

impl PartialEq<NonNan> for f64 {
    fn eq(&self, NonNan(inner): &NonNan) -> bool {
        *self == *inner
    }
}

impl PartialEq<f64> for NonNan {
    fn eq(&self, rhs: &f64) -> bool {
        self.0 == *rhs
    }
}

impl PartialOrd<f64> for NonNan {
    fn partial_cmp(&self, rhs: &f64) -> Option<std::cmp::Ordering> {
        self.0.partial_cmp(rhs)
    }
}

impl PartialOrd<NonNan> for f64 {
    fn partial_cmp(&self, NonNan(inner): &NonNan) -> Option<std::cmp::Ordering> {
        self.partial_cmp(inner)
    }
}

// Conversions.

impl From<i64> for NonNan {
    /// An integer converted to `f64` can never be NaN, so no check is needed.
    fn from(value: i64) -> Self {
        Self(value as f64)
    }
}

impl From<NonNan> for f64 {
    fn from(NonNan(inner): NonNan) -> Self {
        inner
    }
}

impl std::ops::Deref for NonNan {
    type Target = f64;

    fn deref(&self) -> &f64 {
        &self.0
    }
}

/// Implements a binary operator for `NonNan` whose result is `None` when the underlying `f64`
/// operation produces NaN.
macro_rules! nan_checked_binop {
    ($($op_trait:ident, $op_fn:ident, $op:tt);+ $(;)?) => {
        $(
            impl std::ops::$op_trait for NonNan {
                type Output = Option<NonNan>;

                fn $op_fn(self, rhs: Self) -> Self::Output {
                    Self::new(self.0 $op rhs.0)
                }
            }
        )+
    };
}

nan_checked_binop! {
    Add, add, +;
    Sub, sub, -;
    Mul, mul, *;
    Div, div, /;
    Rem, rem, %;
}

impl std::ops::Neg for NonNan {
    type Output = Self;

    /// Negating a non-NaN value never yields NaN.
    fn neg(self) -> Self {
        NonNan(-self.0)
    }
}

View File

@@ -1,4 +1,5 @@
#![allow(unused_variables)]
use crate::numeric::{NullableInteger, Numeric};
use crate::storage::database::FileMemoryStorage;
use crate::storage::page_cache::DumbLruPageCache;
use crate::storage::pager::CreateBTreeFlags;
@@ -5482,357 +5483,61 @@ fn exec_likelihood(reg: &OwnedValue, _probability: &OwnedValue) -> OwnedValue {
}
pub fn exec_add(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
let result = match (lhs, rhs) {
(OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
let result = lhs.overflowing_add(*rhs);
if result.1 {
OwnedValue::Float(*lhs as f64 + *rhs as f64)
} else {
OwnedValue::Integer(result.0)
}
}
(OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs + rhs),
(OwnedValue::Float(f), OwnedValue::Integer(i))
| (OwnedValue::Integer(i), OwnedValue::Float(f)) => OwnedValue::Float(*f + *i as f64),
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_add(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
exec_add(&cast_text_to_numeric(text.as_str()), other)
}
_ => todo!(),
};
match result {
OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null,
_ => result,
}
(Numeric::from(lhs) + Numeric::from(rhs)).into()
}
pub fn exec_subtract(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
let result = match (lhs, rhs) {
(OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
let result = lhs.overflowing_sub(*rhs);
if result.1 {
OwnedValue::Float(*lhs as f64 - *rhs as f64)
} else {
OwnedValue::Integer(result.0)
}
}
(OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs - rhs),
(OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs - *rhs as f64),
(OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 - rhs),
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_subtract(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) => {
exec_subtract(&cast_text_to_numeric(text.as_str()), other)
}
(other, OwnedValue::Text(text)) => {
exec_subtract(other, &cast_text_to_numeric(text.as_str()))
}
(other, OwnedValue::Blob(blob)) => {
let text = String::from_utf8_lossy(&blob);
exec_subtract(other, &cast_text_to_numeric(&text))
}
(OwnedValue::Blob(blob), other) => {
let text = String::from_utf8_lossy(&blob);
exec_subtract(&cast_text_to_numeric(&text), other)
}
};
match result {
OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null,
_ => result,
}
(Numeric::from(lhs) - Numeric::from(rhs)).into()
}
pub fn exec_multiply(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
let result = match (lhs, rhs) {
(OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
let result = lhs.overflowing_mul(*rhs);
if result.1 {
OwnedValue::Float(*lhs as f64 * *rhs as f64)
} else {
OwnedValue::Integer(result.0)
}
}
(OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs * rhs),
(OwnedValue::Integer(i), OwnedValue::Float(f))
| (OwnedValue::Float(f), OwnedValue::Integer(i)) => OwnedValue::Float(*i as f64 * { *f }),
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_multiply(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
exec_multiply(&cast_text_to_numeric(text.as_str()), other)
}
_ => todo!(),
};
match result {
OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null,
_ => result,
}
(Numeric::from(lhs) * Numeric::from(rhs)).into()
}
pub fn exec_divide(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
let result = match (lhs, rhs) {
(_, OwnedValue::Integer(0)) | (_, OwnedValue::Float(0.0)) => OwnedValue::Null,
(OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
let result = lhs.overflowing_div(*rhs);
if result.1 {
OwnedValue::Float(*lhs as f64 / *rhs as f64)
} else {
OwnedValue::Integer(result.0)
}
}
(OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(lhs / rhs),
(OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => OwnedValue::Float(lhs / *rhs as f64),
(OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => OwnedValue::Float(*lhs as f64 / rhs),
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_divide(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) => exec_divide(&cast_text_to_numeric(text.as_str()), other),
(other, OwnedValue::Text(text)) => exec_divide(other, &cast_text_to_numeric(text.as_str())),
_ => todo!(),
};
match result {
OwnedValue::Float(f) if f.is_nan() => OwnedValue::Null,
_ => result,
}
(Numeric::from(lhs) / Numeric::from(rhs)).into()
}
pub fn exec_bit_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
match (lhs, rhs) {
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
(_, OwnedValue::Integer(0))
| (OwnedValue::Integer(0), _)
| (_, OwnedValue::Float(0.0))
| (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0),
(OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh & rh),
(OwnedValue::Float(lh), OwnedValue::Float(rh)) => {
OwnedValue::Integer(*lh as i64 & *rh as i64)
}
(OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 & rh),
(OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh & *rh as i64),
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_and(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
exec_bit_and(&cast_text_to_numeric(text.as_str()), other)
}
_ => todo!(),
}
(NullableInteger::from(lhs) & NullableInteger::from(rhs)).into()
}
pub fn exec_bit_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
match (lhs, rhs) {
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
(OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(lh | rh),
(OwnedValue::Float(lh), OwnedValue::Integer(rh)) => OwnedValue::Integer(*lh as i64 | rh),
(OwnedValue::Integer(lh), OwnedValue::Float(rh)) => OwnedValue::Integer(lh | *rh as i64),
(OwnedValue::Float(lh), OwnedValue::Float(rh)) => {
OwnedValue::Integer(*lh as i64 | *rh as i64)
}
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_bit_or(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
exec_bit_or(&cast_text_to_numeric(text.as_str()), other)
}
_ => todo!(),
}
(NullableInteger::from(lhs) | NullableInteger::from(rhs)).into()
}
pub fn exec_remainder(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
match (lhs, rhs) {
(OwnedValue::Null, _)
| (_, OwnedValue::Null)
| (_, OwnedValue::Integer(0))
| (_, OwnedValue::Float(0.0)) => OwnedValue::Null,
(OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
if rhs == &0 {
OwnedValue::Null
let convert_to_float = matches!(Numeric::from(lhs), Numeric::Float(_))
|| matches!(Numeric::from(rhs), Numeric::Float(_));
match NullableInteger::from(lhs) % NullableInteger::from(rhs) {
NullableInteger::Null => OwnedValue::Null,
NullableInteger::Integer(v) => {
if convert_to_float {
OwnedValue::Float(v as f64)
} else {
OwnedValue::Integer(lhs % rhs.abs())
OwnedValue::Integer(v)
}
}
(OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => {
let rhs_int = *rhs as i64;
if rhs_int == 0 {
OwnedValue::Null
} else {
OwnedValue::Float(((*lhs as i64) % rhs_int.abs()) as f64)
}
}
(OwnedValue::Float(lhs), OwnedValue::Integer(rhs)) => {
if rhs == &0 {
OwnedValue::Null
} else {
OwnedValue::Float(((*lhs as i64) % rhs.abs()) as f64)
}
}
(OwnedValue::Integer(lhs), OwnedValue::Float(rhs)) => {
let rhs_int = *rhs as i64;
if rhs_int == 0 {
OwnedValue::Null
} else {
OwnedValue::Float((lhs % rhs_int.abs()) as f64)
}
}
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_remainder(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) => {
exec_remainder(&cast_text_to_numeric(text.as_str()), other)
}
(other, OwnedValue::Text(text)) => {
exec_remainder(other, &cast_text_to_numeric(text.as_str()))
}
other => todo!("remainder not implemented for: {:?} {:?}", lhs, other),
}
}
pub fn exec_bit_not(reg: &OwnedValue) -> OwnedValue {
match reg {
OwnedValue::Null => OwnedValue::Null,
OwnedValue::Integer(i) => OwnedValue::Integer(!i),
OwnedValue::Float(f) => OwnedValue::Integer(!(*f as i64)),
OwnedValue::Text(text) => exec_bit_not(&cast_text_to_integer(text.as_str())),
OwnedValue::Blob(blob) => {
let text = String::from_utf8_lossy(blob);
exec_bit_not(&cast_text_to_integer(&text))
}
}
(!NullableInteger::from(reg)).into()
}
pub fn exec_shift_left(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
match (lhs, rhs) {
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
(OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => {
OwnedValue::Integer(compute_shl(*lh, *rh))
}
(OwnedValue::Float(lh), OwnedValue::Integer(rh)) => {
OwnedValue::Integer(compute_shl(*lh as i64, *rh))
}
(OwnedValue::Integer(lh), OwnedValue::Float(rh)) => {
OwnedValue::Integer(compute_shl(*lh, *rh as i64))
}
(OwnedValue::Float(lh), OwnedValue::Float(rh)) => {
OwnedValue::Integer(compute_shl(*lh as i64, *rh as i64))
}
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_left(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) => {
exec_shift_left(&cast_text_to_numeric(text.as_str()), other)
}
(other, OwnedValue::Text(text)) => {
exec_shift_left(other, &cast_text_to_numeric(text.as_str()))
}
_ => todo!(),
}
}
fn compute_shl(lhs: i64, rhs: i64) -> i64 {
if rhs == 0 {
lhs
} else if rhs > 0 {
// for positive shifts, if it's too large return 0
if rhs >= 64 {
0
} else {
lhs << rhs
}
} else {
// for negative shifts, check if it's i64::MIN to avoid overflow on negation
if rhs == i64::MIN || rhs <= -64 {
if lhs < 0 {
-1
} else {
0
}
} else {
lhs >> (-rhs)
}
}
(NullableInteger::from(lhs) << NullableInteger::from(rhs)).into()
}
pub fn exec_shift_right(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
match (lhs, rhs) {
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
(OwnedValue::Integer(lh), OwnedValue::Integer(rh)) => {
OwnedValue::Integer(compute_shr(*lh, *rh))
}
(OwnedValue::Float(lh), OwnedValue::Integer(rh)) => {
OwnedValue::Integer(compute_shr(*lh as i64, *rh))
}
(OwnedValue::Integer(lh), OwnedValue::Float(rh)) => {
OwnedValue::Integer(compute_shr(*lh, *rh as i64))
}
(OwnedValue::Float(lh), OwnedValue::Float(rh)) => {
OwnedValue::Integer(compute_shr(*lh as i64, *rh as i64))
}
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_shift_right(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) => {
exec_shift_right(&cast_text_to_numeric(text.as_str()), other)
}
(other, OwnedValue::Text(text)) => {
exec_shift_right(other, &cast_text_to_numeric(text.as_str()))
}
_ => todo!(),
}
}
// compute binary shift to the right if rhs >= 0 and binary shift to the left - if rhs < 0
// note, that binary shift to the right is sign-extended
fn compute_shr(lhs: i64, rhs: i64) -> i64 {
if rhs == 0 {
lhs
} else if rhs > 0 {
// for positive right shifts
if rhs >= 64 {
if lhs < 0 {
-1
} else {
0
}
} else {
lhs >> rhs
}
} else {
// for negative right shifts, check if it's i64::MIN to avoid overflow
if rhs == i64::MIN || -rhs >= 64 {
0
} else {
lhs << (-rhs)
}
}
(NullableInteger::from(lhs) >> NullableInteger::from(rhs)).into()
}
pub fn exec_boolean_not(reg: &OwnedValue) -> OwnedValue {
match reg {
OwnedValue::Null => OwnedValue::Null,
OwnedValue::Integer(i) => OwnedValue::Integer((*i == 0) as i64),
OwnedValue::Float(f) => OwnedValue::Integer((*f == 0.0) as i64),
OwnedValue::Text(text) => exec_boolean_not(&&cast_text_to_real(text.as_str())),
OwnedValue::Blob(blob) => {
let text = String::from_utf8_lossy(blob);
exec_boolean_not(&cast_text_to_real(&text))
}
match Numeric::from(reg).try_into_bool() {
None => OwnedValue::Null,
Some(v) => OwnedValue::Integer(!v as i64),
}
}
pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
@@ -5872,46 +5577,24 @@ pub fn exec_concat(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
}
pub fn exec_and(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
match (lhs, rhs) {
(_, OwnedValue::Integer(0))
| (OwnedValue::Integer(0), _)
| (_, OwnedValue::Float(0.0))
| (OwnedValue::Float(0.0), _) => OwnedValue::Integer(0),
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_and(
&cast_text_to_real(lhs.as_str()),
&cast_text_to_real(rhs.as_str()),
),
(OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
exec_and(&cast_text_to_real(text.as_str()), other)
}
(OwnedValue::Blob(blob), other) | (other, OwnedValue::Blob(blob)) => {
let text = String::from_utf8_lossy(blob);
exec_and(&cast_text_to_real(&text), other)
}
(OwnedValue::Null, _) | (_, OwnedValue::Null) => OwnedValue::Null,
match (
Numeric::from(lhs).try_into_bool(),
Numeric::from(rhs).try_into_bool(),
) {
(Some(false), _) | (_, Some(false)) => OwnedValue::Integer(0),
(None, _) | (_, None) => OwnedValue::Null,
_ => OwnedValue::Integer(1),
}
}
pub fn exec_or(lhs: &OwnedValue, rhs: &OwnedValue) -> OwnedValue {
match (lhs, rhs) {
(OwnedValue::Null, OwnedValue::Null)
| (OwnedValue::Null, OwnedValue::Float(0.0))
| (OwnedValue::Float(0.0), OwnedValue::Null)
| (OwnedValue::Null, OwnedValue::Integer(0))
| (OwnedValue::Integer(0), OwnedValue::Null) => OwnedValue::Null,
(OwnedValue::Float(0.0), OwnedValue::Integer(0))
| (OwnedValue::Integer(0), OwnedValue::Float(0.0))
| (OwnedValue::Float(0.0), OwnedValue::Float(0.0))
| (OwnedValue::Integer(0), OwnedValue::Integer(0)) => OwnedValue::Integer(0),
(OwnedValue::Text(lhs), OwnedValue::Text(rhs)) => exec_or(
&cast_text_to_numeric(lhs.as_str()),
&cast_text_to_numeric(rhs.as_str()),
),
(OwnedValue::Text(text), other) | (other, OwnedValue::Text(text)) => {
exec_or(&cast_text_to_numeric(text.as_str()), other)
}
_ => OwnedValue::Integer(1),
match (
Numeric::from(lhs).try_into_bool(),
Numeric::from(rhs).try_into_bool(),
) {
(Some(true), _) | (_, Some(true)) => OwnedValue::Integer(1),
(None, _) | (_, None) => OwnedValue::Null,
_ => OwnedValue::Integer(0),
}
}

16
fuzz/Cargo.lock generated
View File

@@ -470,9 +470,9 @@ dependencies = [
[[package]]
name = "julian_day_converter"
version = "0.4.4"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aa5652b85ab018289638c6b924db618da9edd2ddfff7fa0ec38a8b51a9192d3"
checksum = "f2987f71b89b85c812c8484cbf0c5d7912589e77bfdc66fd3e52f760e7859f16"
dependencies = [
"chrono",
]
@@ -566,7 +566,7 @@ dependencies = [
[[package]]
name = "limbo_core"
version = "0.0.19-pre.4"
version = "0.0.19"
dependencies = [
"built",
"cfg_block",
@@ -599,7 +599,7 @@ dependencies = [
[[package]]
name = "limbo_ext"
version = "0.0.19-pre.4"
version = "0.0.19"
dependencies = [
"chrono",
"getrandom 0.3.1",
@@ -608,7 +608,7 @@ dependencies = [
[[package]]
name = "limbo_macros"
version = "0.0.19-pre.4"
version = "0.0.19"
dependencies = [
"proc-macro2",
"quote",
@@ -617,7 +617,7 @@ dependencies = [
[[package]]
name = "limbo_sqlite3_parser"
version = "0.0.19-pre.4"
version = "0.0.19"
dependencies = [
"bitflags",
"cc",
@@ -636,7 +636,7 @@ dependencies = [
[[package]]
name = "limbo_time"
version = "0.0.19-pre.4"
version = "0.0.19"
dependencies = [
"chrono",
"limbo_ext",
@@ -648,7 +648,7 @@ dependencies = [
[[package]]
name = "limbo_uuid"
version = "0.0.19-pre.4"
version = "0.0.19"
dependencies = [
"limbo_ext",
"mimalloc",

View File

@@ -11,7 +11,7 @@ cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
arbitrary = { version = "1.4.1", features = ["derive"] }
limbo_core = { path = "../core" }
limbo_core = { path = "../core", features = ["fuzz"] }
rusqlite = { version = "0.34.0", features = ["bundled"] }
# Prevent this from interfering with workspaces
@@ -21,3 +21,7 @@ members = ["."]
[[bin]]
name = "expression"
path = "fuzz_targets/expression.rs"
[[bin]]
name = "cast_real"
path = "fuzz_targets/cast_real.rs"

View File

@@ -0,0 +1,22 @@
#![no_main]
use libfuzzer_sys::{fuzz_target, Corpus};
use std::error::Error;
/// Checks that limbo's text-to-real conversion agrees with SQLite's `cast(? as real)`.
///
/// Errors from the SQLite side are returned to the caller; a mismatch between the two
/// results panics through `assert_eq!`.
fn do_fuzz(text: String) -> Result<Corpus, Box<dyn Error>> {
    let expected = {
        let conn = rusqlite::Connection::open_in_memory()?;
        conn.query_row("SELECT cast(? as real)", (&text,), |row| {
            row.get::<_, f64>(0)
        })?
    };

    // `str_to_f64` is the conversion entry point exposed by `limbo_core::numeric`. Both
    // variants carry the parsed value, and a rejected input counts as 0.0.
    let actual = limbo_core::numeric::str_to_f64(&text)
        .map(|parsed| match parsed {
            limbo_core::numeric::StrToF64::Fractional(non_nan)
            | limbo_core::numeric::StrToF64::Decimal(non_nan) => f64::from(non_nan),
        })
        .unwrap_or(0.0);

    assert_eq!(expected, actual);
    Ok(Corpus::Keep)
}
fuzz_target!(|blob: String| -> Corpus { do_fuzz(blob).unwrap_or(Corpus::Keep) });

View File

@@ -31,13 +31,15 @@ macro_rules! str_enum {
str_enum! {
enum Binary {
Equal => "=",
Is => "IS",
NotEqual => "<>",
GreaterThan => ">",
GreaterThanOrEqual => ">=",
LessThan => "<",
LessThanOrEqual => "<=",
// TODO: Not compatible yet
// Equal => "=",
// Is => "IS",
// Concat => "||",
// NotEqual => "<>",
// GreaterThan => ">",
// GreaterThanOrEqual => ">=",
// LessThan => "<",
// LessThanOrEqual => "<=",
RightShift => ">>",
LeftShift => "<<",
BitwiseAnd => "&",
@@ -49,13 +51,13 @@ str_enum! {
Multiply => "*",
Divide => "/",
Mod => "%",
Concat => "||",
}
}
str_enum! {
enum Unary {
Not => "~",
Not => "NOT",
BitwiseNot => "~",
Negative => "-",
Positive => "+",
}
@@ -167,7 +169,7 @@ fn do_fuzz(expr: Expr) -> Result<Corpus, Box<dyn Error>> {
let sql = format!("SELECT {}", expr.query);
// FIX: `limbo_core::translate::expr::translate_expr` causes an overflow if this is any higher.
if expr.depth > 153 {
if expr.depth > 140 {
return Ok(Corpus::Reject);
}
@@ -206,12 +208,8 @@ fn do_fuzz(expr: Expr) -> Result<Corpus, Box<dyn Error>> {
assert_eq!(
OwnedValue::from(expected.clone()),
found.clone(),
"with expression {:?} {}",
"with expression {:?}",
expr,
match (expected, found) {
(Value::Real(a), OwnedValue::Float(b)) => format!("float diff: {:?}", (a - b).abs()),
_ => "".to_string(),
}
);
Ok(Corpus::Keep)