From 30ccbe46c7b709704dbc2e63cf40de2cedc6b485 Mon Sep 17 00:00:00 2001
From: krishvishal
Date: Mon, 2 Jun 2025 11:19:41 +0530
Subject: [PATCH] Added `apply_numeric_affinity` function to handle string
 conversion to integer. Existing functions' behavior is tailored to `CAST` ops.
 SQLite has different behavior when it comes to handling string to `integer`
 conversion in CAST vs predicate ops.

---
 core/vdbe/execute.rs | 393 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 393 insertions(+)

diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 7c235c316..067dd3ace 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -6139,10 +6139,403 @@ pub fn extract_int_value(value: &Value) -> i64 {
     }
 }
 
+/// How much of a text value [`try_for_float`] recognised as a number.
+#[derive(Debug, PartialEq)]
+enum NumericParseResult {
+    NotNumeric,      // not a valid number
+    PureInteger,     // pure integer (entire string)
+    HasDecimalOrExp, // has decimal point or exponent (entire string)
+    ValidPrefixOnly, // valid prefix but not entire string
+}
+
+/// Numeric value produced by [`try_for_float`], if any.
+#[derive(Debug)]
+enum ParsedNumber {
+    None,
+    Integer(i64),
+    Float(f64),
+}
+
+impl ParsedNumber {
+    /// Returns the integer payload, or `None` for any other variant.
+    fn as_integer(&self) -> Option<i64> {
+        match self {
+            ParsedNumber::Integer(i) => Some(*i),
+            _ => None,
+        }
+    }
+
+    /// Returns the float payload, or `None` for any other variant.
+    fn as_float(&self) -> Option<f64> {
+        match self {
+            ParsedNumber::Float(f) => Some(*f),
+            _ => None,
+        }
+    }
+}
+
+/// Scans `text` for a decimal number: optional leading whitespace, an optional
+/// sign, digits with an optional fractional part, an optional exponent, and
+/// optional trailing whitespace.
+///
+/// Returns how much of the input was numeric together with the parsed value.
+/// When only a prefix is numeric the result is `ValidPrefixOnly` and the value
+/// covers just that prefix.
+fn try_for_float(text: &str) -> (NumericParseResult, ParsedNumber) {
+    let bytes = text.as_bytes();
+    if bytes.is_empty() {
+        return (NumericParseResult::NotNumeric, ParsedNumber::None);
+    }
+
+    let mut pos = 0;
+    let len = bytes.len();
+
+    while pos < len && is_space(bytes[pos]) {
+        pos += 1;
+    }
+
+    if pos >= len {
+        return (NumericParseResult::NotNumeric, ParsedNumber::None);
+    }
+
+    let mut sign = 1i64;
+
+    if bytes[pos] == b'-' {
+        sign = -1;
+        pos += 1;
+    } else if bytes[pos] == b'+' {
+        pos += 1;
+    }
+
+    if pos >= len {
+        return (NumericParseResult::NotNumeric, ParsedNumber::None);
+    }
+
+    let mut significand = 0u64;
+    let mut decimal_adjust = 0i32;
+    let mut has_digits = false;
+
+    // Parse digits before decimal point
+    while pos < len && bytes[pos].is_ascii_digit() {
+        has_digits = true;
+        let digit = (bytes[pos] - b'0') as u64;
+
+        if significand <= (u64::MAX - 9) / 10 {
+            significand = significand * 10 + digit;
+        } else {
+            // Skip overflow digits but adjust exponent
+            decimal_adjust += 1;
+        }
+        pos += 1;
+    }
+
+    let mut has_decimal = false;
+    let mut has_exponent = false;
+
+    // Check for decimal point
+    if pos < len && bytes[pos] == b'.' {
+        has_decimal = true;
+        pos += 1;
+
+        // Parse fractional digits
+        while pos < len && bytes[pos].is_ascii_digit() {
+            has_digits = true;
+            let digit = (bytes[pos] - b'0') as u64;
+
+            // Fractional digits that no longer fit are simply dropped.
+            if significand <= (u64::MAX - 9) / 10 {
+                significand = significand * 10 + digit;
+                decimal_adjust -= 1;
+            }
+            pos += 1;
+        }
+    }
+
+    if !has_digits {
+        return (NumericParseResult::NotNumeric, ParsedNumber::None);
+    }
+
+    // Check for exponent
+    let mut exponent = 0i32;
+    if pos < len && (bytes[pos] == b'e' || bytes[pos] == b'E') {
+        has_exponent = true;
+        pos += 1;
+
+        if pos >= len {
+            // Incomplete exponent, but we have valid digits before. The dangling
+            // `e` is not part of the number, so report the prefix without it.
+            return create_result_from_significand(
+                significand,
+                sign,
+                decimal_adjust,
+                has_decimal,
+                false,
+                NumericParseResult::ValidPrefixOnly,
+            );
+        }
+
+        let mut exp_sign = 1i32;
+        if bytes[pos] == b'-' {
+            exp_sign = -1;
+            pos += 1;
+        } else if bytes[pos] == b'+' {
+            pos += 1;
+        }
+
+        if pos >= len || !bytes[pos].is_ascii_digit() {
+            // Incomplete exponent
+            return create_result_from_significand(
+                significand,
+                sign,
+                decimal_adjust,
+                has_decimal,
+                false,
+                NumericParseResult::ValidPrefixOnly,
+            );
+        }
+
+        // Parse exponent digits
+        while pos < len && bytes[pos].is_ascii_digit() {
+            let digit = (bytes[pos] - b'0') as i32;
+            if exponent < 10000 {
+                exponent = exponent * 10 + digit;
+            } else {
+                exponent = 10000; // Cap at large value
+            }
+            pos += 1;
+        }
+        exponent *= exp_sign;
+    }
+
+    // Skip trailing whitespace
+    while pos < len && is_space(bytes[pos]) {
+        pos += 1;
+    }
+
+    // Determine if we consumed the entire string
+    let consumed_all = pos >= len;
+    let final_exponent = decimal_adjust + exponent;
+
+    let parse_result = if !consumed_all {
+        NumericParseResult::ValidPrefixOnly
+    } else if has_decimal || has_exponent {
+        NumericParseResult::HasDecimalOrExp
+    } else {
+        NumericParseResult::PureInteger
+    };
+
+    create_result_from_significand(
+        significand,
+        sign,
+        final_exponent,
+        has_decimal,
+        has_exponent,
+        parse_result,
+    )
+}
+
+/// Builds the parsed value `sign * significand * 10^exponent`.
+///
+/// Input written without a decimal point or exponent becomes an `Integer`
+/// when it fits in an `i64`; everything else becomes a `Float`.
+/// `parse_result` is returned unchanged as the first tuple element.
+fn create_result_from_significand(
+    significand: u64,
+    sign: i64,
+    exponent: i32,
+    has_decimal: bool,
+    has_exponent: bool,
+    parse_result: NumericParseResult,
+) -> (NumericParseResult, ParsedNumber) {
+    if significand == 0 {
+        match parse_result {
+            NumericParseResult::PureInteger => {
+                return (parse_result, ParsedNumber::Integer(0));
+            }
+            _ => {
+                return (parse_result, ParsedNumber::Float(0.0));
+            }
+        }
+    }
+
+    // For pure integers without exponent, try to return as integer
+    if !has_decimal && !has_exponent && exponent == 0 {
+        // Apply the sign in i128 so magnitudes above i64::MAX fall through to
+        // the float path instead of wrapping to a negative integer, and
+        // "-9223372036854775808" maps to i64::MIN without overflowing.
+        let signed_val = i128::from(significand) * i128::from(sign);
+        if (i128::from(i64::MIN)..=i128::from(i64::MAX)).contains(&signed_val) {
+            return (parse_result, ParsedNumber::Integer(signed_val as i64));
+        }
+    }
+
+    // Convert to float
+    let mut result = significand as f64;
+
+    let mut exp = exponent;
+    if exp > 0 {
+        while exp >= 100 {
+            result *= 1e100;
+            exp -= 100;
+        }
+        while exp >= 10 {
+            result *= 1e10;
+            exp -= 10;
+        }
+        while exp >= 1 {
+            result *= 10.0;
+            exp -= 1;
+        }
+    } else if exp < 0 {
+        while exp <= -100 {
+            result *= 1e-100;
+            exp += 100;
+        }
+        while exp <= -10 {
+            result *= 1e-10;
+            exp += 10;
+        }
+        while exp <= -1 {
+            result *= 0.1;
+            exp += 1;
+        }
+    }
+
+    if sign < 0 {
+        result = -result;
+    }
+
+    (parse_result, ParsedNumber::Float(result))
+}
+
+/// Returns `true` for the ASCII whitespace bytes skipped around numbers:
+/// space, tab, line feed, carriage return and form feed.
+pub fn is_space(byte: u8) -> bool {
+    matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b'\x0c')
+}
+
+/// Converts a float to `i64`, clamping values outside the range bounded by
+/// the two thresholds below to `i64::MIN` / `i64::MAX`.
+fn real_to_i64(r: f64) -> i64 {
+    if r < -9223372036854774784.0 {
+        i64::MIN
+    } else if r > 9223372036854774784.0 {
+        i64::MAX
+    } else {
+        r as i64
+    }
+}
+
+/// Replaces a float register with the equal integer when the conversion is
+/// exact. Returns `true` if the register was changed.
+fn apply_integer_affinity(register: &mut Register) -> bool {
+    let Register::Value(Value::Float(f)) = register else {
+        return false;
+    };
+
+    let ix = real_to_i64(*f);
+
+    // Only convert if round-trip is exact and not at extreme values
+    if *f == (ix as f64) && ix > i64::MIN && ix < i64::MAX {
+        *register = Register::Value(Value::Integer(ix));
+        true
+    } else {
+        false
+    }
+}
+
+/// Try to convert a value into a numeric representation if we can
+/// do so without loss of information. In other words, if the string
+/// looks like a number, convert it into a number. If it does not
+/// look like a number, leave it alone.
+///
+/// Only text registers are considered. Integer text that does not fit in an
+/// `i64` becomes a float. When `try_for_int` is set, a float result is turned
+/// into an integer if that conversion is exact. Returns `true` if the
+/// register was changed.
+pub fn apply_numeric_affinity(register: &mut Register, try_for_int: bool) -> bool {
+    let Register::Value(Value::Text(text)) = register else {
+        return false; // Only apply to text values
+    };
+
+    let text_str = text.as_str();
+    let (parse_result, parsed_value) = try_for_float(text_str);
+
+    // Only convert if we have a complete valid number (not just a prefix)
+    match parse_result {
+        NumericParseResult::NotNumeric | NumericParseResult::ValidPrefixOnly => {
+            false // Leave as text
+        }
+        NumericParseResult::PureInteger | NumericParseResult::HasDecimalOrExp => {
+            if let Some(int_val) = parsed_value.as_integer() {
+                *register = Register::Value(Value::Integer(int_val));
+                true
+            } else if let Some(float_val) = parsed_value.as_float() {
+                // Reached for decimal/exponent forms and for integer text that
+                // overflows i64.
+                *register = Register::Value(Value::Float(float_val));
+                // If try_for_int is true, try to convert float to int if exact
+                if try_for_int {
+                    apply_integer_affinity(register);
+                }
+                true
+            } else {
+                false
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
+    use super::*;
     use crate::types::{Text, Value};
 
+    #[test]
+    fn test_apply_numeric_affinity_partial_numbers() {
+        let mut reg = Register::Value(Value::Text(Text::from_str("123abc")));
+        assert!(!apply_numeric_affinity(&mut reg, false));
+        assert!(matches!(reg, Register::Value(Value::Text(_))));
+
+        let mut reg = Register::Value(Value::Text(Text::from_str("-53093015420544-15062897")));
+        assert!(!apply_numeric_affinity(&mut reg, false));
+        assert!(matches!(reg, Register::Value(Value::Text(_))));
+
+        let mut reg = Register::Value(Value::Text(Text::from_str("123.45xyz")));
+        assert!(!apply_numeric_affinity(&mut reg, false));
+        assert!(matches!(reg, Register::Value(Value::Text(_))));
+    }
+
+    #[test]
+    fn test_apply_numeric_affinity_complete_numbers() {
+        let mut reg = Register::Value(Value::Text(Text::from_str("123")));
+        assert!(apply_numeric_affinity(&mut reg, false));
+        assert_eq!(*reg.get_owned_value(), Value::Integer(123));
+
+        let mut reg = Register::Value(Value::Text(Text::from_str("123.45")));
+        assert!(apply_numeric_affinity(&mut reg, false));
+        assert_eq!(*reg.get_owned_value(), Value::Float(123.45));
+
+        let mut reg = Register::Value(Value::Text(Text::from_str(" -456 ")));
+        assert!(apply_numeric_affinity(&mut reg, false));
+        assert_eq!(*reg.get_owned_value(), Value::Integer(-456));
+
+        let mut reg = Register::Value(Value::Text(Text::from_str("0")));
+        assert!(apply_numeric_affinity(&mut reg, false));
+        assert_eq!(*reg.get_owned_value(), Value::Integer(0));
+    }
+
+    #[test]
+    fn test_apply_numeric_affinity_integer_bounds() {
+        let mut reg = Register::Value(Value::Text(Text::from_str("-9223372036854775808")));
+        assert!(apply_numeric_affinity(&mut reg, false));
+        assert_eq!(*reg.get_owned_value(), Value::Integer(i64::MIN));
+
+        let mut reg = Register::Value(Value::Text(Text::from_str("9223372036854775808")));
+        assert!(apply_numeric_affinity(&mut reg, false));
+        assert_eq!(*reg.get_owned_value(), Value::Float(9223372036854775808.0));
+    }
+
     #[test]
     fn test_exec_add() {
         let inputs = vec![