refactor numeric literal

This commit is contained in:
meteorgan
2025-05-08 18:36:53 +08:00
parent ae2561dbca
commit ef3f004e30
4 changed files with 263 additions and 83 deletions

View File

@@ -8,13 +8,13 @@ use crate::function::JsonFunc;
use crate::function::{Func, FuncCtx, MathFuncArity, ScalarFunc, VectorFunc};
use crate::functions::datetime;
use crate::schema::{Table, Type};
use crate::util::{exprs_are_equivalent, normalize_ident};
use crate::util::{exprs_are_equivalent, normalize_ident, parse_numeric_literal};
use crate::vdbe::{
builder::ProgramBuilder,
insn::{CmpInsFlags, Insn},
BranchOffset,
};
use crate::Result;
use crate::{OwnedValue, Result};
#[derive(Debug, Clone, Copy)]
pub struct ConditionMetadata {
@@ -1967,24 +1967,20 @@ pub fn translate_expr(
}
ast::Expr::Literal(lit) => match lit {
ast::Literal::Numeric(val) => {
if val.starts_with("0x") || val.starts_with("0X") {
// must be a hex decimal
let int_value = i64::from_str_radix(&val[2..], 16)?;
program.emit_insn(Insn::Integer {
value: int_value,
dest: target_register,
});
} else if let Ok(int_value) = val.parse::<i64>() {
program.emit_insn(Insn::Integer {
value: int_value,
dest: target_register,
});
} else {
// must be a float
program.emit_insn(Insn::Real {
value: val.parse()?,
dest: target_register,
});
match parse_numeric_literal(val)? {
OwnedValue::Integer(int_value) => {
program.emit_insn(Insn::Integer {
value: int_value,
dest: target_register,
});
}
OwnedValue::Float(real_value) => {
program.emit_insn(Insn::Real {
value: real_value,
dest: target_register,
});
}
_ => unreachable!(),
}
Ok(target_register)
}
@@ -2073,36 +2069,21 @@ pub fn translate_expr(
translate_expr(program, referenced_tables, expr, target_register, resolver)
}
(UnaryOperator::Negative, ast::Expr::Literal(ast::Literal::Numeric(numeric_value))) => {
// Special case: if we're negating "9223372036854775808", this is exactly MIN_INT64
// If we don't do this -1 * 9223372036854775808 will overflow and parse will fail
// and trigger conversion to Real.
if numeric_value == "9223372036854775808"
|| numeric_value.eq_ignore_ascii_case("0x7fffffffffffffff")
{
program.emit_insn(Insn::Integer {
value: i64::MIN,
dest: target_register,
});
} else {
if numeric_value.starts_with("0x") || numeric_value.starts_with("0X") {
// must be a hex decimal
let int_value = i64::from_str_radix(&numeric_value[2..], 16)?;
let numeric_value = "-".to_owned() + numeric_value;
match parse_numeric_literal(&numeric_value)? {
OwnedValue::Integer(int_value) => {
program.emit_insn(Insn::Integer {
value: -int_value,
dest: target_register,
});
} else if let Ok(value) = numeric_value.parse::<i64>() {
program.emit_insn(Insn::Integer {
value: value * -1,
dest: target_register,
});
} else {
let value = numeric_value.parse::<f64>()?;
program.emit_insn(Insn::Real {
value: value * -1 as f64,
value: int_value,
dest: target_register,
});
}
OwnedValue::Float(real_value) => {
program.emit_insn(Insn::Real {
value: real_value,
dest: target_register,
});
}
_ => unreachable!(),
}
Ok(target_register)
}
@@ -2125,23 +2106,20 @@ pub fn translate_expr(
Ok(target_register)
}
(UnaryOperator::BitwiseNot, ast::Expr::Literal(ast::Literal::Numeric(num_val))) => {
if num_val.starts_with("0x") || num_val.starts_with("0X") {
let int_value = i64::from_str_radix(&num_val[2..], 16)?;
program.emit_insn(Insn::Integer {
value: !int_value,
dest: target_register,
});
} else if let Ok(val) = num_val.parse::<i64>() {
program.emit_insn(Insn::Integer {
value: !val,
dest: target_register,
});
} else {
let num_val = num_val.parse::<f64>()? as i64;
program.emit_insn(Insn::Integer {
value: !num_val,
dest: target_register,
});
match parse_numeric_literal(num_val)? {
OwnedValue::Integer(int_value) => {
program.emit_insn(Insn::Integer {
value: !int_value,
dest: target_register,
});
}
OwnedValue::Float(real_value) => {
program.emit_insn(Insn::Integer {
value: !(real_value as i64),
dest: target_register,
});
}
_ => unreachable!(),
}
Ok(target_register)
}

View File

@@ -1,12 +1,11 @@
use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Literal};
use std::{rc::Rc, sync::Arc};
use crate::{
function::Func,
schema::{self, Column, Schema, Type},
types::{OwnedValue, OwnedValueType},
LimboError, OpenFlags, Result, Statement, StepResult, SymbolTable, IO,
};
use limbo_sqlite3_parser::ast::{self, CreateTableBody, Expr, FunctionTail, Literal};
use std::{rc::Rc, sync::Arc};
pub trait RoundToPrecision {
fn round_to_precision(self, precision: i32) -> f64;
@@ -882,25 +881,20 @@ pub fn checked_cast_text_to_numeric(text: &str) -> std::result::Result<OwnedValu
// '-100234-2344.23e14' evaluates to -100234 instead of -100234.0
let (kind, text) = parse_numeric_str(text)?;
match kind {
OwnedValueType::Integer => {
match text.parse::<i64>() {
Ok(i) => Ok(OwnedValue::Integer(i)),
Err(e) => {
if matches!(
e.kind(),
std::num::IntErrorKind::PosOverflow | std::num::IntErrorKind::NegOverflow
) {
// if overflow, we return the representation as a real:
// we have to match sqlite exactly here, so we match sqlite3AtoF
let value = text.parse::<f64>().unwrap_or_default();
let factor = 10f64.powi(15 - value.abs().log10().ceil() as i32);
Ok(OwnedValue::Float((value * factor).round() / factor))
} else {
Err(())
}
OwnedValueType::Integer => match text.parse::<i64>() {
Ok(i) => Ok(OwnedValue::Integer(i)),
Err(e) => {
if matches!(
e.kind(),
std::num::IntErrorKind::PosOverflow | std::num::IntErrorKind::NegOverflow
) {
let value = convert_overflow_i64_to_f64(text);
Ok(OwnedValue::Float(value))
} else {
Err(())
}
}
}
},
OwnedValueType::Float => Ok(text
.parse::<f64>()
.map_or(OwnedValue::Float(0.0), OwnedValue::Float)),
@@ -908,6 +902,14 @@ pub fn checked_cast_text_to_numeric(text: &str) -> std::result::Result<OwnedValu
}
}
/// Converts the decimal text of an integer that overflowed `i64` into the
/// `f64` that should represent it.
///
/// The text is parsed as a float and then rounded to 15 significant decimal
/// digits; we have to match sqlite exactly here, so this mirrors the result
/// of `sqlite3AtoF`.
///
/// Returns `0.0` when `value` does not parse as a float. Zero and non-finite
/// values are returned unchanged, because the rounding step is undefined for
/// them: `log10(0)` is `-inf` (which overflows the exponent arithmetic) and
/// an infinite input would turn into `inf * 0.0 == NaN`.
fn convert_overflow_i64_to_f64(value: &str) -> f64 {
    let value = value.parse::<f64>().unwrap_or_default();
    if value == 0.0 || !value.is_finite() {
        return value;
    }
    // Scale so that exactly 15 significant digits sit left of the decimal
    // point, round there, then scale back.
    let factor = 10f64.powi(15 - value.abs().log10().ceil() as i32);
    (value * factor).round() / factor
}
fn parse_numeric_str(text: &str) -> Result<(OwnedValueType, &str), ()> {
let text = text.trim();
let bytes = text.as_bytes();
@@ -977,6 +979,39 @@ pub fn cast_real_to_integer(float: f64) -> std::result::Result<i64, ()> {
Err(())
}
/// Turns the text of a numeric literal into an [`OwnedValue`].
///
/// The literal is not validated here, as it is already verified by the parser.
///
/// * `0x…` / `0X…` literals are read as a 64-bit pattern and reinterpreted as
///   `i64`, so `0xFFFFFFFFFFFFFFFF` is `-1`.
/// * `-0x…` / `-0X…` literals are read the same way and then negated; a
///   pattern equal to `i64::MIN` cannot be negated and is rejected.
/// * Decimal text that fits in `i64` becomes an integer; decimal integer text
///   that overflows `i64` becomes a float via `convert_overflow_i64_to_f64`.
/// * Everything else is parsed as `f64`.
///
/// # Errors
///
/// Returns `LimboError::IntegerOverflow` for a negated hex literal whose bit
/// pattern is `i64::MIN`, and propagates the hex-digit and float parse errors.
pub fn parse_numeric_literal(text: &str) -> Result<OwnedValue> {
    // A single extra underscore ("_") may sit between any two digits; drop them all.
    let cleaned = text.replace("_", "");

    let plain_hex = cleaned
        .strip_prefix("0x")
        .or_else(|| cleaned.strip_prefix("0X"));
    if let Some(digits) = plain_hex {
        let bits = u64::from_str_radix(digits, 16)?;
        return Ok(OwnedValue::Integer(bits as i64));
    }

    let negated_hex = cleaned
        .strip_prefix("-0x")
        .or_else(|| cleaned.strip_prefix("-0X"));
    if let Some(digits) = negated_hex {
        let magnitude = u64::from_str_radix(digits, 16)? as i64;
        // -i64::MIN does not exist, so that one pattern has to be refused.
        if magnitude == i64::MIN {
            return Err(LimboError::IntegerOverflow);
        }
        return Ok(OwnedValue::Integer(-magnitude));
    }

    let int_error = match cleaned.parse::<i64>() {
        Ok(number) => return Ok(OwnedValue::Integer(number)),
        Err(error) => error,
    };
    let overflowed = matches!(
        int_error.kind(),
        std::num::IntErrorKind::PosOverflow | std::num::IntErrorKind::NegOverflow
    );
    if overflowed {
        return Ok(OwnedValue::Float(convert_overflow_i64_to_f64(&cleaned)));
    }

    // Not integer-shaped at all (fraction and/or exponent): parse as a float.
    Ok(OwnedValue::Float(cleaned.parse::<f64>()?))
}
// for TVF's we need these at planning time so we cannot emit translate_expr
pub fn vtable_args(args: &[ast::Expr]) -> Vec<limbo_ext::Value> {
let mut vtable_args = Vec::new();
@@ -1929,4 +1964,90 @@ pub mod tests {
unsafe { arg.__free_internal_type() }
}
}
/// Hex literals: the digits are taken as a 64-bit pattern, so values above
/// `i64::MAX` come back negative, and a leading `-` negates the result.
#[test]
fn test_parse_numeric_literal_hex() {
    let cases: [(&str, i64); 7] = [
        ("0x1234", 4660),
        ("0xFFFFFFFF", 4294967295),
        ("0x7FFFFFFF", 2147483647),
        ("0x7FFFFFFFFFFFFFFF", i64::MAX),
        ("0xFFFFFFFFFFFFFFFF", -1),
        ("0x8000000000000000", i64::MIN),
        ("-0x1234", -4660),
    ];
    for &(literal, expected) in &cases {
        assert_eq!(
            parse_numeric_literal(literal).unwrap(),
            OwnedValue::Integer(expected),
            "literal: {}",
            literal
        );
    }

    // too big hex: the pattern is i64::MIN, which cannot be negated
    assert!(parse_numeric_literal("-0x8000000000000000").is_err());
}
/// Decimal integer literals, with and without `_` digit separators.
#[test]
fn test_parse_numeric_literal_integer() {
    let cases: [(&str, i64); 2] = [
        ("123", 123),
        ("9_223_372_036_854_775_807", i64::MAX),
    ];
    for &(literal, expected) in &cases {
        assert_eq!(
            parse_numeric_literal(literal).unwrap(),
            OwnedValue::Integer(expected),
            "literal: {}",
            literal
        );
    }
}
/// Float literals: fractions, exponents, `_` separators, and decimal
/// integers that do not fit in `i64` and therefore come back as floats.
#[test]
fn test_parse_numeric_literal_float() {
    let cases: [(&str, f64); 8] = [
        ("123.456", 123.456),
        (".123", 0.123),
        ("1.23e10", 1.23e10),
        ("1e-10", 1e-10),
        ("1.23E+10", 1.23e10),
        ("1.1_1", 1.11),
        // > i64::MAX, convert to float
        ("9223372036854775808", 9.22337203685478e+18),
        // < i64::MIN, convert to float
        ("-9223372036854775809", -9.22337203685478e+18),
    ];
    for &(literal, expected) in &cases {
        assert_eq!(
            parse_numeric_literal(literal).unwrap(),
            OwnedValue::Float(expected),
            "literal: {}",
            literal
        );
    }
}
}

View File

@@ -30,3 +30,4 @@ source $testdir/update.test
source $testdir/drop_table.test
source $testdir/default_value.test
source $testdir/boolean.test
source $testdir/literal.test

80
testing/literal.test Executable file
View File

@@ -0,0 +1,80 @@
#!/usr/bin/env tclsh
# SQL-level tests for numeric literals: decimal and hex integers, "_" digit
# separators, floats, and literals that must be rejected.
# NOTE(review): the test names spell "numberic"; this looks like a typo for
# "numeric". They are left untouched here because the names are test identifiers.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Plain decimal integer.
do_execsql_test numberic-literal-1 {
SELECT 45;
} {45}
# Hex literal with the value 9223372036854775807, bare and with unary -/+ in front.
do_execsql_test numberic-literal-2 {
SELECT 0x7FFFFFFFFFFFFFFF;
} {9223372036854775807}
do_execsql_test numberic-literal-3 {
SELECT -0x7FFFFFFFFFFFFFFF;
} {-9223372036854775807}
do_execsql_test numberic-literal-4 {
SELECT +0x7FFFFFFFFFFFFFFF;
} {9223372036854775807}
# Underscores between digits are accepted and ignored.
do_execsql_test numberic-literal-5 {
SELECT 9_223_372_036_854_775_807;
} {9223372036854775807}
# Negating 9223372036854775808 must still produce an integer, not a float.
do_execsql_test numberic-literal-6 {
SELECT -9_223_372_036_854_775_808;
} {-9223372036854775808}
do_execsql_test numberic-literal-7 {
SELECT 1_000;
} {1000}
# Underscore inside the fractional part.
do_execsql_test numberic-literal-8 {
SELECT 1.1_1;
} {1.11}
# Literals written with an exponent or a decimal point come back as floats.
do_execsql_test numberic-literal-9 {
SELECT 1e12;
} {1000000000000.0}
do_execsql_test numberic-literal-10 {
SELECT 1.0;
} {1.0}
# An exponent of 1000 is expected to yield Inf / -Inf, also when the exponent
# itself contains an underscore.
do_execsql_test numberic-literal-11 {
SELECT 1e1000;
} {Inf}
do_execsql_test numberic-literal-12 {
SELECT -1e1000;
} {-Inf}
do_execsql_test numberic-literal-13 {
SELECT 1e1_000;
} {Inf}
# Underscores on both sides of the decimal point.
do_execsql_test numberic-literal-14 {
SELECT 12_3_456.7_8_9;
} {123456.789}
# One more than 9223372036854775807: expected to come back as a float.
do_execsql_test numberic-literal-exceed-max-i64 {
SELECT 9_223_372_036_854_775_808;
} {9.22337203685478e+18}
# The remaining cases give no expected result. do_execsql_test_any_error is
# defined outside this file; presumably it passes when the statement raises
# any error -- confirm against its definition.
# Two consecutive underscores.
do_execsql_test_any_error invalid-numberic-literal-1 {
SELECT 0xFF__FF;
}
# Letter in the middle of the digits.
do_execsql_test_any_error invalid-numberic-literal-2 {
SELECT 12a3;
}
# Exponent with no digits in front of it.
do_execsql_test_any_error invalid-numberic-literal-3 {
SELECT e12;
}
# Exponent marker with no digits after it.
do_execsql_test_any_error invalid-numberic-literal-4 {
SELECT 1e;
}