Merge 'Add affinity-based type coercion for seek and comparison operation' from Krishna Vishal

This PR implements SQLite-compatible affinity-based type coercion for
seek operations and comparison operators, bringing Limbo's behavior
closer to SQLite's type conversion rules.
### Key Changes
- Added affinity handling to all comparison operators (`Eq`, `Ne`, `Lt`,
`Le`, `Gt`, `Ge`).
- Added `get_expr_affinity()` to determine expression affinity based on
column types and casts.
- Added affinity functionality `CmpInsFlags` which carries the affinity
information from translation layer to execution layer.
- Added comprehensive `apply_numeric_affinity()` function that handles
many edge cases when converting to numerical quantities.
- Implemented `comparison_affinity()` that follows SQLite's affinity
precedence rules.
- Added fuzz tests for partial numeric strings, hex literals, and
various numeric formats.
### SQLite Compatibility Improvements
This implementation now correctly handles cases like:
- `SELECT * FROM t WHERE x < X'41'` (blob comparisons)
- `SELECT * FROM t WHERE x < '123abc'` (partial numeric strings)
- `SELECT * FROM t WHERE x < 4.9 ` (float precision in seeks)
- Mixed-type comparisons with proper affinity conversion
Thanks 🤝@PThorpe92 and @pedrocarlo for helping improve my planner
understanding.
Closes: https://github.com/tursodatabase/limbo/issues/1607

Closes #1680
This commit is contained in:
Jussi Saurio
2025-06-11 09:52:02 +03:00
7 changed files with 1099 additions and 392 deletions

View File

@@ -748,6 +748,22 @@ impl Affinity {
))),
}
}
pub fn to_char_code(&self) -> u8 {
self.aff_mask() as u8
}
pub fn from_char_code(code: u8) -> Result<Self, LimboError> {
Self::from_char(code as char)
}
pub fn is_numeric(&self) -> bool {
matches!(self, Affinity::Integer | Affinity::Real | Affinity::Numeric)
}
pub fn has_affinity(&self) -> bool {
!matches!(self, Affinity::Blob)
}
}
impl fmt::Display for Type {

View File

@@ -1,4 +1,4 @@
use limbo_sqlite3_parser::ast::{self, UnaryOperator};
use limbo_sqlite3_parser::ast::{self, Expr, UnaryOperator};
use tracing::{instrument, Level};
use super::emitter::Resolver;
@@ -8,7 +8,7 @@ use super::plan::TableReferences;
use crate::function::JsonFunc;
use crate::function::{Func, FuncCtx, MathFuncArity, ScalarFunc, VectorFunc};
use crate::functions::datetime;
use crate::schema::{Table, Type};
use crate::schema::{Affinity, Table, Type};
use crate::util::{exprs_are_equivalent, normalize_ident, parse_numeric_literal};
use crate::vdbe::builder::CursorKey;
use crate::vdbe::{
@@ -462,7 +462,16 @@ pub fn translate_expr(
let shared_reg = program.alloc_register();
translate_expr(program, referenced_tables, e1, shared_reg, resolver)?;
emit_binary_insn(program, op, shared_reg, shared_reg, target_register)?;
emit_binary_insn(
program,
op,
shared_reg,
shared_reg,
target_register,
e1,
e2,
referenced_tables,
)?;
program.reset_collation();
Ok(target_register)
} else {
@@ -509,7 +518,16 @@ pub fn translate_expr(
};
program.set_collation(collation_ctx);
emit_binary_insn(program, op, e1_reg, e2_reg, target_register)?;
emit_binary_insn(
program,
op,
e1_reg,
e2_reg,
target_register,
e1,
e2,
referenced_tables,
)?;
program.reset_collation();
Ok(target_register)
}
@@ -2201,7 +2219,15 @@ fn emit_binary_insn(
lhs: usize,
rhs: usize,
target_register: usize,
lhs_expr: &Expr,
rhs_expr: &Expr,
referenced_tables: Option<&TableReferences>,
) -> Result<()> {
let mut affinity = Affinity::Blob;
if op.is_comparison() {
affinity = comparison_affinity(lhs_expr, rhs_expr, referenced_tables);
}
match op {
ast::Operator::NotEquals => {
let if_true_label = program.allocate_label();
@@ -2211,7 +2237,7 @@ fn emit_binary_insn(
lhs,
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default().with_affinity(affinity),
collation: program.curr_collation(),
},
target_register,
@@ -2228,7 +2254,7 @@ fn emit_binary_insn(
lhs,
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default().with_affinity(affinity),
collation: program.curr_collation(),
},
target_register,
@@ -2245,7 +2271,7 @@ fn emit_binary_insn(
lhs,
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default().with_affinity(affinity),
collation: program.curr_collation(),
},
target_register,
@@ -2262,7 +2288,7 @@ fn emit_binary_insn(
lhs,
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default().with_affinity(affinity),
collation: program.curr_collation(),
},
target_register,
@@ -2279,7 +2305,7 @@ fn emit_binary_insn(
lhs,
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default().with_affinity(affinity),
collation: program.curr_collation(),
},
target_register,
@@ -2296,7 +2322,7 @@ fn emit_binary_insn(
lhs,
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default().with_affinity(affinity),
collation: program.curr_collation(),
},
target_register,
@@ -2390,7 +2416,7 @@ fn emit_binary_insn(
lhs,
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default().null_eq(),
flags: CmpInsFlags::default().null_eq().with_affinity(affinity),
collation: program.curr_collation(),
},
target_register,
@@ -2405,7 +2431,7 @@ fn emit_binary_insn(
lhs,
rhs,
target_pc: if_true_label,
flags: CmpInsFlags::default().null_eq(),
flags: CmpInsFlags::default().null_eq().with_affinity(affinity),
collation: program.curr_collation(),
},
target_register,
@@ -3023,3 +3049,75 @@ where
Ok(())
}
pub fn get_expr_affinity(
expr: &ast::Expr,
referenced_tables: Option<&TableReferences>,
) -> Affinity {
match expr {
ast::Expr::Column { table, column, .. } => {
if let Some(tables) = referenced_tables {
if let Some(table_ref) = tables.find_table_by_internal_id(*table) {
if let Some(col) = table_ref.get_column_at(*column) {
return col.affinity();
}
}
}
Affinity::Blob
}
ast::Expr::Cast { type_name, .. } => {
if let Some(type_name) = type_name {
crate::schema::affinity(&type_name.name)
} else {
Affinity::Blob
}
}
ast::Expr::Collate(expr, _) => get_expr_affinity(expr, referenced_tables),
// Literals have NO affinity in SQLite!
ast::Expr::Literal(_) => Affinity::Blob, // No affinity!
_ => Affinity::Blob, // This may need to change. For now this works.
}
}
pub fn comparison_affinity(
lhs_expr: &ast::Expr,
rhs_expr: &ast::Expr,
referenced_tables: Option<&TableReferences>,
) -> Affinity {
let mut aff = get_expr_affinity(lhs_expr, referenced_tables);
aff = compare_affinity(rhs_expr, aff, referenced_tables);
// If no affinity determined (both operands are literals), default to BLOB
if !aff.has_affinity() {
Affinity::Blob
} else {
aff
}
}
pub fn compare_affinity(
expr: &ast::Expr,
other_affinity: Affinity,
referenced_tables: Option<&TableReferences>,
) -> Affinity {
let expr_affinity = get_expr_affinity(expr, referenced_tables);
if expr_affinity.has_affinity() && other_affinity.has_affinity() {
// Both sides have affinity - use numeric if either is numeric
if expr_affinity.is_numeric() || other_affinity.is_numeric() {
Affinity::Numeric
} else {
Affinity::Blob
}
} else {
// One or both sides have no affinity - use the one that does, or Blob if neither
if expr_affinity.has_affinity() {
expr_affinity
} else if other_affinity.has_affinity() {
other_affinity
} else {
Affinity::Blob
}
}
}

View File

@@ -4,7 +4,7 @@ use limbo_sqlite3_parser::ast::{self, SortOrder};
use std::sync::Arc;
use crate::{
schema::{Index, IndexColumn, Table},
schema::{Affinity, Index, IndexColumn, Table},
translate::{
plan::{DistinctCtx, Distinctness},
result_row::emit_select_result,
@@ -1285,14 +1285,28 @@ fn emit_seek_termination(
}
program.preassign_label_to_next_insn(loop_start);
let mut rowid_reg = None;
let mut affinity = None;
if !is_index {
rowid_reg = Some(program.alloc_register());
program.emit_insn(Insn::RowId {
cursor_id: seek_cursor_id,
dest: rowid_reg.unwrap(),
});
}
affinity = if let Some(table_ref) = tables
.joined_tables()
.iter()
.find(|t| t.columns().iter().any(|c| c.is_rowid_alias))
{
if let Some(rowid_col_idx) = table_ref.columns().iter().position(|c| c.is_rowid_alias) {
Some(table_ref.columns()[rowid_col_idx].affinity())
} else {
Some(Affinity::Numeric)
}
} else {
Some(Affinity::Numeric)
};
}
match (is_index, termination.op) {
(true, SeekOp::GE { .. }) => program.emit_insn(Insn::IdxGE {
cursor_id: seek_cursor_id,
@@ -1322,28 +1336,36 @@ fn emit_seek_termination(
lhs: rowid_reg.unwrap(),
rhs: start_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default()
.jump_if_null()
.with_affinity(affinity.unwrap()),
collation: program.curr_collation(),
}),
(false, SeekOp::GT) => program.emit_insn(Insn::Gt {
lhs: rowid_reg.unwrap(),
rhs: start_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default()
.jump_if_null()
.with_affinity(affinity.unwrap()),
collation: program.curr_collation(),
}),
(false, SeekOp::LE { .. }) => program.emit_insn(Insn::Le {
lhs: rowid_reg.unwrap(),
rhs: start_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default()
.jump_if_null()
.with_affinity(affinity.unwrap()),
collation: program.curr_collation(),
}),
(false, SeekOp::LT) => program.emit_insn(Insn::Lt {
lhs: rowid_reg.unwrap(),
rhs: start_reg,
target_pc: loop_end,
flags: CmpInsFlags::default(),
flags: CmpInsFlags::default()
.jump_if_null()
.with_affinity(affinity.unwrap()),
collation: program.curr_collation(),
}),
};

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ use std::{
use super::{execute, AggFunc, BranchOffset, CursorID, FuncCtx, InsnFunction, PageIdx};
use crate::{
schema::{BTreeTable, Index},
schema::{Affinity, BTreeTable, Index},
storage::{pager::CreateBTreeFlags, wal::CheckpointMode},
translate::collate::CollationSeq,
};
@@ -20,6 +20,7 @@ pub struct CmpInsFlags(usize);
impl CmpInsFlags {
const NULL_EQ: usize = 0x80;
const JUMP_IF_NULL: usize = 0x10;
const AFFINITY_MASK: usize = 0x47;
fn has(&self, flag: usize) -> bool {
(self.0 & flag) != 0
@@ -42,6 +43,17 @@ impl CmpInsFlags {
pub fn has_nulleq(&self) -> bool {
self.has(CmpInsFlags::NULL_EQ)
}
pub fn with_affinity(mut self, affinity: Affinity) -> Self {
let aff_code = affinity.to_char_code() as usize;
self.0 = (self.0 & !Self::AFFINITY_MASK) | aff_code;
self
}
pub fn get_affinity(&self) -> Affinity {
let aff_code = (self.0 & Self::AFFINITY_MASK) as u8;
Affinity::from_char_code(aff_code).unwrap_or(Affinity::Blob)
}
}
#[derive(Clone, Copy, Debug, Default)]
@@ -939,12 +951,12 @@ impl Insn {
Insn::Move { .. } => execute::op_move,
Insn::IfPos { .. } => execute::op_if_pos,
Insn::NotNull { .. } => execute::op_not_null,
Insn::Eq { .. } => execute::op_eq,
Insn::Ne { .. } => execute::op_ne,
Insn::Lt { .. } => execute::op_lt,
Insn::Le { .. } => execute::op_le,
Insn::Gt { .. } => execute::op_gt,
Insn::Ge { .. } => execute::op_ge,
Insn::Eq { .. }
| Insn::Ne { .. }
| Insn::Lt { .. }
| Insn::Le { .. }
| Insn::Gt { .. }
| Insn::Ge { .. } => execute::op_comparison,
Insn::If { .. } => execute::op_if,
Insn::IfNot { .. } => execute::op_if_not,
Insn::OpenRead { .. } => execute::op_open_read,
@@ -980,10 +992,10 @@ impl Insn {
Insn::IdxRowId { .. } => execute::op_idx_row_id,
Insn::SeekRowid { .. } => execute::op_seek_rowid,
Insn::DeferredSeek { .. } => execute::op_deferred_seek,
Insn::SeekGE { .. } => execute::op_seek,
Insn::SeekGT { .. } => execute::op_seek,
Insn::SeekLE { .. } => execute::op_seek,
Insn::SeekLT { .. } => execute::op_seek,
Insn::SeekGE { .. }
| Insn::SeekGT { .. }
| Insn::SeekLE { .. }
| Insn::SeekLT { .. } => execute::op_seek,
Insn::SeekEnd { .. } => execute::op_seek_end,
Insn::IdxGE { .. } => execute::op_idx_ge,
Insn::IdxGT { .. } => execute::op_idx_gt,

View File

@@ -51,7 +51,7 @@ mod tests {
let insert = format!(
"INSERT INTO t VALUES {}",
(1..2000)
(1..100)
.map(|x| format!("({})", x))
.collect::<Vec<_>>()
.join(", ")
@@ -69,28 +69,101 @@ mod tests {
Some("ORDER BY x ASC"),
];
for comp in COMPARISONS.iter() {
for order_by in ORDER_BY.iter() {
for max in 0..=2000 {
let query = format!(
"SELECT * FROM t WHERE x {} {} {}",
comp,
max,
order_by.unwrap_or("")
);
log::trace!("query: {}", query);
let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
assert_eq!(
limbo, sqlite,
"query: {}, limbo: {:?}, sqlite: {:?}",
query, limbo, sqlite
);
let (mut rng, seed) = rng_from_time_or_env();
tracing::info!("rowid_seek_fuzz seed: {}", seed);
for iteration in 0..2 {
tracing::trace!("rowid_seek_fuzz iteration: {}", iteration);
for comp in COMPARISONS.iter() {
for order_by in ORDER_BY.iter() {
let test_values = generate_random_comparison_values(&mut rng);
for test_value in test_values.iter() {
let query = format!(
"SELECT * FROM t WHERE x {} {} {}",
comp,
test_value,
order_by.unwrap_or("")
);
log::trace!("query: {}", query);
let limbo_result = limbo_exec_rows(&db, &limbo_conn, &query);
let sqlite_result = sqlite_exec_rows(&sqlite_conn, &query);
assert_eq!(
limbo_result, sqlite_result,
"query: {}, limbo: {:?}, sqlite: {:?}, seed: {}",
query, limbo_result, sqlite_result, seed
);
}
}
}
}
}
fn generate_random_comparison_values(rng: &mut ChaCha8Rng) -> Vec<String> {
let mut values = Vec::new();
for _ in 0..1000 {
let val = rng.random_range(-10000..10000);
values.push(val.to_string());
}
values.push(i64::MAX.to_string());
values.push(i64::MIN.to_string());
values.push("0".to_string());
for _ in 0..5 {
let val: f64 = rng.random_range(-10000.0..10000.0);
values.push(val.to_string());
}
values.push("NULL".to_string()); // Man's greatest mistake
values.push("'NULL'".to_string()); // SQLite dared to one up on that mistake
values.push("0.0".to_string());
values.push("-0.0".to_string());
values.push("1.5".to_string());
values.push("-1.5".to_string());
values.push("999.999".to_string());
values.push("'text'".to_string());
values.push("'123'".to_string());
values.push("''".to_string());
values.push("'0'".to_string());
values.push("'hello'".to_string());
values.push("'0x10'".to_string());
values.push("'+123'".to_string());
values.push("' 123 '".to_string());
values.push("'1.5e2'".to_string());
values.push("'inf'".to_string());
values.push("'-inf'".to_string());
values.push("'nan'".to_string());
values.push("X'41'".to_string());
values.push("X''".to_string());
values.push("(1 + 1)".to_string());
// values.push("(SELECT 1)".to_string()); subqueries ain't implemented yet homes.
values
}
fn rng_from_time_or_env() -> (ChaCha8Rng, u64) {
let seed = std::env::var("SEED").map_or(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs(),
|v| {
v.parse()
.expect("Failed to parse SEED environment variable as u64")
},
);
let rng = ChaCha8Rng::seed_from_u64(seed);
(rng, seed)
}
#[test]
pub fn index_scan_fuzz() {
let db = TempDatabase::new_with_rusqlite("CREATE TABLE t(x PRIMARY KEY)");

View File

@@ -731,6 +731,21 @@ impl Operator {
| Operator::NotEquals
)
}
/// Returns true if this operator is a comparison operator that may need affinity conversion
pub fn is_comparison(&self) -> bool {
matches!(
self,
Self::Equals
| Self::NotEquals
| Self::Less
| Self::LessEquals
| Self::Greater
| Self::GreaterEquals
| Self::Is
| Self::IsNot
)
}
}
/// Unary operators