From f88f39082ad5007715aa4cbb1f9504808a301707 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 8 Sep 2025 15:06:52 +0300 Subject: [PATCH 1/4] core/vdbe: Fix MakeRecord affinity handling The MakeRecord instruction now accepts an optional affinity_str parameter that applies column-specific type conversions before creating records. When provided, the affinity string is applied character-by-character to each register using the existing apply_affinity_char() function, matching SQLite's behavior. Fixes #2040 Fixes #2041 --- core/translate/aggregation.rs | 1 + core/translate/alter.rs | 9 +++++++++ core/translate/analyze.rs | 1 + core/translate/compound_select.rs | 1 + core/translate/emitter.rs | 24 +++++++++++++++++++++++ core/translate/index.rs | 1 + core/translate/insert.rs | 32 +++++++++++++++++++++++++++++++ core/translate/main_loop.rs | 1 + core/translate/order_by.rs | 1 + core/translate/plan.rs | 1 + core/translate/result_row.rs | 2 ++ core/translate/schema.rs | 3 +++ core/translate/upsert.rs | 9 +++++++++ core/translate/values.rs | 1 + core/vdbe/execute.rs | 17 ++++++++++++++++ core/vdbe/explain.rs | 1 + core/vdbe/insn.rs | 1 + 17 files changed, 106 insertions(+) diff --git a/core/translate/aggregation.rs b/core/translate/aggregation.rs index 7a1de9776..75a89c63d 100644 --- a/core/translate/aggregation.rs +++ b/core/translate/aggregation.rs @@ -115,6 +115,7 @@ pub fn handle_distinct(program: &mut ProgramBuilder, agg: &Aggregate, agg_arg_re count: num_regs, dest_reg: record_reg, index_name: Some(distinct_ctx.ephemeral_index_name.to_string()), + affinity_str: None, }); program.emit_insn(Insn::IdxInsert { cursor_id: distinct_ctx.cursor_id, diff --git a/core/translate/alter.rs b/core/translate/alter.rs index a22800c32..9808d375f 100644 --- a/core/translate/alter.rs +++ b/core/translate/alter.rs @@ -141,11 +141,18 @@ pub fn translate_alter_table( let record = program.alloc_register(); + let affinity_str = btree + .columns + .iter() + .map(|col| col.affinity().aff_mask()) + .collect::(); + program.emit_insn(Insn::MakeRecord { start_reg: first_column, count: column_count, dest_reg: record, index_name: None, + affinity_str: Some(affinity_str), }); program.emit_insn(Insn::Insert { @@ -301,6 +308,7 @@ pub fn translate_alter_table( count: sqlite_schema_column_len, dest_reg: record, index_name: None, + affinity_str: None, }); program.emit_insn(Insn::Insert { @@ -442,6 +450,7 @@ pub fn translate_alter_table( count: sqlite_schema_column_len, dest_reg: record, index_name: None, + affinity_str: None, }); program.emit_insn(Insn::Insert { diff --git a/core/translate/analyze.rs b/core/translate/analyze.rs index 0d8f8de4e..d7e68fc04 100644 --- a/core/translate/analyze.rs +++ b/core/translate/analyze.rs @@ -212,6 +212,7 @@ pub fn translate_analyze( count: 3, dest_reg: record_reg, index_name: None, + affinity_str: None, }); program.emit_insn(Insn::NewRowid { cursor: stat_cursor, diff --git a/core/translate/compound_select.rs b/core/translate/compound_select.rs index 8f816cd1f..f4d9dd0bf 100644 --- a/core/translate/compound_select.rs +++ b/core/translate/compound_select.rs @@ -524,6 +524,7 @@ fn read_intersect_rows( count: column_count, dest_reg: row_content_reg, index_name: None, + affinity_str: None, }); program.emit_insn(Insn::IdxInsert { cursor_id: target_cursor_id, diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 02d613beb..9e233a2b5 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -1038,6 +1038,7 @@ fn emit_update_insns( count: num_cols + 1, dest_reg: *record_reg, index_name: Some(index.name.clone()), + affinity_str: None, }); if !index.unique { @@ -1138,11 +1139,19 @@ fn emit_update_insns( } let record_reg = program.alloc_register(); + + let affinity_str = table_ref + .columns() + .iter() + .map(|col| col.affinity().aff_mask()) + .collect::(); + program.emit_insn(Insn::MakeRecord { start_reg: start, count: table_ref.columns().len(), dest_reg: record_reg, index_name: None, + affinity_str: Some(affinity_str), }); if has_user_provided_rowid { @@ -1282,6 +1291,7 @@ fn emit_update_insns( count: 2 * table_ref.columns().len(), dest_reg: record_reg, index_name: None, + affinity_str: None, }); Some(record_reg) } else { @@ -1398,11 +1408,18 @@ pub fn emit_cdc_patch_record( dst_reg: columns_reg + rowid_alias_position, extra_amount: 0, }); + let affinity_str = table + .columns() + .iter() + .map(|col| col.affinity().aff_mask()) + .collect::(); + program.emit_insn(Insn::MakeRecord { start_reg: columns_reg, count: table.columns().len(), dest_reg: record_reg, index_name: None, + affinity_str: Some(affinity_str), }); record_reg } else { @@ -1428,11 +1445,17 @@ pub fn emit_cdc_full_record( program.emit_column_or_rowid(table_cursor_id, i, columns_reg + 1 + i); } } + let affinity_str = columns + .iter() + .map(|col| col.affinity().aff_mask()) + .collect::(); + program.emit_insn(Insn::MakeRecord { start_reg: columns_reg + 1, count: columns.len(), dest_reg: columns_reg, index_name: None, + affinity_str: Some(affinity_str), }); columns_reg } @@ -1535,6 +1558,7 @@ pub fn emit_cdc_insns( count: 8, dest_reg: record_reg, index_name: None, + affinity_str: None, }); program.emit_insn(Insn::Insert { diff --git a/core/translate/index.rs b/core/translate/index.rs index 7d332f0d2..8d1ea14a2 100644 --- a/core/translate/index.rs +++ b/core/translate/index.rs @@ -174,6 +174,7 @@ pub fn translate_create_index( count: columns.len() + 1, dest_reg: record_reg, index_name: Some(idx_name.clone()), + affinity_str: None, }); program.emit_insn(Insn::SorterInsert { cursor_id: sorter_cursor_id, diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 04ce1ef4f..06f3e2bc1 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -253,11 +253,35 @@ pub fn translate_insert( end_offset: yield_label, }); let record_reg = program.alloc_register(); + + let affinity_str = if columns.is_empty() { + btree_table + .columns + .iter() + .filter(|col| !col.hidden) + .map(|col| col.affinity().aff_mask()) + .collect::() + } else { + columns + .iter() + .map(|col_name| { + let column_name = normalize_ident(col_name.as_str()); + table + .get_column_by_name(&column_name) + .unwrap() + .1 + .affinity() + .aff_mask() + }) + .collect::() + }; + program.emit_insn(Insn::MakeRecord { start_reg: yield_reg + 1, count: result.num_result_cols, dest_reg: record_reg, index_name: None, + affinity_str: Some(affinity_str), }); let rowid_reg = program.alloc_register(); @@ -507,6 +531,7 @@ pub fn translate_insert( count: num_cols + 1, dest_reg: record_reg, index_name: Some(index.name.clone()), + affinity_str: None, }); if index.unique { @@ -627,11 +652,18 @@ pub fn translate_insert( }); } // Create and insert the record + let affinity_str = insertion + .col_mappings + .iter() + .map(|col_mapping| col_mapping.column.affinity().aff_mask()) + .collect::(); + program.emit_insn(Insn::MakeRecord { start_reg: insertion.first_col_register(), count: insertion.col_mappings.len(), dest_reg: insertion.record_register(), index_name: None, + affinity_str: Some(affinity_str), }); program.emit_insn(Insn::Insert { cursor: cursor_id, diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index 3e44ff0bd..e4a5334ee 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -1429,6 +1429,7 @@ fn emit_autoindex( count: num_regs_to_reserve, dest_reg: record_reg, index_name: Some(index.name.clone()), + affinity_str: None, }); program.emit_insn(Insn::IdxInsert { cursor_id: index_cursor_id, diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index da980bd9a..a4dc56691 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -307,6 +307,7 @@ pub fn sorter_insert( count: column_count, dest_reg: record_reg, index_name: None, + affinity_str: None, }); program.emit_insn(Insn::SorterInsert { cursor_id, diff --git a/core/translate/plan.rs b/core/translate/plan.rs index e96dc4a1a..70844c0e3 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -264,6 +264,7 @@ impl DistinctCtx { count: num_regs, dest_reg: record_reg, index_name: Some(self.ephemeral_index_name.to_string()), + affinity_str: None, }); program.emit_insn(Insn::IdxInsert { cursor_id: self.cursor_id, diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index 2e636caca..04b7454f5 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -103,6 +103,7 @@ pub fn emit_result_row_and_limit( count: plan.result_columns.len(), dest_reg: record_reg, index_name: Some(dedupe_index.name.clone()), + affinity_str: None, }); program.emit_insn(Insn::IdxInsert { cursor_id: *index_cursor_id, @@ -124,6 +125,7 @@ pub fn emit_result_row_and_limit( count: plan.result_columns.len() - 1, dest_reg: record_reg, index_name: Some(table.name.clone()), + affinity_str: None, }); } program.emit_insn(Insn::Insert { diff --git a/core/translate/schema.rs b/core/translate/schema.rs index 5356e5a8a..c2297ba39 100644 --- a/core/translate/schema.rs +++ b/core/translate/schema.rs @@ -257,6 +257,7 @@ pub fn emit_schema_entry( count: 5, dest_reg: record_reg, index_name: None, + affinity_str: None, }); program.emit_insn(Insn::Insert { @@ -614,6 +615,7 @@ pub fn translate_create_virtual_table( count: args_vec.len(), dest_reg: args_record_reg, index_name: None, + affinity_str: None, }); Some(args_record_reg) } else { @@ -998,6 +1000,7 @@ pub fn translate_drop_table( count: 5, dest_reg: new_record_register, index_name: None, + affinity_str: None, }); program.emit_insn(Insn::Delete { cursor_id: sqlite_schema_cursor_id_1, diff --git a/core/translate/upsert.rs b/core/translate/upsert.rs index 7e0141ed2..f5fa8ab95 100644 --- a/core/translate/upsert.rs +++ b/core/translate/upsert.rs @@ -379,6 +379,7 @@ pub fn emit_upsert( count: k + 1, dest_reg: rec, index_name: Some((*idx_name).clone()), + affinity_str: None, }); program.emit_insn(Insn::IdxInsert { cursor_id: *idx_cid, @@ -392,11 +393,19 @@ pub fn emit_upsert( // Write table row (same rowid, new payload) let rec = program.alloc_register(); + + let affinity_str = table + .columns() + .iter() + .map(|col| col.affinity().aff_mask()) + .collect::(); + program.emit_insn(Insn::MakeRecord { start_reg: new_start, count: num_cols, dest_reg: rec, index_name: None, + affinity_str: Some(affinity_str), }); program.emit_insn(Insn::Insert { cursor: tbl_cursor_id, diff --git a/core/translate/values.rs b/core/translate/values.rs index 63f90055e..869a63f31 100644 --- a/core/translate/values.rs +++ b/core/translate/values.rs @@ -199,6 +199,7 @@ fn emit_values_to_index( count: row_len, dest_reg: record_reg, index_name: Some(index.name.clone()), + affinity_str: None, }); program.emit_insn(Insn::IdxInsert { cursor_id: *cursor_id, diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 0b2347aa2..25edbfb7b 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -1864,10 +1864,27 @@ pub fn op_make_record( start_reg, count, dest_reg, + affinity_str, .. }, insn ); + + if let Some(affinity_str) = affinity_str { + if affinity_str.len() != *count { + return Err(LimboError::InternalError(format!( + "MakeRecord: the length of affinity string ({}) does not match the count ({})", + affinity_str.len(), + *count + ))); + } + for (i, affinity_ch) in affinity_str.chars().enumerate().take(*count) { + let reg_index = *start_reg + i; + let affinity = Affinity::from_char(affinity_ch); + apply_affinity_char(&mut state.registers[reg_index], affinity); + } + } + let record = make_record(&state.registers, start_reg, count); state.registers[*dest_reg] = Register::Record(record); state.pc += 1; diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 5f160c235..6c850aadd 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -584,6 +584,7 @@ pub fn insn_to_str( count, dest_reg, index_name, + affinity_str: _, } => { let for_index = index_name.as_ref().map(|name| format!("; for {name}")); ( diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs index 0d01c8a63..e2d4a07c6 100644 --- a/core/vdbe/insn.rs +++ b/core/vdbe/insn.rs @@ -430,6 +430,7 @@ pub enum Insn { count: usize, // P2 dest_reg: usize, // P3 index_name: Option, + affinity_str: Option, }, /// Emit a row of results. From 7f002548fdc7f3803ffb2f9689612c383dfb0790 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 8 Sep 2025 15:25:46 +0300 Subject: [PATCH 2/4] tests/integration: Enable min_max_agg_fuzz() test case --- tests/integration/fuzz/mod.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index b49bfa962..4fbe92dbe 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -1381,9 +1381,6 @@ mod tests { } #[test] - #[ignore] - /// Ignored because of https://github.com/tursodatabase/turso/issues/2040, https://github.com/tursodatabase/turso/issues/2041 - /// TODO: add fuzzing for other aggregate functions pub fn min_max_agg_fuzz() { let _ = env_logger::try_init(); From 0c6398c935634dcbd7a1c901f2157dc8b39c6a34 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 8 Sep 2025 15:50:09 +0300 Subject: [PATCH 3/4] core/vdbe: Fix apply_affinity_char() text parsing We need strict parsing in apply_affinity_char() to avoid transforming non-numeric values (for example, "1a") into numeric values. --- core/vdbe/execute.rs | 10 +++++++--- testing/affinity.test | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) create mode 100755 testing/affinity.test diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs index 25edbfb7b..0c6db224d 100644 --- a/core/vdbe/execute.rs +++ b/core/vdbe/execute.rs @@ -8519,15 +8519,19 @@ fn apply_affinity_char(target: &mut Register, affinity: Affinity) -> bool { } if let Value::Text(t) = value { - let text = t.as_str(); + let text = t.as_str().trim(); // Handle hex numbers - they shouldn't be converted if text.starts_with("0x") { return false; } - // Try to parse as number (similar to applyNumericAffinity) - let Ok(num) = checked_cast_text_to_numeric(text) else { + // For affinity conversion, only convert strings that are entirely numeric + let num = if let Ok(i) = text.parse::() { + Value::Integer(i) + } else if let Ok(f) = text.parse::() { + Value::Float(f) + } else { return false; }; diff --git a/testing/affinity.test b/testing/affinity.test new file mode 100755 index 000000000..9a08003cd --- /dev/null +++ b/testing/affinity.test @@ -0,0 +1,14 @@ +#!/usr/bin/env tclsh + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +do_execsql_test_on_specific_db {:memory:} affinity { + CREATE TABLE t1 (c INTEGER); + INSERT INTO t1 VALUES ('1'); + INSERT INTO t1 VALUES ('1a'); + SELECT c, typeof(c) FROM t1; +} { + {1|integer} + {1a|text} +} From 86baa06600737009ecfc711cca8dc5904b07d16c Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 8 Sep 2025 18:31:35 +0300 Subject: [PATCH 4/4] tests/integration: Add affinity differential fuzz test --- tests/integration/fuzz/mod.rs | 101 ++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 4fbe92dbe..be75377de 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -1435,6 +1435,107 @@ mod tests { } } } + + #[test] + pub fn affinity_fuzz() { + let _ = env_logger::try_init(); + + let (mut rng, seed) = rng_from_time(); + log::info!("affinity_fuzz seed: {seed}"); + + for iteration in 0..500 { + let db = TempDatabase::new_empty(false); + let limbo_conn = db.connect_limbo(); + let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); + + // Test different column affinities - cover all SQLite affinity types + let affinities = [ + "INTEGER", + "TEXT", + "REAL", + "NUMERIC", + "BLOB", + "INT", + "TINYINT", + "SMALLINT", + "MEDIUMINT", + "BIGINT", + "UNSIGNED BIG INT", + "INT2", + "INT8", + "CHARACTER(20)", + "VARCHAR(255)", + "VARYING CHARACTER(255)", + "NCHAR(55)", + "NATIVE CHARACTER(70)", + "NVARCHAR(100)", + "CLOB", + "DOUBLE", + "DOUBLE PRECISION", + "FLOAT", + "DECIMAL(10,5)", + "BOOLEAN", + "DATE", + "DATETIME", + ]; + let affinity = affinities[rng.random_range(0..affinities.len())]; + + let create_table = format!("CREATE TABLE t (x {affinity})"); + limbo_exec_rows(&db, &limbo_conn, &create_table); + sqlite_exec_rows(&sqlite_conn, &create_table); + + // Insert various values that test affinity conversion rules + let mut values = Vec::new(); + for _ in 0..20 { + let value = match rng.random_range(0..9) { + 0 => format!("'{}'", rng.random_range(-10000..10000)), // Pure integer as text + 1 => format!( + "'{}.{}'", + rng.random_range(-1000..1000), + rng.random_range(1..999) // Ensure non-zero decimal part + ), // Float as text with decimal + 2 => format!("'a{}'", rng.random_range(0..1000)), // Text with integer suffix + 3 => format!("' {} '", rng.random_range(-100..100)), // Integer with whitespace + 4 => format!("'-{}'", rng.random_range(1..1000)), // Negative integer as text + 5 => format!("{}", rng.random_range(-10000..10000)), // Direct integer + 6 => format!( + "{}.{}", + rng.random_range(-100..100), + rng.random_range(1..999) // Ensure non-zero decimal part + ), // Direct float + 7 => "'text_value'".to_string(), // Pure text that won't convert + 8 => "NULL".to_string(), // NULL value + _ => unreachable!(), + }; + values.push(format!("({value})")); + } + + let insert = format!("INSERT INTO t VALUES {}", values.join(",")); + limbo_exec_rows(&db, &limbo_conn, &insert); + sqlite_exec_rows(&sqlite_conn, &insert); + + // Query values and their types to verify affinity rules are applied correctly + let query = "SELECT x, typeof(x) FROM t"; + let limbo_result = limbo_exec_rows(&db, &limbo_conn, query); + let sqlite_result = sqlite_exec_rows(&sqlite_conn, query); + + assert_eq!( + limbo_result, sqlite_result, + "iteration: {iteration}, seed: {seed}, affinity: {affinity}, values: {values:?}" + ); + + // Also test with ORDER BY to ensure affinity affects sorting + let query_ordered = "SELECT x FROM t ORDER BY x"; + let limbo_ordered = limbo_exec_rows(&db, &limbo_conn, query_ordered); + let sqlite_ordered = sqlite_exec_rows(&sqlite_conn, query_ordered); + + assert_eq!( + limbo_ordered, sqlite_ordered, + "ORDER BY failed - iteration: {iteration}, seed: {seed}, affinity: {affinity}" + ); + } + } + #[test] // Simple fuzz test for SUM with floats pub fn sum_agg_fuzz_floats() {