diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index 1cbe26908..f365e5f73 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -185,7 +185,6 @@ pub fn order_by_sorter_insert( )?; } let mut cur_reg = start_reg + order_by_len; - let mut translated_result_col_count = 0; for (i, rc) in result_columns.iter().enumerate() { // If the result column is an exact duplicate of a sort key, we skip it. if sort_metadata @@ -203,15 +202,71 @@ pub fn order_by_sorter_insert( cur_reg, resolver, )?; - translated_result_col_count += 1; cur_reg += 1; } // Handle SELECT DISTINCT deduplication if let Distinctness::Distinct { ctx } = &plan.distinctness { let distinct_ctx = ctx.as_ref().expect("distinct context must exist"); - let num_regs = order_by_len + translated_result_col_count; - distinct_ctx.emit_deduplication_insns(program, num_regs, start_reg); + + // For distinctness checking with Insn::Found, we need a contiguous run of registers containing all the result columns. + // The emitted columns are in the ORDER BY sorter order, which may be different from the SELECT order, and obviously the + // ORDER BY clause may not have all the result columns. + // Hence, we need to allocate new registers and Copy from the existing ones to make a contiguous run of registers. + let mut needs_reordering = false; + + // Check if result columns in sorter are in SELECT order + let mut prev = None; + for (select_idx, _rc) in result_columns.iter().enumerate() { + let sorter_idx = sort_metadata + .remappings + .get(select_idx) + .expect("remapping must exist for all result columns") + .orderby_sorter_idx; + + if prev.is_some_and(|p| sorter_idx != p + 1) { + needs_reordering = true; + break; + } + prev = Some(sorter_idx); + } + + if needs_reordering { + // Allocate registers for reordered result columns. + // TODO: it may be possible to optimize this to minimize the number of Insn::Copy we do, but for now + // we will just allocate a new reg for every result column. + let reordered_start_reg = program.alloc_registers(result_columns.len()); + + for (select_idx, _rc) in result_columns.iter().enumerate() { + let src_reg = sort_metadata + .remappings + .get(select_idx) + .map(|r| start_reg + r.orderby_sorter_idx) + .expect("remapping must exist for all result columns"); + + let dst_reg = reordered_start_reg + select_idx; + + program.emit_insn(Insn::Copy { + src_reg, + dst_reg, + extra_amount: 0, + }); + } + + distinct_ctx.emit_deduplication_insns( + program, + result_columns.len(), + reordered_start_reg, + ); + } else { + // Result columns are already in SELECT order, use them directly + let start_reg = sort_metadata + .remappings + .first() + .map(|r| start_reg + r.orderby_sorter_idx) + .expect("remapping must exist for all result columns"); + distinct_ctx.emit_deduplication_insns(program, result_columns.len(), start_reg); + } } let SortMetadata { diff --git a/testing/orderby.test b/testing/orderby.test index 277ac9652..4f58f3f9d 100755 --- a/testing/orderby.test +++ b/testing/orderby.test @@ -211,3 +211,10 @@ do_execsql_test orderby_desc_regression_verify_order { } {99 98 97} + +do_execsql_test_on_specific_db {:memory:} distinct_orderby_regression { + CREATE TABLE t (a,b,c,d); + INSERT INTO t VALUES (1,2,3,4),(2,3,4,5); + SELECT DISTINCT c,b FROM t ORDER BY d,b; +} {3|2 +4|3} diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 7032f4bcb..b49bfa962 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -1720,6 +1720,72 @@ mod tests { } } + #[test] + pub fn fuzz_distinct() { + let db = TempDatabase::new_empty(true); + let limbo_conn = db.connect_limbo(); + let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap(); + + let (mut rng, seed) = rng_from_time_or_env(); + tracing::info!("fuzz_distinct seed: {}", seed); + + let columns = ["a", "b", "c", "d", "e"]; + + // Create table with 3 integer columns + let create_table = format!("CREATE TABLE t ({})", columns.join(", ")); + limbo_exec_rows(&db, &limbo_conn, &create_table); + sqlite_exec_rows(&sqlite_conn, &create_table); + + // Insert some random data + for _ in 0..1000 { + let values = (0..columns.len()) + .map(|_| rng.random_range(1..3)) // intentionally narrow range + .collect::>(); + let query = format!( + "INSERT INTO t VALUES ({})", + values + .iter() + .map(|v| v.to_string()) + .collect::>() + .join(",") + ); + limbo_exec_rows(&db, &limbo_conn, &query); + sqlite_exec_rows(&sqlite_conn, &query); + } + + // Test different DISTINCT + ORDER BY combinations + for _ in 0..300 { + // Randomly select columns for DISTINCT + let num_distinct_cols = rng.random_range(1..=columns.len()); + let mut available_cols = columns.to_vec(); + let mut distinct_cols = Vec::with_capacity(num_distinct_cols); + + for _ in 0..num_distinct_cols { + let idx = rng.random_range(0..available_cols.len()); + distinct_cols.push(available_cols.remove(idx)); + } + let distinct_cols = distinct_cols.join(", "); + + // Randomly select columns for ORDER BY + let num_order_cols = rng.random_range(1..=columns.len()); + let mut available_cols = columns.to_vec(); + let mut order_cols = Vec::with_capacity(num_order_cols); + + for _ in 0..num_order_cols { + let idx = rng.random_range(0..available_cols.len()); + order_cols.push(available_cols.remove(idx)); + } + let order_cols = order_cols.join(", "); + + let query = format!("SELECT DISTINCT {distinct_cols} FROM t ORDER BY {order_cols}"); + + let limbo = limbo_exec_rows(&db, &limbo_conn, &query); + let sqlite = sqlite_exec_rows(&sqlite_conn, &query); + + assert_eq!(limbo, sqlite, "seed: {seed}, query: {query}"); + } + } + #[test] #[ignore] pub fn fuzz_long_create_table_drop_table_alter_table() {