mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-06 16:54:23 +01:00
Fix DISTINCT with ORDER BY
We had a bug where we were checking for duplicates in the DISTINCT index based on both the result column count plus any ORDER BY columns not present in the DISTINCT clause. This is wrong, so fix it by only using the result columns for the dedupe check.
This commit is contained in:
@@ -185,7 +185,6 @@ pub fn order_by_sorter_insert(
|
||||
)?;
|
||||
}
|
||||
let mut cur_reg = start_reg + order_by_len;
|
||||
let mut translated_result_col_count = 0;
|
||||
for (i, rc) in result_columns.iter().enumerate() {
|
||||
// If the result column is an exact duplicate of a sort key, we skip it.
|
||||
if sort_metadata
|
||||
@@ -203,15 +202,71 @@ pub fn order_by_sorter_insert(
|
||||
cur_reg,
|
||||
resolver,
|
||||
)?;
|
||||
translated_result_col_count += 1;
|
||||
cur_reg += 1;
|
||||
}
|
||||
|
||||
// Handle SELECT DISTINCT deduplication
|
||||
if let Distinctness::Distinct { ctx } = &plan.distinctness {
|
||||
let distinct_ctx = ctx.as_ref().expect("distinct context must exist");
|
||||
let num_regs = order_by_len + translated_result_col_count;
|
||||
distinct_ctx.emit_deduplication_insns(program, num_regs, start_reg);
|
||||
|
||||
// For distinctness checking with Insn::Found, we need a contiguous run of registers containing all the result columns.
|
||||
// The emitted columns are in the ORDER BY sorter order, which may be different from the SELECT order, and obviously the
|
||||
// ORDER BY clause may not have all the result columns.
|
||||
// Hence, we need to allocate new registers and Copy from the existing ones to make a contiguous run of registers.
|
||||
let mut needs_reordering = false;
|
||||
|
||||
// Check if result columns in sorter are in SELECT order
|
||||
let mut prev = None;
|
||||
for (select_idx, _rc) in result_columns.iter().enumerate() {
|
||||
let sorter_idx = sort_metadata
|
||||
.remappings
|
||||
.get(select_idx)
|
||||
.expect("remapping must exist for all result columns")
|
||||
.orderby_sorter_idx;
|
||||
|
||||
if prev.is_some_and(|p| sorter_idx != p + 1) {
|
||||
needs_reordering = true;
|
||||
break;
|
||||
}
|
||||
prev = Some(sorter_idx);
|
||||
}
|
||||
|
||||
if needs_reordering {
|
||||
// Allocate registers for reordered result columns.
|
||||
// TODO: it may be possible to optimize this to minimize the number of Insn::Copy we do, but for now
|
||||
// we will just allocate a new reg for every result column.
|
||||
let reordered_start_reg = program.alloc_registers(result_columns.len());
|
||||
|
||||
for (select_idx, _rc) in result_columns.iter().enumerate() {
|
||||
let src_reg = sort_metadata
|
||||
.remappings
|
||||
.get(select_idx)
|
||||
.map(|r| start_reg + r.orderby_sorter_idx)
|
||||
.expect("remapping must exist for all result columns");
|
||||
|
||||
let dst_reg = reordered_start_reg + select_idx;
|
||||
|
||||
program.emit_insn(Insn::Copy {
|
||||
src_reg,
|
||||
dst_reg,
|
||||
extra_amount: 0,
|
||||
});
|
||||
}
|
||||
|
||||
distinct_ctx.emit_deduplication_insns(
|
||||
program,
|
||||
result_columns.len(),
|
||||
reordered_start_reg,
|
||||
);
|
||||
} else {
|
||||
// Result columns are already in SELECT order, use them directly
|
||||
let start_reg = sort_metadata
|
||||
.remappings
|
||||
.first()
|
||||
.map(|r| start_reg + r.orderby_sorter_idx)
|
||||
.expect("remapping must exist for all result columns");
|
||||
distinct_ctx.emit_deduplication_insns(program, result_columns.len(), start_reg);
|
||||
}
|
||||
}
|
||||
|
||||
let SortMetadata {
|
||||
|
||||
@@ -211,3 +211,10 @@ do_execsql_test orderby_desc_regression_verify_order {
|
||||
} {99
|
||||
98
|
||||
97}
|
||||
|
||||
do_execsql_test_on_specific_db {:memory:} distinct_orderby_regression {
|
||||
CREATE TABLE t (a,b,c,d);
|
||||
INSERT INTO t VALUES (1,2,3,4),(2,3,4,5);
|
||||
SELECT DISTINCT c,b FROM t ORDER BY d,b;
|
||||
} {3|2
|
||||
4|3}
|
||||
|
||||
@@ -1720,6 +1720,72 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn fuzz_distinct() {
|
||||
let db = TempDatabase::new_empty(true);
|
||||
let limbo_conn = db.connect_limbo();
|
||||
let sqlite_conn = rusqlite::Connection::open_in_memory().unwrap();
|
||||
|
||||
let (mut rng, seed) = rng_from_time_or_env();
|
||||
tracing::info!("fuzz_distinct seed: {}", seed);
|
||||
|
||||
let columns = ["a", "b", "c", "d", "e"];
|
||||
|
||||
// Create table with 3 integer columns
|
||||
let create_table = format!("CREATE TABLE t ({})", columns.join(", "));
|
||||
limbo_exec_rows(&db, &limbo_conn, &create_table);
|
||||
sqlite_exec_rows(&sqlite_conn, &create_table);
|
||||
|
||||
// Insert some random data
|
||||
for _ in 0..1000 {
|
||||
let values = (0..columns.len())
|
||||
.map(|_| rng.random_range(1..3)) // intentionally narrow range
|
||||
.collect::<Vec<_>>();
|
||||
let query = format!(
|
||||
"INSERT INTO t VALUES ({})",
|
||||
values
|
||||
.iter()
|
||||
.map(|v| v.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
limbo_exec_rows(&db, &limbo_conn, &query);
|
||||
sqlite_exec_rows(&sqlite_conn, &query);
|
||||
}
|
||||
|
||||
// Test different DISTINCT + ORDER BY combinations
|
||||
for _ in 0..300 {
|
||||
// Randomly select columns for DISTINCT
|
||||
let num_distinct_cols = rng.random_range(1..=columns.len());
|
||||
let mut available_cols = columns.to_vec();
|
||||
let mut distinct_cols = Vec::with_capacity(num_distinct_cols);
|
||||
|
||||
for _ in 0..num_distinct_cols {
|
||||
let idx = rng.random_range(0..available_cols.len());
|
||||
distinct_cols.push(available_cols.remove(idx));
|
||||
}
|
||||
let distinct_cols = distinct_cols.join(", ");
|
||||
|
||||
// Randomly select columns for ORDER BY
|
||||
let num_order_cols = rng.random_range(1..=columns.len());
|
||||
let mut available_cols = columns.to_vec();
|
||||
let mut order_cols = Vec::with_capacity(num_order_cols);
|
||||
|
||||
for _ in 0..num_order_cols {
|
||||
let idx = rng.random_range(0..available_cols.len());
|
||||
order_cols.push(available_cols.remove(idx));
|
||||
}
|
||||
let order_cols = order_cols.join(", ");
|
||||
|
||||
let query = format!("SELECT DISTINCT {distinct_cols} FROM t ORDER BY {order_cols}");
|
||||
|
||||
let limbo = limbo_exec_rows(&db, &limbo_conn, &query);
|
||||
let sqlite = sqlite_exec_rows(&sqlite_conn, &query);
|
||||
|
||||
assert_eq!(limbo, sqlite, "seed: {seed}, query: {query}");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
pub fn fuzz_long_create_table_drop_table_alter_table() {
|
||||
|
||||
Reference in New Issue
Block a user