From f3ea9a603af7166cd4fa748d9f6d5be5d61575fd Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Sun, 18 May 2025 11:43:00 +0300 Subject: [PATCH] add support for SELECT DISTINCT --- core/translate/group_by.rs | 5 ++++ core/translate/main_loop.rs | 48 +++++++++++++++++++++++++----------- core/translate/order_by.rs | 32 ++++++++++++++++++++++-- core/translate/result_row.rs | 35 ++++++++++++++++++++++++-- 4 files changed, 102 insertions(+), 18 deletions(-) diff --git a/core/translate/group_by.rs b/core/translate/group_by.rs index 601e6b94c..2953e4ef4 100644 --- a/core/translate/group_by.rs +++ b/core/translate/group_by.rs @@ -773,6 +773,11 @@ pub fn group_by_emit_row_phase<'a>( labels.label_group_by_end_without_emitting_row, program.offset(), ); + // SELECT DISTINCT also jumps here if there is a duplicate. + if let Distinctness::Distinct { ctx } = &plan.distinctness { + let distinct_agg_ctx = ctx.as_ref().expect("distinct context must exist"); + program.resolve_label(distinct_agg_ctx.label_on_conflict, program.offset()); + } program.emit_insn(Insn::Return { return_reg: registers.reg_subrtn_acc_output_return_offset, }); diff --git a/core/translate/main_loop.rs b/core/translate/main_loop.rs index b35aaa9ad..aae7abde4 100644 --- a/core/translate/main_loop.rs +++ b/core/translate/main_loop.rs @@ -92,14 +92,20 @@ pub fn init_distinct(program: &mut ProgramBuilder, plan: &mut SelectPlan) { unique: false, has_rowid: false, }); + let cursor_id = program.alloc_cursor_id( + Some(index_name.clone()), + CursorType::BTreeIndex(index.clone()), + ); *ctx = Some(DistinctCtx { - cursor_id: program.alloc_cursor_id( - Some(index_name.clone()), - CursorType::BTreeIndex(index.clone()), - ), + cursor_id, ephemeral_index_name: index_name, label_on_conflict: program.allocate_label(), }); + + program.emit_insn(Insn::OpenEphemeral { + cursor_id, + is_table: false, + }); } } @@ -772,16 +778,25 @@ fn emit_loop_source<'a>( Ok(()) } - LoopEmitTarget::OrderBySorter => order_by_sorter_insert( - program, - &t_ctx.resolver, - t_ctx - .meta_sort - .as_ref() - .expect("sort metadata must exist for ORDER BY"), - &mut t_ctx.result_column_indexes_in_orderby_sorter, - plan, - ), + LoopEmitTarget::OrderBySorter => { + order_by_sorter_insert( + program, + &t_ctx.resolver, + t_ctx + .meta_sort + .as_ref() + .expect("sort metadata must exist for ORDER BY"), + &mut t_ctx.result_column_indexes_in_orderby_sorter, + plan, + )?; + + if let Distinctness::Distinct { ctx } = &plan.distinctness { + let distinct_agg_ctx = ctx.as_ref().expect("distinct context must exist"); + program.preassign_label_to_next_insn(distinct_agg_ctx.label_on_conflict); + } + + Ok(()) + } LoopEmitTarget::AggStep => { let num_aggs = plan.aggregates.len(); let start_reg = program.alloc_registers(num_aggs); @@ -871,6 +886,11 @@ fn emit_loop_source<'a>( t_ctx.reg_limit_offset_sum, )?; + if let Distinctness::Distinct { ctx } = &plan.distinctness { + let distinct_agg_ctx = ctx.as_ref().expect("distinct context must exist"); + program.preassign_label_to_next_insn(distinct_agg_ctx.label_on_conflict); + } + Ok(()) } } diff --git a/core/translate/order_by.rs b/core/translate/order_by.rs index f1c6a0b13..a5be4d003 100644 --- a/core/translate/order_by.rs +++ b/core/translate/order_by.rs @@ -8,7 +8,7 @@ use crate::{ util::exprs_are_equivalent, vdbe::{ builder::{CursorType, ProgramBuilder}, - insn::Insn, + insn::{IdxInsertFlags, Insn}, }, Result, }; @@ -16,7 +16,7 @@ use crate::{ use super::{ emitter::{Resolver, TranslateCtx}, expr::translate_expr, - plan::{ResultSetColumn, SelectPlan, TableReference}, + plan::{Distinctness, ResultSetColumn, SelectPlan, TableReference}, result_row::{emit_offset, emit_result_row_and_limit}, }; @@ -227,6 +227,7 @@ pub fn order_by_sorter_insert( } let mut cur_reg = start_reg + order_by_len; let mut cur_idx_in_orderby_sorter = order_by_len; + let mut translated_result_col_count = 0; for (i, rc) in result_columns.iter().enumerate() { if let Some(ref v) = result_columns_to_skip { let found = v.iter().find(|(skipped_idx, _)| *skipped_idx == i); @@ -243,11 +244,38 @@ pub fn order_by_sorter_insert( cur_reg, resolver, )?; + translated_result_col_count += 1; res_col_indexes_in_orderby_sorter.insert(i, cur_idx_in_orderby_sorter); cur_idx_in_orderby_sorter += 1; cur_reg += 1; } + // Handle SELECT DISTINCT deduplication + if let Distinctness::Distinct { ctx } = &plan.distinctness { + let distinct_agg_ctx = ctx.as_ref().expect("distinct context must exist"); + let num_regs = order_by_len + translated_result_col_count; + program.emit_insn(Insn::Found { + cursor_id: distinct_agg_ctx.cursor_id, + target_pc: distinct_agg_ctx.label_on_conflict, + record_reg: start_reg, + num_regs, + }); + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg, + count: num_regs, + dest_reg: record_reg, + index_name: Some(distinct_agg_ctx.ephemeral_index_name.to_string()), + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: distinct_agg_ctx.cursor_id, + record_reg: record_reg, + unpacked_start: None, + unpacked_count: None, + flags: IdxInsertFlags::new(), + }); + } + let SortMetadata { sort_cursor, reg_sorter_data, diff --git a/core/translate/result_row.rs b/core/translate/result_row.rs index d674ae0f9..11ccaf7b0 100644 --- a/core/translate/result_row.rs +++ b/core/translate/result_row.rs @@ -1,12 +1,16 @@ use crate::{ - vdbe::{builder::ProgramBuilder, insn::Insn, BranchOffset}, + vdbe::{ + builder::ProgramBuilder, + insn::{IdxInsertFlags, Insn}, + BranchOffset, + }, Result, }; use super::{ emitter::Resolver, expr::translate_expr, - plan::{SelectPlan, SelectQueryType}, + plan::{Distinctness, SelectPlan, SelectQueryType}, }; /// Emits the bytecode for: @@ -49,6 +53,33 @@ pub fn emit_select_result( resolver, )?; } + + // Handle SELECT DISTINCT deduplication + if let Distinctness::Distinct { ctx } = &plan.distinctness { + let distinct_agg_ctx = ctx.as_ref().expect("distinct context must exist"); + let num_regs = plan.result_columns.len(); + program.emit_insn(Insn::Found { + cursor_id: distinct_agg_ctx.cursor_id, + target_pc: distinct_agg_ctx.label_on_conflict, + record_reg: reg_result_cols_start, + num_regs, + }); + let record_reg = program.alloc_register(); + program.emit_insn(Insn::MakeRecord { + start_reg: reg_result_cols_start, + count: num_regs, + dest_reg: record_reg, + index_name: Some(distinct_agg_ctx.ephemeral_index_name.to_string()), + }); + program.emit_insn(Insn::IdxInsert { + cursor_id: distinct_agg_ctx.cursor_id, + record_reg: record_reg, + unpacked_start: None, + unpacked_count: None, + flags: IdxInsertFlags::new(), + }); + } + emit_result_row_and_limit( program, plan,