mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-07 09:14:26 +01:00
Merge 'Aggregation without group by produces incorrect results for scalars' from Ihor Andrianov
Closes #954 Before: <img width="669" alt="Знімок екрана 2025-03-27 о 21 49 19" src="https://github.com/user- attachments/assets/d005e690-7dab-41e5-bc03-b574cade3965" /> After: <img width="676" alt="Знімок екрана 2025-03-27 о 21 49 44" src="https://github.com/user- attachments/assets/1f4eb6bf-a238-496e-9fa4-32382799ef86" /> SQLite: <img width="656" alt="Знімок екрана 2025-03-27 о 21 50 04" src="https://github.com/user- attachments/assets/3eca184e-6ea5-47c1-824f-51d11256a7af" /> Reviewed-by: Pere Diaz Bou <pere-altea@homail.com> Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com> Closes #1198
This commit is contained in:
@@ -62,6 +62,10 @@ pub struct TranslateCtx<'a> {
|
||||
pub label_main_loop_end: Option<BranchOffset>,
|
||||
// First register of the aggregation results
|
||||
pub reg_agg_start: Option<usize>,
|
||||
// In non-group-by statements with aggregations (e.g. SELECT foo, bar, sum(baz) FROM t),
|
||||
// we want to emit the non-aggregate columns (foo and bar) only once.
|
||||
// This register is a flag that tracks whether we have already done that.
|
||||
pub reg_nonagg_emit_once_flag: Option<usize>,
|
||||
// First register of the result columns of the query
|
||||
pub reg_result_cols_start: Option<usize>,
|
||||
// The register holding the limit value, if any.
|
||||
@@ -115,6 +119,7 @@ fn prologue<'a>(
|
||||
labels_main_loop: (0..table_count).map(|_| LoopLabels::new(program)).collect(),
|
||||
label_main_loop_end: None,
|
||||
reg_agg_start: None,
|
||||
reg_nonagg_emit_once_flag: None,
|
||||
reg_limit: None,
|
||||
reg_offset: None,
|
||||
reg_limit_offset_sum: None,
|
||||
@@ -243,6 +248,18 @@ pub fn emit_query<'a>(
|
||||
});
|
||||
}
|
||||
|
||||
// For non-grouped aggregation queries that also have non-aggregate columns,
|
||||
// we need to ensure non-aggregate columns are only emitted once.
|
||||
// This flag helps track whether we've already emitted these columns.
|
||||
if !plan.aggregates.is_empty()
|
||||
&& plan.group_by.is_none()
|
||||
&& plan.result_columns.iter().any(|c| !c.contains_aggregates)
|
||||
{
|
||||
let flag = program.alloc_register();
|
||||
program.emit_int(0, flag); // Initialize flag to 0 (not yet emitted)
|
||||
t_ctx.reg_nonagg_emit_once_flag = Some(flag);
|
||||
}
|
||||
|
||||
// Allocate registers for result columns
|
||||
t_ctx.reg_result_cols_start = Some(program.alloc_registers(plan.result_columns.len()));
|
||||
|
||||
@@ -251,8 +268,8 @@ pub fn emit_query<'a>(
|
||||
init_order_by(program, t_ctx, order_by)?;
|
||||
}
|
||||
|
||||
if let Some(ref mut group_by) = plan.group_by {
|
||||
init_group_by(program, t_ctx, group_by, &plan.aggregates)?;
|
||||
if let Some(ref group_by) = plan.group_by {
|
||||
init_group_by(program, t_ctx, group_by, &plan)?;
|
||||
}
|
||||
init_loop(
|
||||
program,
|
||||
|
||||
@@ -6,6 +6,7 @@ use crate::{
|
||||
function::AggFunc,
|
||||
schema::{Column, PseudoTable},
|
||||
types::{OwnedValue, Record},
|
||||
util::exprs_are_equivalent,
|
||||
vdbe::{
|
||||
builder::{CursorType, ProgramBuilder},
|
||||
insn::Insn,
|
||||
@@ -37,12 +38,15 @@ pub struct GroupByMetadata {
|
||||
pub reg_sorter_key: usize,
|
||||
// Register holding a flag to abort the grouping process if necessary
|
||||
pub reg_abort_flag: usize,
|
||||
// Register holding the start of the accumulator group registers (i.e. the groups, not the aggregates)
|
||||
pub reg_group_exprs_acc: usize,
|
||||
// Register holding the start of the non aggregate query members (all columns except aggregate arguments)
|
||||
pub reg_non_aggregate_exprs_acc: usize,
|
||||
// Starting index of the register(s) that hold the comparison result between the current row and the previous row
|
||||
// The comparison result is used to determine if the current row belongs to the same group as the previous row
|
||||
// Each group by expression has a corresponding register
|
||||
pub reg_group_exprs_cmp: usize,
|
||||
// Columns that not part of GROUP BY clause and not arguments of Aggregation function.
|
||||
// Heavy calculation and needed in different functions, so it is reasonable to do it once and save.
|
||||
pub non_group_by_non_agg_column_count: Option<usize>,
|
||||
}
|
||||
|
||||
/// Initialize resources needed for GROUP BY processing
|
||||
@@ -50,15 +54,21 @@ pub fn init_group_by(
|
||||
program: &mut ProgramBuilder,
|
||||
t_ctx: &mut TranslateCtx,
|
||||
group_by: &GroupBy,
|
||||
aggregates: &[Aggregate],
|
||||
plan: &SelectPlan,
|
||||
) -> Result<()> {
|
||||
let num_aggs = aggregates.len();
|
||||
let num_aggs = plan.aggregates.len();
|
||||
|
||||
let non_aggregate_count = plan
|
||||
.result_columns
|
||||
.iter()
|
||||
.filter(|rc| !rc.contains_aggregates)
|
||||
.count();
|
||||
|
||||
let sort_cursor = program.alloc_cursor_id(None, CursorType::Sorter);
|
||||
|
||||
let reg_abort_flag = program.alloc_register();
|
||||
let reg_group_exprs_cmp = program.alloc_registers(group_by.exprs.len());
|
||||
let reg_group_exprs_acc = program.alloc_registers(group_by.exprs.len());
|
||||
let reg_non_aggregate_exprs_acc = program.alloc_registers(non_aggregate_count);
|
||||
let reg_agg_exprs_start = program.alloc_registers(num_aggs);
|
||||
let reg_sorter_key = program.alloc_register();
|
||||
|
||||
@@ -71,7 +81,7 @@ pub fn init_group_by(
|
||||
}
|
||||
program.emit_insn(Insn::SorterOpen {
|
||||
cursor_id: sort_cursor,
|
||||
columns: aggregates.len() + group_by.exprs.len(),
|
||||
columns: non_aggregate_count + plan.aggregates.len(),
|
||||
order: Record::new(order),
|
||||
});
|
||||
|
||||
@@ -110,9 +120,10 @@ pub fn init_group_by(
|
||||
label_acc_indicator_set_flag_true: program.allocate_label(),
|
||||
reg_subrtn_acc_clear_return_offset,
|
||||
reg_abort_flag,
|
||||
reg_group_exprs_acc,
|
||||
reg_non_aggregate_exprs_acc,
|
||||
reg_group_exprs_cmp,
|
||||
reg_sorter_key,
|
||||
non_group_by_non_agg_column_count: None,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
@@ -146,25 +157,57 @@ pub fn emit_group_by<'a>(
|
||||
sort_cursor,
|
||||
reg_group_exprs_cmp,
|
||||
reg_subrtn_acc_clear_return_offset,
|
||||
reg_group_exprs_acc,
|
||||
reg_non_aggregate_exprs_acc,
|
||||
reg_abort_flag,
|
||||
reg_sorter_key,
|
||||
label_subrtn_acc_clear,
|
||||
label_acc_indicator_set_flag_true,
|
||||
non_group_by_non_agg_column_count,
|
||||
..
|
||||
} = *t_ctx.meta_group_by.as_mut().unwrap();
|
||||
|
||||
let group_by = plan.group_by.as_ref().unwrap();
|
||||
|
||||
// all group by columns and all arguments of agg functions are in the sorter.
|
||||
// the sort keys are the group by columns (the aggregation within groups is done based on how long the sort keys remain the same)
|
||||
let sorter_column_count = group_by.exprs.len()
|
||||
+ plan
|
||||
.aggregates
|
||||
let agg_args_count = plan
|
||||
.aggregates
|
||||
.iter()
|
||||
.map(|agg| agg.args.len())
|
||||
.sum::<usize>();
|
||||
let group_by_count = group_by.exprs.len();
|
||||
let non_group_by_non_agg_column_count = non_group_by_non_agg_column_count.unwrap();
|
||||
|
||||
// We have to know which group by expr present in resulting set
|
||||
let group_by_expr_in_res_cols = group_by.exprs.iter().map(|expr| {
|
||||
plan.result_columns
|
||||
.iter()
|
||||
.map(|agg| agg.args.len())
|
||||
.sum::<usize>();
|
||||
// sorter column names do not matter
|
||||
.any(|e| exprs_are_equivalent(&e.expr, expr))
|
||||
});
|
||||
|
||||
// Create a map from sorter column index to result register
|
||||
// This helps track where each column from the sorter should be stored
|
||||
let mut column_register_mapping =
|
||||
vec![None; group_by_count + non_group_by_non_agg_column_count];
|
||||
let mut next_reg = reg_non_aggregate_exprs_acc;
|
||||
|
||||
// Map GROUP BY columns that are in the result set to registers
|
||||
for (i, is_in_result) in group_by_expr_in_res_cols.clone().enumerate() {
|
||||
if is_in_result {
|
||||
column_register_mapping[i] = Some(next_reg);
|
||||
next_reg += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle other non-aggregate columns that aren't part of GROUP BY and not part of Aggregation function
|
||||
for i in group_by_count..group_by_count + non_group_by_non_agg_column_count {
|
||||
column_register_mapping[i] = Some(next_reg);
|
||||
next_reg += 1;
|
||||
}
|
||||
|
||||
// Calculate total number of columns in the sorter
|
||||
// The sorter contains all GROUP BY columns, aggregate arguments, and other columns
|
||||
let sorter_column_count = agg_args_count + group_by_count + non_group_by_non_agg_column_count;
|
||||
|
||||
// Create pseudo-columns for the pseudo-table
|
||||
// (these are placeholders as we only care about structure, not semantics)
|
||||
let ty = crate::schema::Type::Null;
|
||||
let pseudo_columns = (0..sorter_column_count)
|
||||
.map(|_| Column {
|
||||
@@ -178,7 +221,8 @@ pub fn emit_group_by<'a>(
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// A pseudo table is a "fake" table to which we read one row at a time from the sorter
|
||||
// Create a pseudo-table to read one row at a time from the sorter
|
||||
// This allows us to use standard table access operations on the sorted data
|
||||
let pseudo_table = Rc::new(PseudoTable {
|
||||
columns: pseudo_columns,
|
||||
});
|
||||
@@ -237,13 +281,6 @@ pub fn emit_group_by<'a>(
|
||||
target_pc_gt: program.offset().add(1u32),
|
||||
});
|
||||
|
||||
// New group, move current group by columns into the comparison register
|
||||
program.emit_insn(Insn::Move {
|
||||
source_reg: groups_start_reg,
|
||||
dest_reg: reg_group_exprs_cmp,
|
||||
count: group_by.exprs.len(),
|
||||
});
|
||||
|
||||
program.add_comment(
|
||||
program.offset(),
|
||||
"check if ended group had data, and output if so",
|
||||
@@ -253,6 +290,13 @@ pub fn emit_group_by<'a>(
|
||||
return_reg: reg_subrtn_acc_output_return_offset,
|
||||
});
|
||||
|
||||
// New group, move current group by columns into the comparison register
|
||||
program.emit_insn(Insn::Move {
|
||||
source_reg: groups_start_reg,
|
||||
dest_reg: reg_group_exprs_cmp,
|
||||
count: group_by.exprs.len(),
|
||||
});
|
||||
|
||||
program.add_comment(program.offset(), "check abort flag");
|
||||
program.emit_insn(Insn::IfPos {
|
||||
reg: reg_abort_flag,
|
||||
@@ -266,10 +310,10 @@ pub fn emit_group_by<'a>(
|
||||
return_reg: reg_subrtn_acc_clear_return_offset,
|
||||
});
|
||||
|
||||
// Accumulate the values into the aggregations
|
||||
// Process each aggregate function for the current row
|
||||
program.resolve_label(agg_step_label, program.offset());
|
||||
let start_reg = t_ctx.reg_agg_start.unwrap();
|
||||
let mut cursor_index = group_by.exprs.len();
|
||||
let mut cursor_index = group_by_count + non_group_by_non_agg_column_count; // Skipping all columns in sorter that not an aggregation arguments
|
||||
for (i, agg) in plan.aggregates.iter().enumerate() {
|
||||
let agg_result_reg = start_reg + i;
|
||||
translate_aggregation_step_groupby(
|
||||
@@ -284,7 +328,8 @@ pub fn emit_group_by<'a>(
|
||||
cursor_index += agg.args.len();
|
||||
}
|
||||
|
||||
// We only emit the group by columns if we are going to start a new group (i.e. the prev group will not accumulate any more values into the aggregations)
|
||||
// We only need to store non-aggregate columns once per group
|
||||
// Skip if we've already stored them for this group
|
||||
program.add_comment(
|
||||
program.offset(),
|
||||
"don't emit group columns if continuing existing group",
|
||||
@@ -295,17 +340,18 @@ pub fn emit_group_by<'a>(
|
||||
jump_if_null: false,
|
||||
});
|
||||
|
||||
// Read the group by columns for a finished group
|
||||
for i in 0..group_by.exprs.len() {
|
||||
let key_reg = reg_group_exprs_acc + i;
|
||||
let sorter_column_index = i;
|
||||
program.emit_insn(Insn::Column {
|
||||
cursor_id: pseudo_cursor,
|
||||
column: sorter_column_index,
|
||||
dest: key_reg,
|
||||
});
|
||||
// Read non-aggregate columns from the current row
|
||||
for (sorter_column_index, dest_reg) in column_register_mapping.iter().enumerate() {
|
||||
if let Some(dest_reg) = dest_reg {
|
||||
program.emit_insn(Insn::Column {
|
||||
cursor_id: pseudo_cursor,
|
||||
column: sorter_column_index,
|
||||
dest: *dest_reg,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Mark that we've stored data for this group
|
||||
program.resolve_label(label_acc_indicator_set_flag_true, program.offset());
|
||||
program.add_comment(program.offset(), "indicate data in accumulator");
|
||||
program.emit_insn(Insn::Integer {
|
||||
@@ -313,6 +359,7 @@ pub fn emit_group_by<'a>(
|
||||
dest: reg_data_in_acc_flag,
|
||||
});
|
||||
|
||||
// Continue to the next row in the sorter
|
||||
program.emit_insn(Insn::SorterNext {
|
||||
cursor_id: sort_cursor,
|
||||
pc_if_next: label_grouping_loop_start,
|
||||
@@ -340,18 +387,22 @@ pub fn emit_group_by<'a>(
|
||||
|
||||
program.resolve_label(label_subrtn_acc_output, program.offset());
|
||||
|
||||
// Only output a row if there's data in the accumulator
|
||||
program.add_comment(program.offset(), "output group by row subroutine start");
|
||||
program.emit_insn(Insn::IfPos {
|
||||
reg: reg_data_in_acc_flag,
|
||||
target_pc: label_agg_final,
|
||||
decrement_by: 0,
|
||||
});
|
||||
|
||||
// If no data, return without outputting a row
|
||||
let group_by_end_without_emitting_row_label = program.allocate_label();
|
||||
program.resolve_label(group_by_end_without_emitting_row_label, program.offset());
|
||||
program.emit_insn(Insn::Return {
|
||||
return_reg: reg_subrtn_acc_output_return_offset,
|
||||
});
|
||||
|
||||
// Finalize aggregate values for output
|
||||
let agg_start_reg = t_ctx.reg_agg_start.unwrap();
|
||||
// Resolve the label for the start of the group by output row subroutine
|
||||
program.resolve_label(label_agg_final, program.offset());
|
||||
@@ -363,16 +414,34 @@ pub fn emit_group_by<'a>(
|
||||
});
|
||||
}
|
||||
|
||||
// we now have the group by columns in registers (group_exprs_start_register..group_exprs_start_register + group_by.len() - 1)
|
||||
// and the agg results in (agg_start_reg..agg_start_reg + aggregates.len() - 1)
|
||||
// we need to call translate_expr on each result column, but replace the expr with a register copy in case any part of the
|
||||
// result column expression matches a) a group by column or b) an aggregation result.
|
||||
for (i, expr) in group_by.exprs.iter().enumerate() {
|
||||
t_ctx
|
||||
.resolver
|
||||
.expr_to_reg_cache
|
||||
.push((expr, reg_group_exprs_acc + i));
|
||||
// Map GROUP BY expressions to their registers in the result set
|
||||
for (i, (expr, is_in_result)) in group_by
|
||||
.exprs
|
||||
.iter()
|
||||
.zip(group_by_expr_in_res_cols)
|
||||
.enumerate()
|
||||
{
|
||||
if is_in_result {
|
||||
if let Some(reg) = &column_register_mapping.get(i).and_then(|opt| *opt) {
|
||||
t_ctx.resolver.expr_to_reg_cache.push((expr, *reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Map non-aggregate, non-GROUP BY columns to their registers
|
||||
let non_agg_cols = plan
|
||||
.result_columns
|
||||
.iter()
|
||||
.filter(|rc| !rc.contains_aggregates && !is_column_in_group_by(&rc.expr, &group_by.exprs));
|
||||
|
||||
for (idx, rc) in non_agg_cols.enumerate() {
|
||||
let sorter_idx = group_by_count + idx;
|
||||
if let Some(reg) = column_register_mapping.get(sorter_idx).and_then(|opt| *opt) {
|
||||
t_ctx.resolver.expr_to_reg_cache.push((&rc.expr, reg));
|
||||
}
|
||||
}
|
||||
|
||||
// Map aggregate expressions to their result registers
|
||||
for (i, agg) in plan.aggregates.iter().enumerate() {
|
||||
t_ctx
|
||||
.resolver
|
||||
@@ -415,12 +484,18 @@ pub fn emit_group_by<'a>(
|
||||
return_reg: reg_subrtn_acc_output_return_offset,
|
||||
});
|
||||
|
||||
// Subroutine to clear accumulators for a new group
|
||||
program.add_comment(program.offset(), "clear accumulator subroutine start");
|
||||
program.resolve_label(label_subrtn_acc_clear, program.offset());
|
||||
let start_reg = reg_group_exprs_acc;
|
||||
let start_reg = reg_non_aggregate_exprs_acc;
|
||||
|
||||
// Reset all accumulator registers to NULL
|
||||
program.emit_insn(Insn::Null {
|
||||
dest: start_reg,
|
||||
dest_end: Some(start_reg + group_by.exprs.len() + plan.aggregates.len() - 1),
|
||||
dest_end: Some(
|
||||
start_reg + non_group_by_non_agg_column_count + group_by_count + plan.aggregates.len()
|
||||
- 1,
|
||||
),
|
||||
});
|
||||
|
||||
program.emit_insn(Insn::Integer {
|
||||
@@ -668,3 +743,9 @@ pub fn translate_aggregation_step_groupby(
|
||||
};
|
||||
Ok(dest)
|
||||
}
|
||||
|
||||
pub fn is_column_in_group_by(expr: &ast::Expr, group_by_exprs: &[ast::Expr]) -> bool {
|
||||
group_by_exprs
|
||||
.iter()
|
||||
.any(|expr2| exprs_are_equivalent(expr, expr2))
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ use super::{
|
||||
aggregation::translate_aggregation_step,
|
||||
emitter::{OperationMode, TranslateCtx},
|
||||
expr::{translate_condition_expr, translate_expr, ConditionMetadata},
|
||||
group_by::is_column_in_group_by,
|
||||
order_by::{order_by_sorter_insert, sorter_insert},
|
||||
plan::{
|
||||
IterationDirection, Operation, Search, SelectPlan, SelectQueryType, TableReference,
|
||||
@@ -597,19 +598,46 @@ fn emit_loop_source(
|
||||
) -> Result<()> {
|
||||
match emit_target {
|
||||
LoopEmitTarget::GroupBySorter => {
|
||||
// This function creates a sorter for GROUP BY operations by allocating registers and
|
||||
// translating expressions for three types of columns:
|
||||
// 1) GROUP BY columns (used as sorting keys)
|
||||
// 2) non-aggregate, non-GROUP BY columns
|
||||
// 3) aggregate function arguments
|
||||
let group_by = plan.group_by.as_ref().unwrap();
|
||||
let aggregates = &plan.aggregates;
|
||||
let sort_keys_count = group_by.exprs.len();
|
||||
|
||||
// Identify columns in the result set that are neither in GROUP BY nor contain aggregates
|
||||
let non_group_by_non_agg_expr = plan
|
||||
.result_columns
|
||||
.iter()
|
||||
.filter(|rc| {
|
||||
!rc.contains_aggregates && !is_column_in_group_by(&rc.expr, &group_by.exprs)
|
||||
})
|
||||
.map(|rc| &rc.expr);
|
||||
let non_agg_count = non_group_by_non_agg_expr.clone().count();
|
||||
// Store the count of non-GROUP BY, non-aggregate columns in the metadata
|
||||
// This will be used later during aggregation processing
|
||||
t_ctx.meta_group_by.as_mut().map(|meta| {
|
||||
meta.non_group_by_non_agg_column_count = Some(non_agg_count);
|
||||
meta
|
||||
});
|
||||
|
||||
// Calculate the total number of arguments used across all aggregate functions
|
||||
let aggregate_arguments_count = plan
|
||||
.aggregates
|
||||
.iter()
|
||||
.map(|agg| agg.args.len())
|
||||
.sum::<usize>();
|
||||
let column_count = sort_keys_count + aggregate_arguments_count;
|
||||
|
||||
// Calculate total number of registers needed for all columns in the sorter
|
||||
let column_count = group_by.exprs.len() + aggregate_arguments_count + non_agg_count;
|
||||
|
||||
// Allocate a contiguous block of registers for all columns
|
||||
let start_reg = program.alloc_registers(column_count);
|
||||
let mut cur_reg = start_reg;
|
||||
|
||||
// The group by sorter rows will contain the grouping keys first. They are also the sort keys.
|
||||
// Step 1: Process GROUP BY columns first
|
||||
// These will be the first columns in the sorter and serve as sort keys
|
||||
for expr in group_by.exprs.iter() {
|
||||
let key_reg = cur_reg;
|
||||
cur_reg += 1;
|
||||
@@ -621,14 +649,28 @@ fn emit_loop_source(
|
||||
&t_ctx.resolver,
|
||||
)?;
|
||||
}
|
||||
// Then we have the aggregate arguments.
|
||||
|
||||
// Step 2: Process columns that aren't part of GROUP BY and don't contain aggregates
|
||||
// Example: SELECT col1, col2, SUM(col3) FROM table GROUP BY col1
|
||||
// Here col2 would be processed in this loop if it's in the result set
|
||||
for expr in non_group_by_non_agg_expr {
|
||||
let key_reg = cur_reg;
|
||||
cur_reg += 1;
|
||||
translate_expr(
|
||||
program,
|
||||
Some(&plan.table_references),
|
||||
expr,
|
||||
key_reg,
|
||||
&t_ctx.resolver,
|
||||
)?;
|
||||
}
|
||||
|
||||
// Step 3: Process arguments for all aggregate functions
|
||||
// For each aggregate, translate all its argument expressions
|
||||
for agg in aggregates.iter() {
|
||||
// Here we are collecting scalars for the group by sorter, which will include
|
||||
// both the group by expressions and the aggregate arguments.
|
||||
// e.g. in `select u.first_name, sum(u.age) from users group by u.first_name`
|
||||
// the sorter will have two scalars: u.first_name and u.age.
|
||||
// these are then sorted by u.first_name, and for each u.first_name, we sum the u.age.
|
||||
// the actual aggregation is done later.
|
||||
// For a query like: SELECT group_col, SUM(val1), AVG(val2) FROM table GROUP BY group_col
|
||||
// we'll process val1 and val2 here, storing them in the sorter so they're available
|
||||
// when computing the aggregates after sorting by group_col
|
||||
for expr in agg.args.iter() {
|
||||
let agg_reg = cur_reg;
|
||||
cur_reg += 1;
|
||||
@@ -642,9 +684,6 @@ fn emit_loop_source(
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: although it's less often useful, SQLite does allow for expressions in the SELECT that are not part of a GROUP BY or aggregate.
|
||||
// We currently ignore those and only emit the GROUP BY keys and aggregate arguments. This should be fixed.
|
||||
|
||||
let group_by_metadata = t_ctx.meta_group_by.as_ref().unwrap();
|
||||
|
||||
sorter_insert(
|
||||
@@ -677,14 +716,31 @@ fn emit_loop_source(
|
||||
&t_ctx.resolver,
|
||||
)?;
|
||||
}
|
||||
for (i, rc) in plan.result_columns.iter().enumerate() {
|
||||
if rc.contains_aggregates {
|
||||
// Do nothing, aggregates are computed above
|
||||
// if this result column is e.g. something like sum(x) + 1 or length(sum(x)), we do not want to translate that (+1) or length() yet,
|
||||
// it will be computed after the aggregations are finalized.
|
||||
continue;
|
||||
}
|
||||
let reg = start_reg + num_aggs + i;
|
||||
|
||||
let label_emit_nonagg_only_once = if let Some(flag) = t_ctx.reg_nonagg_emit_once_flag {
|
||||
let if_label = program.allocate_label();
|
||||
program.emit_insn(Insn::If {
|
||||
reg: flag,
|
||||
target_pc: if_label,
|
||||
jump_if_null: false,
|
||||
});
|
||||
Some(if_label)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let col_start = t_ctx.reg_result_cols_start.unwrap();
|
||||
|
||||
// Process only non-aggregate columns
|
||||
let non_agg_columns = plan
|
||||
.result_columns
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, rc)| !rc.contains_aggregates);
|
||||
|
||||
for (i, rc) in non_agg_columns {
|
||||
let reg = col_start + i;
|
||||
|
||||
translate_expr(
|
||||
program,
|
||||
Some(&plan.table_references),
|
||||
@@ -693,6 +749,12 @@ fn emit_loop_source(
|
||||
&t_ctx.resolver,
|
||||
)?;
|
||||
}
|
||||
if let Some(label) = label_emit_nonagg_only_once {
|
||||
program.resolve_label(label, program.offset());
|
||||
let flag = t_ctx.reg_nonagg_emit_once_flag.unwrap();
|
||||
program.emit_int(1, flag);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
LoopEmitTarget::QueryResult => {
|
||||
|
||||
@@ -4,6 +4,7 @@ use limbo_sqlite3_parser::ast;
|
||||
|
||||
use crate::{
|
||||
schema::{Index, Schema},
|
||||
util::exprs_are_equivalent,
|
||||
Result,
|
||||
};
|
||||
|
||||
@@ -43,6 +44,8 @@ fn optimize_select_plan(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
|
||||
|
||||
eliminate_unnecessary_orderby(plan, schema)?;
|
||||
|
||||
eliminate_orderby_like_groupby(plan)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -117,6 +120,71 @@ fn query_is_already_ordered_by(
|
||||
}
|
||||
}
|
||||
|
||||
fn eliminate_orderby_like_groupby(plan: &mut SelectPlan) -> Result<()> {
|
||||
if plan.order_by.is_none() | plan.group_by.is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
if plan.table_references.len() == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let order_by_clauses = plan.order_by.as_mut().unwrap();
|
||||
let group_by_clauses = plan.group_by.as_mut().unwrap();
|
||||
|
||||
let mut group_by_insert_position = 0;
|
||||
let mut order_index = 0;
|
||||
|
||||
// This function optimizes query execution by eliminating duplicate expressions between ORDER BY and GROUP BY clauses
|
||||
// When the same column appears in both clauses, we can avoid redundant sorting operations
|
||||
// The function reorders GROUP BY expressions and removes redundant ORDER BY expressions to ensure consistent ordering
|
||||
while order_index < order_by_clauses.len() {
|
||||
let (order_expr, direction) = &order_by_clauses[order_index];
|
||||
|
||||
// Skip descending orders as they require separate sorting
|
||||
if matches!(direction, Direction::Descending) {
|
||||
order_index += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if the current ORDER BY expression matches any expression in the GROUP BY clause
|
||||
if let Some(group_expr_position) = group_by_clauses
|
||||
.exprs
|
||||
.iter()
|
||||
.position(|expr| exprs_are_equivalent(expr, order_expr))
|
||||
{
|
||||
// If we found a matching expression in GROUP BY, we need to ensure it's in the correct position
|
||||
// to preserve the ordering specified by ORDER BY clauses
|
||||
|
||||
// Move the matching GROUP BY expression to the current insertion position
|
||||
// This effectively "bubbles up" the expression to maintain proper ordering
|
||||
if group_expr_position != group_by_insert_position {
|
||||
let mut current_position = group_expr_position;
|
||||
|
||||
// Swap expressions to move the matching one to the correct position
|
||||
while current_position > group_by_insert_position {
|
||||
group_by_clauses
|
||||
.exprs
|
||||
.swap(current_position, current_position - 1);
|
||||
current_position -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
group_by_insert_position += 1;
|
||||
|
||||
// Remove this expression from ORDER BY since it's now handled by GROUP BY
|
||||
order_by_clauses.remove(order_index);
|
||||
// Note: We don't increment order_index here because removal shifts all elements
|
||||
} else {
|
||||
// If not found in GROUP BY, move to next ORDER BY expression
|
||||
order_index += 1;
|
||||
}
|
||||
}
|
||||
if order_by_clauses.is_empty() {
|
||||
plan.order_by = None
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Result<()> {
|
||||
if plan.order_by.is_none() {
|
||||
return Ok(());
|
||||
@@ -125,6 +193,13 @@ fn eliminate_unnecessary_orderby(plan: &mut SelectPlan, schema: &Schema) -> Resu
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// If GROUP BY clause is present, we can't rely on already ordered columns because GROUP BY reorders the data
|
||||
// This early return prevents the elimination of ORDER BY when GROUP BY exists, as sorting must be applied after grouping
|
||||
// And if ORDER BY clause duplicates GROUP BY we handle it later in fn eliminate_orderby_like_groupby
|
||||
if plan.group_by.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let o = plan.order_by.as_mut().unwrap();
|
||||
|
||||
if o.len() != 1 {
|
||||
|
||||
@@ -25,7 +25,16 @@ pub fn emit_select_result(
|
||||
}
|
||||
|
||||
let start_reg = t_ctx.reg_result_cols_start.unwrap();
|
||||
for (i, rc) in plan.result_columns.iter().enumerate() {
|
||||
for (i, rc) in plan.result_columns.iter().enumerate().filter(|(_, rc)| {
|
||||
// For aggregate queries, we handle columns differently; example: select id, first_name, sum(age) from users limit 1;
|
||||
// 1. Columns with aggregates (e.g., sum(age)) are computed in each iteration of aggregation
|
||||
// 2. Non-aggregate columns (e.g., id, first_name) are only computed once in the first iteration
|
||||
// This filter ensures we only emit expressions for non aggregate columns once,
|
||||
// preserving previously calculated values while updating aggregate results
|
||||
// For all other queries where reg_nonagg_emit_once_flag is none we do nothing.
|
||||
t_ctx.reg_nonagg_emit_once_flag.is_some() && rc.contains_aggregates
|
||||
|| t_ctx.reg_nonagg_emit_once_flag.is_none()
|
||||
}) {
|
||||
let reg = start_reg + i;
|
||||
translate_expr(
|
||||
program,
|
||||
|
||||
@@ -75,6 +75,7 @@ pub fn emit_subquery<'a>(
|
||||
meta_left_joins: (0..plan.table_references.len()).map(|_| None).collect(),
|
||||
meta_sort: None,
|
||||
reg_agg_start: None,
|
||||
reg_nonagg_emit_once_flag: None,
|
||||
reg_result_cols_start: None,
|
||||
result_column_indexes_in_orderby_sorter: (0..plan.result_columns.len()).collect(),
|
||||
result_columns_to_skip_in_orderby_sorter: None,
|
||||
|
||||
@@ -99,6 +99,27 @@ do_execsql_test select-agg-binary-unary-positive {
|
||||
SELECT min(age) + +max(age) FROM users;
|
||||
} {101}
|
||||
|
||||
do_execsql_test select-non-agg-cols-should-be-not-null {
|
||||
SELECT id, first_name, sum(age) FROM users LIMIT 1;
|
||||
} {1|Jamie|503960}
|
||||
|
||||
do_execsql_test select-with-group-by-and-agg-1 {
|
||||
SELECT id, first_name, avg(age) FROM users group by last_name limit 1;
|
||||
} {274|Debra|66.25}
|
||||
|
||||
do_execsql_test select-with-group-by-and-agg-2 {
|
||||
select first_name, last_name from users where state = 'AL' group by last_name limit 10;
|
||||
} {Jay|Acosta
|
||||
Daniel|Adams
|
||||
Aaron|Baker
|
||||
Sharon|Becker
|
||||
Kim|Berg
|
||||
Donald|Bishop
|
||||
Brian|Bradford
|
||||
Jesus|Bradley
|
||||
John|Brown
|
||||
Hunter|Burke}
|
||||
|
||||
do_execsql_test select-agg-json-array {
|
||||
SELECT json_group_array(name) FROM products;
|
||||
} {["hat","cap","shirt","sweater","sweatshirt","shorts","jeans","sneakers","boots","coat","accessories"]}
|
||||
|
||||
0
testing/testing
Normal file
0
testing/testing
Normal file
Reference in New Issue
Block a user