Files
turso/core/translate/window.rs

1064 lines
40 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use crate::schema::{BTreeTable, Table};
use crate::translate::aggregation::{translate_aggregation_step, AggArgumentSource};
use crate::translate::collate::{get_collseq_from_expr, CollationSeq};
use crate::translate::emitter::{Resolver, TranslateCtx};
use crate::translate::expr::{walk_expr, walk_expr_mut, WalkControl};
use crate::translate::order_by::order_by_sorter_insert;
use crate::translate::plan::{
Aggregate, Distinctness, JoinOrderMember, JoinedTable, QueryDestination, ResultSetColumn,
SelectPlan, TableReferences, Window,
};
use crate::translate::planner::resolve_window_and_aggregate_functions;
use crate::translate::result_row::emit_select_result;
use crate::types::KeyInfo;
use crate::util::exprs_are_equivalent;
use crate::vdbe::builder::{CursorType, ProgramBuilder, TableRefIdCounter};
use crate::vdbe::insn::{InsertFlags, Insn};
use crate::vdbe::{BranchOffset, CursorID};
use crate::Result;
use std::mem;
use std::sync::Arc;
use turso_parser::ast::Name;
use turso_parser::ast::{Expr, FunctionTail, Literal, Over, SortOrder, TableInternalId};
/// Database id placed in every `Expr::Column` that this module generates to reference a
/// window subquery. `prepare_window_subquery` asserts that the subquery table it creates
/// reports this same id.
const SUBQUERY_DATABASE_ID: usize = 0;
/// State shared by the helpers that push expressions down into the subquery being built
/// for a single window.
struct WindowSubqueryContext<'a> {
/// Resolver handed to `resolve_window_and_aggregate_functions`.
resolver: &'a Resolver<'a>,
/// ORDER BY terms collected so far for the subquery.
subquery_order_by: &'a mut Vec<(Box<Expr>, SortOrder)>,
/// Result columns collected so far for the subquery; an expression's position in this
/// list is the column index used by references to it.
subquery_result_columns: &'a mut Vec<ResultSetColumn>,
/// Internal id of the subquery table, used when building column references to it.
subquery_id: &'a TableInternalId,
}
/// Rewrites a `SELECT` plan so that its window functions can be processed.
///
/// A `SELECT` may mention several window definitions, but internally every `SELECT` plan
/// works with **exactly one** window (which several window functions may share).
///
/// To get there, the original plan is turned into a chain of nested subqueries, one per
/// window definition. Each subquery yields its rows in the order required by the window of
/// its parent. The innermost subquery has no window of its own; it receives the FROM, WHERE,
/// GROUP BY, and HAVING clauses of the original query. ORDER BY, LIMIT, and OFFSET stay on
/// the outermost query.
///
/// Windows that no function references are dropped from `windows` before planning.
///
/// # Panics
/// Panics if windows remain after that filtering and the plan carries a VALUES clause.
///
/// # Examples
/// ```sql
/// -- Example 1: Query with one window
/// SELECT
///   a+1,
///   max(b) OVER (PARTITION BY c ORDER BY d),
///   min(c) OVER (PARTITION BY c ORDER BY d)
/// FROM t1
/// ORDER BY e;
///
/// -- Rewritten form
/// SELECT
///   a+1,
///   max(b) OVER (PARTITION BY c ORDER BY d),
///   min(c) OVER (PARTITION BY c ORDER BY d)
/// FROM (SELECT a, b, c, d, e FROM t1 ORDER BY c, d)
/// ORDER BY e;
///
/// -- Example 2: Query with multiple windows
/// SELECT
///   a,
///   max(b) OVER (PARTITION BY c ORDER BY d),
///   min(c) OVER (PARTITION BY e ORDER BY f)
/// FROM t1;
///
/// -- Rewritten form
/// SELECT
///   a,
///   max(b) OVER (PARTITION BY c ORDER BY d) AS w1,
///   w2
/// FROM (
///   SELECT
///     a,
///     b,
///     c,
///     d,
///     min(c) OVER (PARTITION BY e ORDER BY f) AS w2
///   FROM (SELECT a, b, c, d, e, f FROM t1 ORDER BY e, f)
///   ORDER BY c, d
/// );
/// ```
pub fn plan_windows(
    plan: &mut SelectPlan,
    resolver: &Resolver,
    table_ref_counter: &mut TableRefIdCounter,
    windows: &mut Vec<Window>,
) -> crate::Result<()> {
    // A named window that no function refers to has no effect on the query; drop it.
    windows.retain(|window| !window.functions.is_empty());

    // Sanity check: the syntax disallows combining VALUES with windows, so whenever
    // windows are present the plan must not carry a VALUES clause.
    let has_windows = !windows.is_empty();
    assert!(
        !has_windows || plan.values.is_empty(),
        "VALUES clause with windows is not supported"
    );

    prepare_window_subquery(plan, resolver, table_ref_counter, windows, 0)
}
/// Assigns one window from `windows` to `outer_plan` and moves the rest of the query into a
/// newly created subquery, recursing until `windows` is empty.
///
/// On each call the function:
/// 1. removes a window from `windows` and gives it a generated name if it has none,
/// 2. builds the subquery's ORDER BY from the window's PARTITION BY and ORDER BY terms,
/// 3. rewrites the outer result columns and ORDER BY to reference subquery columns,
/// 4. moves join order, table references, WHERE, GROUP BY and aggregates into the inner plan,
/// 5. recurses on the inner plan, then attaches it to `outer_plan` as its only joined table.
///
/// `processed_window_count` is the recursion depth; it is only used to generate names.
fn prepare_window_subquery(
outer_plan: &mut SelectPlan,
resolver: &Resolver,
table_ref_counter: &mut TableRefIdCounter,
windows: &mut Vec<Window>,
processed_window_count: usize,
) -> crate::Result<()> {
// Recursion ends once every window has been assigned to a plan.
if windows.is_empty() {
return Ok(());
}
// Take the window at index 0. `swap_remove` fills the vacated slot with the last element,
// so the remaining windows are not kept in their original relative order.
let mut current_window = windows.swap_remove(0);
let mut subquery_result_columns = Vec::new();
let mut subquery_order_by = Vec::new();
let subquery_id = table_ref_counter.next();
if current_window.name.is_none() {
// This is part of normalizing the window definition. The remaining logic lives in
// `rewrite_expr_referencing_current_window`, which replaces inline window definitions
// with references by name.
//
// The goal is to always work with named windows instead of a mix of named and
// inline ones. This way, we don't need to rewrite expressions embedded in inline
// definitions (there might be many equivalent definitions per subquery). Instead,
// we rewrite the named definition once, and all associated window functions
// require no additional processing.
//
// At this stage, window definitions and window functions are already bound,
// so this normalization is purely to keep the plan valid.
//
// If the generated name is not unique across the entire query, that's acceptable —
// the final plan always associates exactly one window with one subquery.
current_window.name = Some(format!("window_{processed_window_count}"));
}
let mut ctx = WindowSubqueryContext {
resolver,
subquery_order_by: &mut subquery_order_by,
subquery_result_columns: &mut subquery_result_columns,
subquery_id: &subquery_id,
};
// Build the ORDER BY clause for the subquery by concatenating the window's PARTITION BY
// columns with its ORDER BY columns. This ensures that rows in the subquery are returned
// in the correct order for partitioning and window function evaluation.
for expr in current_window.partition_by.iter_mut() {
append_order_by(outer_plan, expr, &SortOrder::Asc, &mut ctx)?;
// Updated on every iteration; after the loop it holds the number of subquery result
// columns produced by the PARTITION BY terms (equivalent terms share one column).
current_window.deduplicated_partition_by_len = Some(ctx.subquery_result_columns.len())
}
for (expr, order) in current_window.order_by.iter_mut() {
append_order_by(outer_plan, expr, order, &mut ctx)?;
}
// Rewrite expressions from the outer query's result columns and ORDER BY clause so that
// they reference the subquery instead. The original expressions are included in the
// subquery's result columns.
for col in outer_plan.result_columns.iter_mut() {
rewrite_terminal_expr(
&mut outer_plan.aggregates,
&mut col.expr,
&mut current_window,
&mut ctx,
)?;
}
for (expr, _) in outer_plan.order_by.iter_mut() {
rewrite_terminal_expr(
&mut outer_plan.aggregates,
expr,
&mut current_window,
&mut ctx,
)?;
}
// When there is no ORDER BY or PARTITION BY clause, the window function takes zero arguments,
// and no other columns are selected (e.g., "SELECT count() OVER () FROM products"),
// `subquery_result_columns` may be empty. Add a constant expression to keep the query valid.
if subquery_result_columns.is_empty() {
subquery_result_columns.push(ResultSetColumn {
expr: Expr::Literal(Literal::Numeric("0".to_string())),
alias: None,
contains_aggregates: false,
});
}
// From here on the outer plan reads from exactly one table: the new subquery.
let new_join_order = vec![JoinOrderMember {
table_id: subquery_id,
original_idx: 0,
is_outer: false,
}];
let new_table_references = TableReferences::new(vec![], vec![]);
// The inner plan takes over everything that must run before the window is evaluated:
// the original join order and table references, WHERE, GROUP BY and the aggregates.
// LIMIT and OFFSET are not pushed down.
let mut inner_plan = SelectPlan {
join_order: mem::replace(&mut outer_plan.join_order, new_join_order),
table_references: mem::replace(&mut outer_plan.table_references, new_table_references),
result_columns: subquery_result_columns,
where_clause: mem::take(&mut outer_plan.where_clause),
group_by: mem::take(&mut outer_plan.group_by),
order_by: subquery_order_by,
aggregates: mem::take(&mut outer_plan.aggregates),
limit: None,
offset: None,
contains_constant_false_condition: false,
query_destination: QueryDestination::placeholder_for_subquery(),
distinctness: Distinctness::NonDistinct,
values: vec![],
window: None,
non_from_clause_subqueries: vec![],
};
// Assign the remaining windows (if any) to further nested subqueries.
prepare_window_subquery(
&mut inner_plan,
resolver,
table_ref_counter,
windows,
processed_window_count + 1,
)?;
let subquery = JoinedTable::new_subquery(
format!("window_subquery_{processed_window_count}"),
inner_plan,
None,
subquery_id,
)?;
// Verify that the subquery has the expected database ID.
// This is required to ensure that assumptions in `rewrite_terminal_expr` are valid.
assert_eq!(
subquery.database_id, SUBQUERY_DATABASE_ID,
"expected subquery database id to be {SUBQUERY_DATABASE_ID}"
);
outer_plan.window = Some(current_window);
outer_plan.table_references.add_joined_table(subquery);
Ok(())
}
/// Adds one sort term to the subquery and redirects `expr` to the subquery's output.
///
/// A copy of `expr` (paired with `sort_order`) is appended to the subquery's ORDER BY.
/// Afterwards, window and aggregate functions inside `expr` are resolved against
/// `plan.aggregates`, and `expr` itself is replaced by a reference to the subquery column
/// that carries the original expression.
fn append_order_by(
    plan: &mut SelectPlan,
    expr: &mut Expr,
    sort_order: &SortOrder,
    ctx: &mut WindowSubqueryContext,
) -> crate::Result<()> {
    // The sort term must capture the expression as it was before the rewrite below.
    let sort_term = (Box::new(expr.clone()), *sort_order);
    ctx.subquery_order_by.push(sort_term);

    let has_aggregates =
        resolve_window_and_aggregate_functions(expr, ctx.resolver, &mut plan.aggregates, None)?;
    rewrite_expr_as_subquery_column(expr, ctx, has_aggregates);
    Ok(())
}
/// Walks `top_level_expr` and redirects its leaves to the subquery of the current window.
///
/// Handling per visited expression:
/// * function call without OVER: pushed down to the subquery if it matches one of `aggregates`;
/// * window function belonging to `current_window`: kept in place, but its arguments are
///   rewritten to reference the subquery and its children are not visited further;
/// * window function belonging to another window: pushed down to the subquery as a whole;
/// * `RowId` / `Column`: pushed down to the subquery;
/// * anything else: left alone, traversal continues.
fn rewrite_terminal_expr(
    aggregates: &mut Vec<Aggregate>,
    top_level_expr: &mut Expr,
    current_window: &mut Window,
    ctx: &mut WindowSubqueryContext,
) -> crate::Result<WalkControl> {
    walk_expr_mut(
        top_level_expr,
        &mut |expr: &mut Expr| -> crate::Result<WalkControl> {
            // `Some(has_over)` for function calls, `None` for plain column/rowid references.
            let function_has_over = match expr {
                Expr::FunctionCall { filter_over, .. }
                | Expr::FunctionCallStar { filter_over, .. } => {
                    Some(filter_over.over_clause.is_some())
                }
                Expr::RowId { .. } | Expr::Column { .. } => None,
                _ => return Ok(WalkControl::Continue),
            };

            match function_has_over {
                None => rewrite_expr_as_subquery_column(expr, ctx, false),
                Some(false) => {
                    // A standard (non-window) aggregate is computed by the subquery.
                    let is_known_aggregate = aggregates
                        .iter()
                        .any(|agg| exprs_are_equivalent(&agg.original_expr, expr));
                    if is_known_aggregate {
                        rewrite_expr_as_subquery_column(expr, ctx, true);
                    }
                }
                Some(true) => {
                    let owning_function = current_window
                        .functions
                        .iter_mut()
                        .find(|f| exprs_are_equivalent(&f.original_expr, expr));
                    match owning_function {
                        Some(window_function) => {
                            // The function is tied to the current window, so it stays in
                            // this query; only its child expressions are redirected to
                            // the subquery where needed.
                            let window_name = current_window
                                .name
                                .clone()
                                .expect("current_window must always have a name here");
                            rewrite_expr_referencing_current_window(
                                aggregates,
                                window_name,
                                ctx,
                                expr,
                            )?;
                            window_function.original_expr = expr.clone();
                            // Everything below this node already references the subquery,
                            // so there is nothing left to visit.
                            return Ok(WalkControl::SkipChildren);
                        }
                        None => {
                            // The function belongs to a different window. Push the whole
                            // expression to the subquery; it will be rewritten later.
                            rewrite_expr_as_subquery_column(expr, ctx, false);
                        }
                    }
                }
            }
            Ok(WalkControl::Continue)
        },
    )
}
/// Rewrites a window function call that belongs to the current window.
///
/// Each argument of a `FunctionCall` is resolved for window/aggregate functions and then
/// replaced by a reference to a subquery column. In both call forms the OVER clause is
/// replaced by a reference to the window named `window_name`.
///
/// # Panics
/// Panics if the call carries ORDER BY or FILTER (neither is supported for window
/// functions), or if `expr` is not a function call.
fn rewrite_expr_referencing_current_window(
    aggregates: &mut Vec<Aggregate>,
    window_name: String,
    ctx: &mut WindowSubqueryContext,
    expr: &mut Expr,
) -> crate::Result<()> {
    let filter_over = match expr {
        Expr::FunctionCall {
            args,
            order_by,
            filter_over,
            ..
        } => {
            for arg in args.iter_mut() {
                let has_aggregates =
                    resolve_window_and_aggregate_functions(arg, ctx.resolver, aggregates, None)?;
                rewrite_expr_as_subquery_column(arg, ctx, has_aggregates);
            }
            assert!(
                order_by.is_empty(),
                "ORDER BY in window functions is not supported"
            );
            filter_over
        }
        Expr::FunctionCallStar { filter_over, .. } => filter_over,
        _ => unreachable!("only functions can reference windows"),
    };

    // FILTER clause is not supported yet. Proper checks elsewhere return appropriate
    // error messages, and this ensures that nothing slips through unnoticed.
    assert!(
        filter_over.filter_clause.is_none(),
        "FILTER in window functions is not supported"
    );
    // Replace the inline OVER clause with a reference to the named window.
    // The window name may be user-provided or planner-generated.
    *filter_over = FunctionTail {
        filter_clause: None,
        over_clause: Some(Over::Name(Name::exact(window_name))),
    };
    Ok(())
}
/// Replaces `expr` with a reference to a column of the subquery.
///
/// If an equivalent expression is already among the subquery's result columns, its index
/// is reused. Otherwise the original expression is appended as a new result column
/// (flagged with `contains_aggregates`) and the reference points at that new column.
fn rewrite_expr_as_subquery_column(
    expr: &mut Expr,
    ctx: &mut WindowSubqueryContext,
    contains_aggregates: bool,
) {
    let existing_idx = ctx
        .subquery_result_columns
        .iter()
        .position(|col| exprs_are_equivalent(&col.expr, expr));
    // A new column, if needed, is appended at the end of the list.
    let column = existing_idx.unwrap_or(ctx.subquery_result_columns.len());

    let pushed_down = mem::replace(
        expr,
        Expr::Column {
            database: Some(SUBQUERY_DATABASE_ID),
            table: *ctx.subquery_id,
            column,
            is_rowid_alias: false,
        },
    );

    if existing_idx.is_none() {
        ctx.subquery_result_columns.push(ResultSetColumn {
            expr: pushed_down,
            alias: None,
            contains_aggregates,
        });
    }
}
/// Everything the bytecode emitters need to know about the window being processed.
/// Created by `init_window` and stored in `TranslateCtx::meta_window`.
#[derive(Debug)]
pub struct WindowMetadata<'a> {
/// Jump targets used by the window processing code.
pub labels: WindowLabels,
/// Registers reserved for window processing.
pub registers: WindowRegisters,
/// Cursors opened on the ephemeral buffer table.
pub cursors: WindowCursors,
/// Number of input columns in the source subquery.
pub src_column_count: usize,
/// Maps expressions in the current query that reference subquery columns
/// to their corresponding column indexes in the subquery's result.
pub expressions_referencing_subquery: Vec<(&'a Expr, usize)>,
/// Name of the ephemeral buffer table; passed as `table_name` when inserting rows into it.
pub buffer_table_name: String,
}
/// Labels allocated once per window by `init_window` and resolved by `emit_window_results`.
#[derive(Debug)]
pub struct WindowLabels {
/// Address of the subroutine for flushing buffered rows
pub flush_buffer: BranchOffset,
/// Address of the end of window processing
pub window_processing_end: BranchOffset,
}
/// Register numbers reserved for processing a single window.
/// The optional fields are `None` when the window has no PARTITION BY / ORDER BY terms.
#[derive(Debug)]
pub struct WindowRegisters {
/// Stores the ROWID of the last row inserted into the buffer table.
/// If NULL, we are before inserting the first row of a new partition.
pub rowid: usize,
/// Start of the register array storing partition key values for the current partition.
pub partition_start: Option<usize>,
/// Start of the register array storing accumulator states for each window function
/// (populated by `AggStep` during aggregation).
pub acc_start: usize,
/// Start of the register array storing current accumulator results for each window function
/// (populated by `AggValue` when computing results without clearing accumulators).
pub acc_result_start: usize,
/// Stores the address to which control returns after all buffered rows are flushed.
pub flush_buffer_return_offset: usize,
/// Start of consecutive registers containing column values for the current row
/// read from the subquery.
pub src_columns_start: usize,
/// Start of the register array storing column values that need to be propagated
/// from the subquery to the parent query.
pub result_columns_start: usize,
/// Start of the register array holding ORDER BY column values for the current row.
/// These registers are used to detect whether the current row is a "peer"
/// (i.e., has identical ORDER BY values to the previous row).
pub new_order_by_columns_start: Option<usize>,
/// Start of the register array holding ORDER BY column values from the previous row.
/// These are used to compare against the current row to determine peer relationships.
pub prev_order_by_columns_start: Option<usize>,
}
/// The two cursors that `init_window` opens on the ephemeral buffer table: the read cursor
/// via `OpenEphemeral`, the write cursor via `OpenDup` of the read cursor.
#[derive(Debug)]
pub struct WindowCursors {
/// Cursor used to read from the ephemeral buffer table
pub buffer_read: CursorID,
/// Cursor used to write to the ephemeral buffer table
pub buffer_write: CursorID,
}
/// Prepares bytecode generation for a plan that has a window attached.
///
/// Opens the ephemeral buffer table (one read and one write cursor), reserves the registers
/// and labels needed for window processing, registers the window functions and the
/// subquery-referencing expressions in `expr_to_reg_cache`, and stores the resulting
/// `WindowMetadata` in `t_ctx.meta_window`.
///
/// # Panics
/// Panics if the plan does not have exactly one joined table, if that table is not a
/// FROM-clause subquery with `result_columns_start_reg` set, or if the window has no name.
pub fn init_window<'a>(
program: &mut ProgramBuilder,
t_ctx: &mut TranslateCtx<'a>,
window: &'a Window,
plan: &SelectPlan,
result_columns: &'a [ResultSetColumn],
order_by: &'a [(Box<Expr>, SortOrder)],
) -> crate::Result<()> {
let joined_tables = &plan.joined_tables();
assert_eq!(joined_tables.len(), 1, "expected only one joined table");
let src_table = &joined_tables[0];
// The source rows are read from the registers in which the subquery places its result.
let reg_src_columns_start =
if let Table::FromClauseSubquery(from_clause_subquery) = &src_table.table {
from_clause_subquery
.result_columns_start_reg
.expect("Subquery result_columns_start_reg must be set")
} else {
panic!(
"expected source table to be a FromClauseSubquery, but got: {:?}",
src_table.table
);
};
let src_columns = src_table.columns().to_vec();
let src_column_count = src_columns.len();
let window_name = window.name.clone().expect("window name is missing");
let partition_by_len = window.partition_by.len();
let order_by_len = window.order_by.len();
let window_function_count = window.functions.len();
// An ephemeral table used to buffer rows for the current frame
let buffer_table = Arc::new(BTreeTable {
root_page: 0,
// TODO: Generating the name this way may cause collisions with real tables in the
// attached database. Other ephemeral tables are created similarly, so it's left
// as-is for now. Ideally, there should be a way to mark tables as ephemeral so
// they can be handled differently from regular tables.
name: format!("buffer_table_{window_name}"),
has_rowid: true,
primary_key_columns: vec![],
// The buffer table has the same columns as the source subquery.
columns: src_columns,
is_strict: false,
unique_sets: vec![],
has_autoincrement: false,
foreign_keys: vec![],
});
let cursor_buffer_read = program.alloc_cursor_id(CursorType::BTreeTable(buffer_table.clone()));
let cursor_buffer_write = program.alloc_cursor_id(CursorType::BTreeTable(buffer_table.clone()));
program.emit_insn(Insn::OpenEphemeral {
cursor_id: cursor_buffer_read,
is_table: true,
});
// The write cursor is opened as a duplicate of the read cursor.
program.emit_insn(Insn::OpenDup {
original_cursor_id: cursor_buffer_read,
new_cursor_id: cursor_buffer_write,
});
// Window function processing is similar to aggregation processing in how results are mapped
// to registers. Each function expression is stored in `expr_to_reg_cache` along with its
// result register. Later, when bytecode generation encounters the expression, the value is
// copied from the result register instead of generating code to evaluate the expression.
let reg_acc_start = program.alloc_registers(window_function_count);
let reg_acc_result_start = program.alloc_registers(window_function_count);
for (i, func) in window.functions.iter().enumerate() {
t_ctx
.resolver
.expr_to_reg_cache
.push((&func.original_expr, reg_acc_result_start + i));
}
// The same approach applies to expressions referencing the subquery (columns).
// Instead of reading directly from the subquery, we redirect them to the corresponding
// result registers. This is necessary because rows are buffered in an ephemeral table and
// returned according to the rules of the window definition.
let expressions_referencing_subquery =
collect_expressions_referencing_subquery(result_columns, order_by, &src_table.internal_id)?;
let reg_col_start = program.alloc_registers(expressions_referencing_subquery.len());
for (i, (expr, _)) in expressions_referencing_subquery.iter().enumerate() {
t_ctx
.resolver
.expr_to_reg_cache
.push((expr, reg_col_start + i));
}
// Struct fields are evaluated in the order written, so the labels and registers below are
// allocated in exactly this order.
t_ctx.meta_window = Some(WindowMetadata {
labels: WindowLabels {
flush_buffer: program.allocate_label(),
window_processing_end: program.allocate_label(),
},
registers: WindowRegisters {
// Starts out NULL, which the per-row code treats as "start of a new partition".
rowid: program.alloc_registers_and_init_w_null(1),
partition_start: if partition_by_len > 0 {
Some(program.alloc_registers_and_init_w_null(partition_by_len))
} else {
None
},
acc_start: reg_acc_start,
acc_result_start: reg_acc_result_start,
flush_buffer_return_offset: program.alloc_register(),
src_columns_start: reg_src_columns_start,
result_columns_start: reg_col_start,
prev_order_by_columns_start: alloc_optional_registers(program, order_by_len),
new_order_by_columns_start: alloc_optional_registers(program, order_by_len),
},
cursors: WindowCursors {
buffer_read: cursor_buffer_read,
buffer_write: cursor_buffer_write,
},
src_column_count,
expressions_referencing_subquery,
buffer_table_name: buffer_table.name.clone(),
});
Ok(())
}
/// Reserves `count` consecutive registers and returns the first one, or returns `None`
/// without allocating anything when `count` is zero.
fn alloc_optional_registers(program: &mut ProgramBuilder, count: usize) -> Option<usize> {
    (count > 0).then(|| program.alloc_registers(count))
}
/// Collects the column references found in the result columns and ORDER BY terms.
///
/// Returns one `(expression, column index)` pair per distinct subquery column, in the order
/// the columns are first encountered. Window function calls are not descended into.
///
/// # Panics
/// Panics if a visited column reference points at a table other than `subquery_id`.
fn collect_expressions_referencing_subquery<'a>(
    result_columns: &'a [ResultSetColumn],
    order_by: &'a [(Box<Expr>, SortOrder)],
    subquery_id: &TableInternalId,
) -> crate::Result<Vec<(&'a Expr, usize)>> {
    let mut collected: Vec<(&'a Expr, usize)> = Vec::new();
    let roots = result_columns
        .iter()
        .map(|col| &col.expr)
        .chain(order_by.iter().map(|(e, _)| e.as_ref()));

    for root_expr in roots {
        walk_expr(
            root_expr,
            &mut |expr: &Expr| -> crate::Result<WalkControl> {
                match expr {
                    // Calls with an OVER clause are skipped together with their children.
                    Expr::FunctionCall { filter_over, .. }
                    | Expr::FunctionCallStar { filter_over, .. }
                        if filter_over.over_clause.is_some() =>
                    {
                        Ok(WalkControl::SkipChildren)
                    }
                    Expr::Column { column, table, .. } => {
                        assert_eq!(
                            table, subquery_id,
                            "only subquery columns can be referenced"
                        );
                        // Keep only the first expression seen for each column index.
                        let already_collected =
                            collected.iter().any(|(_, seen_column)| seen_column == column);
                        if !already_collected {
                            collected.push((expr, *column));
                        }
                        Ok(WalkControl::Continue)
                    }
                    _ => Ok(WalkControl::Continue),
                }
            },
        )?;
    }
    Ok(collected)
}
/// Emits the bytecode that handles one row of the window's input (always a subquery).
///
/// Note:
/// The **buffer table** mentioned below is an ephemeral B-tree that temporarily
/// stores rows for the current window frame.
///
/// The emitted steps, in order:
/// - The row's ORDER BY columns are copied into dedicated registers so they can be
///   compared with those of the previous row.
/// - If the row starts a new partition (based on the PARTITION BY columns), the buffer is
///   flushed and the per-partition state is reset.
/// - The row is compared against the previous row to decide whether the two are "peers"
///   (i.e., have the same ORDER BY values); a non-peer row triggers a flush.
/// - The row is inserted into the window's buffer table.
/// - The aggregate step of every window function is executed.
///
/// # Panics
/// Panics if the window metadata or the plan's window is missing.
pub fn emit_window_loop_source(
    program: &mut ProgramBuilder,
    t_ctx: &mut TranslateCtx,
    plan: &SelectPlan,
) -> crate::Result<()> {
    let meta = t_ctx.meta_window.as_ref().expect("missing window metadata");
    let window = plan.window.as_ref().expect("missing window");
    let labels = &meta.labels;
    let registers = &meta.registers;

    emit_load_order_by_columns(program, window, registers);
    emit_flush_buffer_if_new_partition(program, labels, registers, window, plan)?;
    emit_reset_state_if_new_partition(program, registers, window);
    emit_flush_buffer_if_not_peer(program, labels, registers, window, plan)?;
    emit_insert_row_into_buffer(
        program,
        registers,
        &meta.cursors,
        &meta.src_column_count,
        &meta.buffer_table_name,
    );
    emit_aggregation_step(program, window, &t_ctx.resolver, plan, registers)
}
fn emit_flush_buffer_if_new_partition(
program: &mut ProgramBuilder,
labels: &WindowLabels,
registers: &WindowRegisters,
window: &Window,
plan: &SelectPlan,
) -> Result<()> {
if let Some(reg_partition_start) = registers.partition_start {
let same_partition_label = program.allocate_label();
let new_partition_label = program.allocate_label();
// Compare the first `deduplicated_partition_by_len` source columns with the saved
// partition keys. If they differ, this row starts a new partition and we flush the buffer.
let partition_by_len = window
.deduplicated_partition_by_len
.expect("deduplicated_partition_by_len must exist");
program.add_comment(
program.offset(),
"compare partition keys to detect new partition",
);
let mut compare_key_info = (0..window.partition_by.len())
.map(|_| KeyInfo {
sort_order: SortOrder::Asc,
collation: CollationSeq::default(),
})
.collect::<Vec<_>>();
for (i, c) in compare_key_info
.iter_mut()
.enumerate()
.take(window.partition_by.len())
{
let maybe_collation =
get_collseq_from_expr(&window.partition_by[i], &plan.table_references)?;
c.collation = maybe_collation.unwrap_or_default();
}
program.emit_insn(Insn::Compare {
start_reg_a: registers.src_columns_start,
start_reg_b: reg_partition_start,
count: partition_by_len,
key_info: compare_key_info,
});
program.emit_insn(Insn::Jump {
target_pc_lt: new_partition_label,
target_pc_eq: same_partition_label,
target_pc_gt: new_partition_label,
});
program.resolve_label(new_partition_label, program.offset());
program.add_comment(program.offset(), "detected new partition");
program.emit_insn(Insn::Gosub {
target_pc: labels.flush_buffer,
return_reg: registers.flush_buffer_return_offset,
});
// Reset rowid to signal the start of processing a new partition.
program.emit_insn(Insn::Null {
dest: registers.rowid,
dest_end: None,
});
program.emit_insn(Insn::Copy {
src_reg: registers.src_columns_start,
dst_reg: reg_partition_start,
extra_amount: partition_by_len - 1,
});
program.resolve_label(same_partition_label, program.offset());
}
Ok(())
}
/// Emits code that resets per-partition state when a new partition begins.
///
/// The emitted code is skipped at runtime whenever the rowid register is not NULL.
/// Otherwise it seeds the "previous ORDER BY" registers from the current row (when the
/// window has ORDER BY registers) and sets all accumulator registers to NULL.
fn emit_reset_state_if_new_partition(
    program: &mut ProgramBuilder,
    registers: &WindowRegisters,
    window: &Window,
) {
    let label_keep_state = program.allocate_label();
    // A NULL rowid marks the start of a new partition. It is set either by the code that
    // initializes window processing or by the code that detects a partition change.
    program.emit_insn(Insn::NotNull {
        reg: registers.rowid,
        target_pc: label_keep_state,
    });

    if let Some(reg_current_order_by) = registers.new_order_by_columns_start {
        // Seed the previous ORDER BY values for the new partition, so that the first row
        // of the partition is compared to itself rather than to the last row of the
        // previous partition.
        program.add_comment(
            program.offset(),
            "initialize previous peer register for new partition",
        );
        let reg_previous_order_by = registers
            .prev_order_by_columns_start
            .expect("prev_order_by_columns_start must exist");
        program.emit_insn(Insn::Copy {
            src_reg: reg_current_order_by,
            dst_reg: reg_previous_order_by,
            extra_amount: window.order_by.len() - 1,
        });
    }

    // A new partition starts with empty accumulators.
    program.add_comment(program.offset(), "reset accumulator registers");
    let last_acc_reg = registers.acc_start + window.functions.len() - 1;
    program.emit_insn(Insn::Null {
        dest: registers.acc_start,
        dest_end: Some(last_acc_reg),
    });
    program.preassign_label_to_next_insn(label_keep_state);
}
fn emit_flush_buffer_if_not_peer(
program: &mut ProgramBuilder,
labels: &WindowLabels,
registers: &WindowRegisters,
window: &Window,
plan: &SelectPlan,
) -> Result<()> {
if let Some(reg_new_order_by_columns_start) = registers.new_order_by_columns_start {
let label_peer = program.allocate_label();
let label_not_peer = program.allocate_label();
let order_by_len = window.order_by.len();
let reg_prev_order_by_columns_start = registers
.prev_order_by_columns_start
.expect("prev_order_by_columns_start must exist");
program.add_comment(program.offset(), "compare ORDER BY columns to detect peer");
let mut compare_key_info = (0..window.order_by.len())
.map(|_| KeyInfo {
sort_order: SortOrder::Asc,
collation: CollationSeq::default(),
})
.collect::<Vec<_>>();
for (i, c) in compare_key_info
.iter_mut()
.enumerate()
.take(window.order_by.len())
{
let maybe_collation =
get_collseq_from_expr(&window.order_by[i].0, &plan.table_references)?;
c.collation = maybe_collation.unwrap_or_default();
}
program.emit_insn(Insn::Compare {
start_reg_a: reg_prev_order_by_columns_start,
start_reg_b: reg_new_order_by_columns_start,
count: order_by_len,
key_info: compare_key_info,
});
program.emit_insn(Insn::Jump {
target_pc_lt: label_not_peer,
target_pc_eq: label_peer,
target_pc_gt: label_not_peer,
});
program.resolve_label(label_not_peer, program.offset());
program.add_comment(program.offset(), "detected non-peer row");
program.emit_insn(Insn::Gosub {
target_pc: labels.flush_buffer,
return_reg: registers.flush_buffer_return_offset,
});
program.emit_insn(Insn::Copy {
src_reg: reg_new_order_by_columns_start,
dst_reg: reg_prev_order_by_columns_start,
extra_amount: order_by_len - 1,
});
program.resolve_label(label_peer, program.offset());
}
Ok(())
}
/// Emits code that copies the current row's ORDER BY values into consecutive registers.
///
/// Nothing is emitted when the window has no ORDER BY registers.
///
/// # Panics
/// Panics if an ORDER BY term of the window is not a column reference.
fn emit_load_order_by_columns(
    program: &mut ProgramBuilder,
    window: &Window,
    registers: &WindowRegisters,
) {
    let reg_order_by_start = match registers.new_order_by_columns_start {
        Some(reg) => reg,
        None => return,
    };
    // Source columns are deduplicated and may be laid out in a different order than the
    // ORDER BY terms, so the ORDER BY layout is rebuilt here one term at a time.
    for (term_idx, (expr, _)) in window.order_by.iter().enumerate() {
        let source_column = match expr {
            Expr::Column { column, .. } => *column,
            _ => unreachable!("expected Column, got {:?}", expr),
        };
        program.emit_insn(Insn::Copy {
            src_reg: registers.src_columns_start + source_column,
            dst_reg: reg_order_by_start + term_idx,
            extra_amount: 0,
        });
    }
}
/// Emits code that appends the current source row to the buffer table.
///
/// The `input_column_count` source registers are packed into a record, a new rowid is
/// generated into the window's rowid register, and the record is inserted under that
/// rowid through the write cursor. `table_name` is passed along to the insert instruction.
fn emit_insert_row_into_buffer(
    program: &mut ProgramBuilder,
    registers: &WindowRegisters,
    cursors: &WindowCursors,
    input_column_count: &usize,
    table_name: &str,
) {
    let record_reg = program.alloc_register();
    let write_cursor = cursors.buffer_write;
    let rowid_reg = registers.rowid;

    program.emit_insn(Insn::MakeRecord {
        start_reg: registers.src_columns_start,
        count: *input_column_count,
        dest_reg: record_reg,
        index_name: None,
        affinity_str: None,
    });
    program.emit_insn(Insn::NewRowid {
        cursor: write_cursor,
        rowid_reg,
        prev_largest_reg: 0,
    });
    program.emit_insn(Insn::Insert {
        cursor: write_cursor,
        key_reg: rowid_reg,
        record_reg,
        flag: InsertFlags::new(),
        table_name: table_name.to_string(),
    });
}
fn emit_aggregation_step(
program: &mut ProgramBuilder,
window: &Window,
resolver: &Resolver,
plan: &SelectPlan,
registers: &WindowRegisters,
) -> crate::Result<()> {
for (i, func) in window.functions.iter().enumerate() {
// The aggregation step is performed incrementally as each row from the subquery is
// processed. Therefore, we dont need to access the buffer table and can obtain argument
// values directly by evaluating the expressions that reference the subquery result columns.
let args = match &func.original_expr {
Expr::FunctionCall { args, .. } => args.iter().map(|a| (**a).clone()).collect(),
Expr::FunctionCallStar { .. } => vec![],
_ => unreachable!("All window functions should be either FunctionCall or FunctionCallStar expressions"),
};
let reg_acc_start = registers.acc_start + i;
translate_aggregation_step(
program,
&plan.table_references,
AggArgumentSource::new_from_expression(&func.func, &args, &Distinctness::NonDistinct),
reg_acc_start,
resolver,
)?;
}
Ok(())
}
/// Emits the bytecode that outputs every row currently held in the window buffer.
///
/// The emitted code can be entered in two ways:
/// * **Fallthrough mode** (normal flow): once all source rows have been processed,
///   execution runs into this code, the remaining buffered rows are flushed, and
///   execution simply continues afterwards.
/// * **Subroutine mode** (jump to `labels.flush_buffer`): after all buffered rows have
///   been processed, control goes back to the address stored in
///   `registers.flush_buffer_return_offset`.
///
/// # Panics
/// Panics if the window metadata or the plan's window is missing.
pub fn emit_window_results(
    program: &mut ProgramBuilder,
    t_ctx: &mut TranslateCtx,
    plan: &SelectPlan,
) -> crate::Result<()> {
    // Copy what is needed out of the metadata up front, because `t_ctx` is handed out
    // mutably further down.
    let meta = t_ctx.meta_window.as_ref().expect("missing window metadata");
    let flush_buffer_label = meta.labels.flush_buffer;
    let processing_end_label = meta.labels.window_processing_end;
    let return_reg = meta.registers.flush_buffer_return_offset;
    let read_cursor = meta.cursors.buffer_read;
    let window = plan.window.as_ref().expect("missing window");
    let empty_buffer_label = program.allocate_label();

    // Reaching this instruction means all source rows have been processed. This is
    // fallthrough mode: there is no caller to return to, so the return register is set
    // to NULL and the final `Return` is allowed to fall through.
    program.add_comment(program.offset(), "return remaining buffered rows");
    program.emit_insn(Insn::Null {
        dest: return_reg,
        dest_end: None,
    });

    // Entry point for subroutine mode. After the flush, execution returns to the address
    // stored in `flush_buffer_return_offset`.
    program.preassign_label_to_next_insn(flush_buffer_label);
    program.emit_insn(Insn::Rewind {
        cursor_id: read_cursor,
        pc_if_empty: empty_buffer_label,
    });
    emit_return_buffered_rows(program, window, t_ctx, plan)?;

    program.resolve_label(empty_buffer_label, program.offset());
    program.emit_insn(Insn::ResetSorter {
        cursor_id: read_cursor,
    });
    program.emit_insn(Insn::Return {
        return_reg,
        can_fallthrough: true,
    });
    program.preassign_label_to_next_insn(processing_end_label);
    Ok(())
}
/// Emits the loop that walks the buffer table and outputs one result row per buffered row.
///
/// Before the loop, the current result of every window function is computed with
/// `AggValue`. Inside the loop, the needed subquery columns are read from the buffer
/// table into the result registers, and the row is either emitted as a result row (no
/// ORDER BY on the plan) or inserted into the ORDER BY sorter.
///
/// # Panics
/// Panics if the window metadata is missing, if `reg_result_cols_start` is unset when a
/// result row is emitted, or if the plan is DISTINCT without a distinct context.
fn emit_return_buffered_rows(
    program: &mut ProgramBuilder,
    window: &Window,
    t_ctx: &mut TranslateCtx,
    plan: &SelectPlan,
) -> crate::Result<()> {
    let meta = t_ctx.meta_window.as_ref().expect("missing window metadata");
    let registers = &meta.registers;
    let read_cursor = meta.cursors.buffer_read;

    for (func_idx, window_function) in window.functions.iter().enumerate() {
        program.emit_insn(Insn::AggValue {
            acc_reg: registers.acc_start + func_idx,
            dest_reg: registers.acc_result_start + func_idx,
            func: window_function.func.clone(),
        });
    }

    let label_row_done = program.allocate_label();
    let label_next_row = program.allocate_label();
    program.preassign_label_to_next_insn(label_next_row);

    // Copy the subquery column values from the buffer table into their dedicated
    // registers, so they reach the outer query (if any) or the final output returned to
    // the user.
    for (slot, (_, column_idx)) in meta.expressions_referencing_subquery.iter().enumerate() {
        program.emit_column_or_rowid(
            read_cursor,
            *column_idx,
            registers.result_columns_start + slot,
        );
    }

    t_ctx.resolver.enable_expr_to_reg_cache();
    if plan.order_by.is_empty() {
        emit_select_result(
            program,
            &t_ctx.resolver,
            plan,
            Some(meta.labels.window_processing_end),
            Some(label_row_done),
            t_ctx.reg_nonagg_emit_once_flag,
            t_ctx.reg_offset,
            t_ctx.reg_result_cols_start.unwrap(),
            t_ctx.limit_ctx,
        )?;
    } else {
        order_by_sorter_insert(program, t_ctx, plan)?;
    }

    program.resolve_label(label_row_done, program.offset());
    if let Distinctness::Distinct { ctx } = &plan.distinctness {
        let distinct_ctx = ctx.as_ref().expect("distinct context must exist");
        // The DISTINCT conflict label is bound to the `Next` emitted below.
        program.preassign_label_to_next_insn(distinct_ctx.label_on_conflict);
    }
    program.emit_insn(Insn::Next {
        cursor_id: read_cursor,
        pc_if_next: label_next_row,
    });
    Ok(())
}