mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-26 12:34:22 +01:00
526 lines
24 KiB
Rust
526 lines
24 KiB
Rust
use super::emitter::{emit_program, TranslateCtx};
|
|
use super::plan::{select_star, JoinOrderMember, Operation, Search, SelectQueryType};
|
|
use super::planner::Scope;
|
|
use crate::function::{AggFunc, ExtFunc, Func};
|
|
use crate::translate::optimizer::optimize_plan;
|
|
use crate::translate::plan::{Aggregate, GroupBy, Plan, ResultSetColumn, SelectPlan};
|
|
use crate::translate::planner::{
|
|
bind_column_references, break_predicate_at_and_boundaries, parse_from, parse_limit,
|
|
parse_where, resolve_aggregates,
|
|
};
|
|
use crate::util::normalize_ident;
|
|
use crate::vdbe::builder::{ProgramBuilderOpts, QueryMode};
|
|
use crate::vdbe::insn::Insn;
|
|
use crate::SymbolTable;
|
|
use crate::{schema::Schema, vdbe::builder::ProgramBuilder, Result};
|
|
use limbo_sqlite3_parser::ast::{self, SortOrder};
|
|
use limbo_sqlite3_parser::ast::{ResultColumn, SelectInner};
|
|
|
|
pub fn translate_select(
|
|
query_mode: QueryMode,
|
|
schema: &Schema,
|
|
select: ast::Select,
|
|
syms: &SymbolTable,
|
|
) -> Result<ProgramBuilder> {
|
|
let mut select_plan = prepare_select_plan(schema, select, syms, None)?;
|
|
optimize_plan(&mut select_plan, schema)?;
|
|
let Plan::Select(ref select) = select_plan else {
|
|
panic!("select_plan is not a SelectPlan");
|
|
};
|
|
|
|
let mut program = ProgramBuilder::new(ProgramBuilderOpts {
|
|
query_mode,
|
|
num_cursors: count_plan_required_cursors(select),
|
|
approx_num_insns: estimate_num_instructions(select),
|
|
approx_num_labels: estimate_num_labels(select),
|
|
});
|
|
emit_program(&mut program, select_plan, syms)?;
|
|
Ok(program)
|
|
}
|
|
|
|
pub fn prepare_select_plan<'a>(
|
|
schema: &Schema,
|
|
select: ast::Select,
|
|
syms: &SymbolTable,
|
|
outer_scope: Option<&'a Scope<'a>>,
|
|
) -> Result<Plan> {
|
|
match *select.body.select {
|
|
ast::OneSelect::Select(select_inner) => {
|
|
let SelectInner {
|
|
mut columns,
|
|
from,
|
|
where_clause,
|
|
group_by,
|
|
..
|
|
} = *select_inner;
|
|
let col_count = columns.len();
|
|
if col_count == 0 {
|
|
crate::bail_parse_error!("SELECT without columns is not allowed");
|
|
}
|
|
|
|
let mut where_predicates = vec![];
|
|
|
|
let with = select.with;
|
|
|
|
// Parse the FROM clause into a vec of TableReferences. Fold all the join conditions expressions into the WHERE clause.
|
|
let table_references =
|
|
parse_from(schema, from, syms, with, &mut where_predicates, outer_scope)?;
|
|
|
|
// Preallocate space for the result columns
|
|
let result_columns = Vec::with_capacity(
|
|
columns
|
|
.iter()
|
|
.map(|c| match c {
|
|
// Allocate space for all columns in all tables
|
|
ResultColumn::Star => {
|
|
table_references.iter().map(|t| t.columns().len()).sum()
|
|
}
|
|
// Guess 5 columns if we can't find the table using the identifier (maybe it's in [brackets] or `tick_quotes`, or miXeDcAse)
|
|
ResultColumn::TableStar(n) => table_references
|
|
.iter()
|
|
.find(|t| t.identifier == n.0)
|
|
.map(|t| t.columns().len())
|
|
.unwrap_or(5),
|
|
// Otherwise allocate space for 1 column
|
|
ResultColumn::Expr(_, _) => 1,
|
|
})
|
|
.sum(),
|
|
);
|
|
|
|
let mut plan = SelectPlan {
|
|
join_order: table_references
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, t)| JoinOrderMember {
|
|
table_no: i,
|
|
is_outer: t.join_info.as_ref().map_or(false, |j| j.outer),
|
|
})
|
|
.collect(),
|
|
table_references,
|
|
result_columns,
|
|
where_clause: where_predicates,
|
|
group_by: None,
|
|
order_by: None,
|
|
aggregates: vec![],
|
|
limit: None,
|
|
offset: None,
|
|
contains_constant_false_condition: false,
|
|
query_type: SelectQueryType::TopLevel,
|
|
};
|
|
|
|
let mut aggregate_expressions = Vec::new();
|
|
for column in columns.iter_mut() {
|
|
match column {
|
|
ResultColumn::Star => {
|
|
select_star(&plan.table_references, &mut plan.result_columns);
|
|
for table in plan.table_references.iter_mut() {
|
|
for idx in 0..table.columns().len() {
|
|
table.mark_column_used(idx);
|
|
}
|
|
}
|
|
}
|
|
ResultColumn::TableStar(name) => {
|
|
let name_normalized = normalize_ident(name.0.as_str());
|
|
let referenced_table = plan
|
|
.table_references
|
|
.iter_mut()
|
|
.enumerate()
|
|
.find(|(_, t)| t.identifier == name_normalized);
|
|
|
|
if referenced_table.is_none() {
|
|
crate::bail_parse_error!("Table {} not found", name.0);
|
|
}
|
|
let (table_index, table) = referenced_table.unwrap();
|
|
let num_columns = table.columns().len();
|
|
for idx in 0..num_columns {
|
|
let is_rowid_alias = {
|
|
let columns = table.columns();
|
|
columns[idx].is_rowid_alias
|
|
};
|
|
plan.result_columns.push(ResultSetColumn {
|
|
expr: ast::Expr::Column {
|
|
database: None, // TODO: support different databases
|
|
table: table_index,
|
|
column: idx,
|
|
is_rowid_alias,
|
|
},
|
|
alias: None,
|
|
contains_aggregates: false,
|
|
});
|
|
table.mark_column_used(idx);
|
|
}
|
|
}
|
|
ResultColumn::Expr(ref mut expr, maybe_alias) => {
|
|
bind_column_references(
|
|
expr,
|
|
&mut plan.table_references,
|
|
Some(&plan.result_columns),
|
|
)?;
|
|
match expr {
|
|
ast::Expr::FunctionCall {
|
|
name,
|
|
distinctness: _,
|
|
args,
|
|
filter_over: _,
|
|
order_by: _,
|
|
} => {
|
|
let args_count = if let Some(args) = &args {
|
|
args.len()
|
|
} else {
|
|
0
|
|
};
|
|
match Func::resolve_function(
|
|
normalize_ident(name.0.as_str()).as_str(),
|
|
args_count,
|
|
) {
|
|
Ok(Func::Agg(f)) => {
|
|
let agg_args = match (args, &f) {
|
|
(None, crate::function::AggFunc::Count0) => {
|
|
// COUNT() case
|
|
vec![ast::Expr::Literal(ast::Literal::Numeric(
|
|
"1".to_string(),
|
|
))]
|
|
}
|
|
(None, _) => crate::bail_parse_error!(
|
|
"Aggregate function {} requires arguments",
|
|
name.0
|
|
),
|
|
(Some(args), _) => args.clone(),
|
|
};
|
|
|
|
let agg = Aggregate {
|
|
func: f,
|
|
args: agg_args.clone(),
|
|
original_expr: expr.clone(),
|
|
};
|
|
aggregate_expressions.push(agg.clone());
|
|
plan.result_columns.push(ResultSetColumn {
|
|
alias: maybe_alias.as_ref().map(|alias| match alias {
|
|
ast::As::Elided(alias) => alias.0.clone(),
|
|
ast::As::As(alias) => alias.0.clone(),
|
|
}),
|
|
expr: expr.clone(),
|
|
contains_aggregates: true,
|
|
});
|
|
}
|
|
Ok(_) => {
|
|
let contains_aggregates =
|
|
resolve_aggregates(expr, &mut aggregate_expressions);
|
|
plan.result_columns.push(ResultSetColumn {
|
|
alias: maybe_alias.as_ref().map(|alias| match alias {
|
|
ast::As::Elided(alias) => alias.0.clone(),
|
|
ast::As::As(alias) => alias.0.clone(),
|
|
}),
|
|
expr: expr.clone(),
|
|
contains_aggregates,
|
|
});
|
|
}
|
|
Err(e) => {
|
|
if let Some(f) = syms.resolve_function(&name.0, args_count)
|
|
{
|
|
if let ExtFunc::Scalar(_) = f.as_ref().func {
|
|
let contains_aggregates = resolve_aggregates(
|
|
expr,
|
|
&mut aggregate_expressions,
|
|
);
|
|
plan.result_columns.push(ResultSetColumn {
|
|
alias: maybe_alias.as_ref().map(|alias| {
|
|
match alias {
|
|
ast::As::Elided(alias) => {
|
|
alias.0.clone()
|
|
}
|
|
ast::As::As(alias) => alias.0.clone(),
|
|
}
|
|
}),
|
|
expr: expr.clone(),
|
|
contains_aggregates,
|
|
});
|
|
} else {
|
|
let agg = Aggregate {
|
|
func: AggFunc::External(f.func.clone().into()),
|
|
args: args.as_ref().unwrap().clone(),
|
|
original_expr: expr.clone(),
|
|
};
|
|
aggregate_expressions.push(agg.clone());
|
|
plan.result_columns.push(ResultSetColumn {
|
|
alias: maybe_alias.as_ref().map(|alias| {
|
|
match alias {
|
|
ast::As::Elided(alias) => {
|
|
alias.0.clone()
|
|
}
|
|
ast::As::As(alias) => alias.0.clone(),
|
|
}
|
|
}),
|
|
expr: expr.clone(),
|
|
contains_aggregates: true,
|
|
});
|
|
}
|
|
continue; // Continue with the normal flow instead of returning
|
|
} else {
|
|
return Err(e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
ast::Expr::FunctionCallStar {
|
|
name,
|
|
filter_over: _,
|
|
} => {
|
|
if let Ok(Func::Agg(f)) = Func::resolve_function(
|
|
normalize_ident(name.0.as_str()).as_str(),
|
|
0,
|
|
) {
|
|
let agg = Aggregate {
|
|
func: f,
|
|
args: vec![ast::Expr::Literal(ast::Literal::Numeric(
|
|
"1".to_string(),
|
|
))],
|
|
original_expr: expr.clone(),
|
|
};
|
|
aggregate_expressions.push(agg.clone());
|
|
plan.result_columns.push(ResultSetColumn {
|
|
alias: maybe_alias.as_ref().map(|alias| match alias {
|
|
ast::As::Elided(alias) => alias.0.clone(),
|
|
ast::As::As(alias) => alias.0.clone(),
|
|
}),
|
|
expr: expr.clone(),
|
|
contains_aggregates: true,
|
|
});
|
|
} else {
|
|
crate::bail_parse_error!(
|
|
"Invalid aggregate function: {}",
|
|
name.0
|
|
);
|
|
}
|
|
}
|
|
expr => {
|
|
let contains_aggregates =
|
|
resolve_aggregates(expr, &mut aggregate_expressions);
|
|
plan.result_columns.push(ResultSetColumn {
|
|
alias: maybe_alias.as_ref().map(|alias| match alias {
|
|
ast::As::Elided(alias) => alias.0.clone(),
|
|
ast::As::As(alias) => alias.0.clone(),
|
|
}),
|
|
expr: expr.clone(),
|
|
contains_aggregates,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Parse the actual WHERE clause and add its conditions to the plan WHERE clause that already contains the join conditions.
|
|
parse_where(
|
|
where_clause,
|
|
&mut plan.table_references,
|
|
Some(&plan.result_columns),
|
|
&mut plan.where_clause,
|
|
)?;
|
|
|
|
if let Some(mut group_by) = group_by {
|
|
for expr in group_by.exprs.iter_mut() {
|
|
replace_column_number_with_copy_of_column_expr(expr, &plan.result_columns)?;
|
|
bind_column_references(
|
|
expr,
|
|
&mut plan.table_references,
|
|
Some(&plan.result_columns),
|
|
)?;
|
|
}
|
|
|
|
plan.group_by = Some(GroupBy {
|
|
sort_order: Some((0..group_by.exprs.len()).map(|_| SortOrder::Asc).collect()),
|
|
exprs: group_by.exprs,
|
|
having: if let Some(having) = group_by.having {
|
|
let mut predicates = vec![];
|
|
break_predicate_at_and_boundaries(*having, &mut predicates);
|
|
for expr in predicates.iter_mut() {
|
|
bind_column_references(
|
|
expr,
|
|
&mut plan.table_references,
|
|
Some(&plan.result_columns),
|
|
)?;
|
|
let contains_aggregates =
|
|
resolve_aggregates(expr, &mut aggregate_expressions);
|
|
if !contains_aggregates {
|
|
// TODO: sqlite allows HAVING clauses with non aggregate expressions like
|
|
// HAVING id = 5. We should support this too eventually (I guess).
|
|
// sqlite3-parser does not support HAVING without group by though, so we'll
|
|
// need to either make a PR or add it to our vendored version.
|
|
crate::bail_parse_error!(
|
|
"HAVING clause must contain an aggregate function"
|
|
);
|
|
}
|
|
}
|
|
Some(predicates)
|
|
} else {
|
|
None
|
|
},
|
|
});
|
|
}
|
|
|
|
plan.aggregates = aggregate_expressions;
|
|
|
|
// Parse the ORDER BY clause
|
|
if let Some(order_by) = select.order_by {
|
|
let mut key = Vec::new();
|
|
|
|
for mut o in order_by {
|
|
replace_column_number_with_copy_of_column_expr(
|
|
&mut o.expr,
|
|
&plan.result_columns,
|
|
)?;
|
|
|
|
bind_column_references(
|
|
&mut o.expr,
|
|
&mut plan.table_references,
|
|
Some(&plan.result_columns),
|
|
)?;
|
|
resolve_aggregates(&o.expr, &mut plan.aggregates);
|
|
|
|
key.push((o.expr, o.order.unwrap_or(ast::SortOrder::Asc)));
|
|
}
|
|
plan.order_by = Some(key);
|
|
}
|
|
|
|
// Parse the LIMIT/OFFSET clause
|
|
(plan.limit, plan.offset) =
|
|
select.limit.map_or(Ok((None, None)), |l| parse_limit(&l))?;
|
|
|
|
// Return the unoptimized query plan
|
|
Ok(Plan::Select(plan))
|
|
}
|
|
_ => todo!(),
|
|
}
|
|
}
|
|
|
|
/// Replaces a column number in an ORDER BY or GROUP BY expression with a copy of the column expression.
|
|
/// For example, in SELECT u.first_name, count(1) FROM users u GROUP BY 1 ORDER BY 2,
|
|
/// the column number 1 is replaced with u.first_name and the column number 2 is replaced with count(1).
|
|
fn replace_column_number_with_copy_of_column_expr(
|
|
order_by_or_group_by_expr: &mut ast::Expr,
|
|
columns: &[ResultSetColumn],
|
|
) -> Result<()> {
|
|
if let ast::Expr::Literal(ast::Literal::Numeric(num)) = order_by_or_group_by_expr {
|
|
let column_number = num.parse::<usize>()?;
|
|
if column_number == 0 {
|
|
crate::bail_parse_error!("invalid column index: {}", column_number);
|
|
}
|
|
let maybe_result_column = columns.get(column_number - 1);
|
|
match maybe_result_column {
|
|
Some(ResultSetColumn { expr, .. }) => {
|
|
*order_by_or_group_by_expr = expr.clone();
|
|
}
|
|
None => {
|
|
crate::bail_parse_error!("invalid column index: {}", column_number)
|
|
}
|
|
};
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn count_plan_required_cursors(plan: &SelectPlan) -> usize {
|
|
let num_table_cursors: usize = plan
|
|
.table_references
|
|
.iter()
|
|
.map(|t| match &t.op {
|
|
Operation::Scan { .. } => 1,
|
|
Operation::Search(search) => match search {
|
|
Search::RowidEq { .. } => 1,
|
|
Search::Seek { index, .. } => 1 + index.is_some() as usize,
|
|
},
|
|
Operation::Subquery { plan, .. } => count_plan_required_cursors(plan),
|
|
})
|
|
.sum();
|
|
let num_sorter_cursors = plan.group_by.is_some() as usize + plan.order_by.is_some() as usize;
|
|
let num_pseudo_cursors = plan.group_by.is_some() as usize + plan.order_by.is_some() as usize;
|
|
|
|
num_table_cursors + num_sorter_cursors + num_pseudo_cursors
|
|
}
|
|
|
|
fn estimate_num_instructions(select: &SelectPlan) -> usize {
|
|
let table_instructions: usize = select
|
|
.table_references
|
|
.iter()
|
|
.map(|t| match &t.op {
|
|
Operation::Scan { .. } => 10,
|
|
Operation::Search(_) => 15,
|
|
Operation::Subquery { plan, .. } => 10 + estimate_num_instructions(plan),
|
|
})
|
|
.sum();
|
|
|
|
let group_by_instructions = select.group_by.is_some() as usize * 10;
|
|
let order_by_instructions = select.order_by.is_some() as usize * 10;
|
|
let condition_instructions = select.where_clause.len() * 3;
|
|
|
|
let num_instructions = 20
|
|
+ table_instructions
|
|
+ group_by_instructions
|
|
+ order_by_instructions
|
|
+ condition_instructions;
|
|
|
|
num_instructions
|
|
}
|
|
|
|
fn estimate_num_labels(select: &SelectPlan) -> usize {
|
|
let init_halt_labels = 2;
|
|
// 3 loop labels for each table in main loop + 1 to signify end of main loop
|
|
let table_labels = select
|
|
.table_references
|
|
.iter()
|
|
.map(|t| match &t.op {
|
|
Operation::Scan { .. } => 3,
|
|
Operation::Search(_) => 3,
|
|
Operation::Subquery { plan, .. } => 3 + estimate_num_labels(plan),
|
|
})
|
|
.sum::<usize>()
|
|
+ 1;
|
|
|
|
let group_by_labels = select.group_by.is_some() as usize * 10;
|
|
let order_by_labels = select.order_by.is_some() as usize * 10;
|
|
let condition_labels = select.where_clause.len() * 2;
|
|
|
|
let num_labels =
|
|
init_halt_labels + table_labels + group_by_labels + order_by_labels + condition_labels;
|
|
|
|
num_labels
|
|
}
|
|
|
|
pub fn emit_simple_count<'a>(
|
|
program: &mut ProgramBuilder,
|
|
_t_ctx: &mut TranslateCtx<'a>,
|
|
plan: &'a SelectPlan,
|
|
) -> Result<()> {
|
|
let cursors = plan
|
|
.table_references
|
|
.get(0)
|
|
.unwrap()
|
|
.resolve_cursors(program)?;
|
|
|
|
let cursor_id = {
|
|
match cursors {
|
|
(_, Some(cursor_id)) | (Some(cursor_id), None) => cursor_id,
|
|
_ => panic!("cursor for table should have been opened"),
|
|
}
|
|
};
|
|
|
|
// TODO: I think this allocation can be avoided if we are smart with the `TranslateCtx`
|
|
let target_reg = program.alloc_register();
|
|
|
|
program.emit_insn(Insn::Count {
|
|
cursor_id,
|
|
target_reg,
|
|
exact: true,
|
|
});
|
|
|
|
program.emit_insn(Insn::Close { cursor_id });
|
|
let output_reg = program.alloc_register();
|
|
program.emit_insn(Insn::Copy {
|
|
src_reg: target_reg,
|
|
dst_reg: output_reg,
|
|
amount: 0,
|
|
});
|
|
program.emit_result_row(output_reg, 1);
|
|
Ok(())
|
|
}
|