mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-29 05:54:21 +01:00
551 lines
19 KiB
Rust
551 lines
19 KiB
Rust
use core::fmt;
|
|
use sqlite3_parser::ast;
|
|
use std::{
|
|
fmt::{Display, Formatter},
|
|
rc::Rc,
|
|
};
|
|
|
|
use crate::{
|
|
function::AggFunc,
|
|
schema::{Column, Index, Table},
|
|
vdbe::BranchOffset,
|
|
Result,
|
|
};
|
|
use crate::{
|
|
schema::{PseudoTable, Type},
|
|
translate::plan::Plan::{Delete, Select},
|
|
};
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct ResultSetColumn {
|
|
pub expr: ast::Expr,
|
|
pub name: String,
|
|
// TODO: encode which aggregates (e.g. index bitmask of plan.aggregates) are present in this column
|
|
pub contains_aggregates: bool,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct GroupBy {
|
|
pub exprs: Vec<ast::Expr>,
|
|
/// having clause split into a vec at 'AND' boundaries.
|
|
pub having: Option<Vec<ast::Expr>>,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub enum Plan {
|
|
Select(SelectPlan),
|
|
Delete(DeletePlan),
|
|
}
|
|
|
|
/// The type of the query, either top level or subquery
|
|
#[derive(Debug, Clone)]
|
|
pub enum SelectQueryType {
|
|
TopLevel,
|
|
Subquery {
|
|
/// The register that holds the program offset that handles jumping to/from the subquery.
|
|
yield_reg: usize,
|
|
/// The index of the first instruction in the bytecode that implements the subquery.
|
|
coroutine_implementation_start: BranchOffset,
|
|
},
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct SelectPlan {
|
|
/// A tree of sources (tables).
|
|
pub source: SourceOperator,
|
|
/// the columns inside SELECT ... FROM
|
|
pub result_columns: Vec<ResultSetColumn>,
|
|
/// where clause split into a vec at 'AND' boundaries.
|
|
pub where_clause: Option<Vec<ast::Expr>>,
|
|
/// group by clause
|
|
pub group_by: Option<GroupBy>,
|
|
/// order by clause
|
|
pub order_by: Option<Vec<(ast::Expr, Direction)>>,
|
|
/// all the aggregates collected from the result columns, order by, and (TODO) having clauses
|
|
pub aggregates: Vec<Aggregate>,
|
|
/// limit clause
|
|
pub limit: Option<usize>,
|
|
/// all the tables referenced in the query
|
|
pub referenced_tables: Vec<TableReference>,
|
|
/// all the indexes available
|
|
pub available_indexes: Vec<Rc<Index>>,
|
|
/// query contains a constant condition that is always false
|
|
pub contains_constant_false_condition: bool,
|
|
/// query type (top level or subquery)
|
|
pub query_type: SelectQueryType,
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
#[derive(Debug, Clone)]
|
|
pub struct DeletePlan {
|
|
/// A tree of sources (tables).
|
|
pub source: SourceOperator,
|
|
/// the columns inside SELECT ... FROM
|
|
pub result_columns: Vec<ResultSetColumn>,
|
|
/// where clause split into a vec at 'AND' boundaries.
|
|
pub where_clause: Option<Vec<ast::Expr>>,
|
|
/// order by clause
|
|
pub order_by: Option<Vec<(ast::Expr, Direction)>>,
|
|
/// limit clause
|
|
pub limit: Option<usize>,
|
|
/// all the tables referenced in the query
|
|
pub referenced_tables: Vec<TableReference>,
|
|
/// all the indexes available
|
|
pub available_indexes: Vec<Rc<Index>>,
|
|
/// query contains a constant condition that is always false
|
|
pub contains_constant_false_condition: bool,
|
|
}
|
|
|
|
impl Display for Plan {
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
match self {
|
|
Select(select_plan) => write!(f, "{}", select_plan.source),
|
|
Delete(delete_plan) => write!(f, "{}", delete_plan.source),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Direction in which an iterator walks its rows.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IterationDirection {
    /// Iterate forwards.
    Forwards,
    /// Iterate backwards.
    Backwards,
}
|
|
|
|
impl SourceOperator {
|
|
pub fn select_star(&self, out_columns: &mut Vec<ResultSetColumn>) {
|
|
for (table_index, col, idx) in self.select_star_helper() {
|
|
out_columns.push(ResultSetColumn {
|
|
name: col.name.clone(),
|
|
expr: ast::Expr::Column {
|
|
database: None,
|
|
table: table_index,
|
|
column: idx,
|
|
is_rowid_alias: col.is_rowid_alias,
|
|
},
|
|
contains_aggregates: false,
|
|
});
|
|
}
|
|
}
|
|
|
|
/// All this ceremony is required to deduplicate columns when joining with USING
|
|
fn select_star_helper(&self) -> Vec<(usize, &Column, usize)> {
|
|
match self {
|
|
SourceOperator::Join {
|
|
left, right, using, ..
|
|
} => {
|
|
let mut columns = left.select_star_helper();
|
|
|
|
// Join columns are filtered out from the right side
|
|
// in the case of a USING join.
|
|
if let Some(using_cols) = using {
|
|
let right_columns = right.select_star_helper();
|
|
|
|
for (table_index, col, idx) in right_columns {
|
|
if !using_cols
|
|
.iter()
|
|
.any(|using_col| col.name.eq_ignore_ascii_case(&using_col.0))
|
|
{
|
|
columns.push((table_index, col, idx));
|
|
}
|
|
}
|
|
} else {
|
|
columns.extend(right.select_star_helper());
|
|
}
|
|
columns
|
|
}
|
|
SourceOperator::Scan {
|
|
table_reference, ..
|
|
}
|
|
| SourceOperator::Search {
|
|
table_reference, ..
|
|
}
|
|
| SourceOperator::Subquery {
|
|
table_reference, ..
|
|
} => table_reference
|
|
.columns()
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, col)| (table_reference.table_index, col, i))
|
|
.collect(),
|
|
SourceOperator::Nothing { .. } => Vec::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
A SourceOperator is a Node in the query plan that reads data from a table.
|
|
*/
|
|
#[derive(Clone, Debug)]
|
|
pub enum SourceOperator {
|
|
// Join operator
|
|
// This operator is used to join two source operators.
|
|
// It takes a left and right source operator, a list of predicates to evaluate,
|
|
// and a boolean indicating whether it is an outer join.
|
|
Join {
|
|
id: usize,
|
|
left: Box<SourceOperator>,
|
|
right: Box<SourceOperator>,
|
|
predicates: Option<Vec<ast::Expr>>,
|
|
outer: bool,
|
|
using: Option<ast::DistinctNames>,
|
|
},
|
|
// Scan operator
|
|
// This operator is used to scan a table.
|
|
// It takes a table to scan and an optional list of predicates to evaluate.
|
|
// The predicates are used to filter rows from the table.
|
|
// e.g. SELECT * FROM t1 WHERE t1.foo = 5
|
|
// The iter_dir are uset to indicate the direction of the iterator.
|
|
// The use of Option for iter_dir is aimed at implementing a conservative optimization strategy: it only pushes
|
|
// iter_dir down to Scan when iter_dir is None, to prevent potential result set errors caused by multiple
|
|
// assignments. for more detailed discussions, please refer to https://github.com/penberg/limbo/pull/376
|
|
Scan {
|
|
id: usize,
|
|
table_reference: TableReference,
|
|
predicates: Option<Vec<ast::Expr>>,
|
|
iter_dir: Option<IterationDirection>,
|
|
},
|
|
// Search operator
|
|
// This operator is used to search for a row in a table using an index
|
|
// (i.e. a primary key or a secondary index)
|
|
Search {
|
|
id: usize,
|
|
table_reference: TableReference,
|
|
search: Search,
|
|
predicates: Option<Vec<ast::Expr>>,
|
|
},
|
|
Subquery {
|
|
id: usize,
|
|
table_reference: TableReference,
|
|
plan: Box<SelectPlan>,
|
|
predicates: Option<Vec<ast::Expr>>,
|
|
},
|
|
// Nothing operator
|
|
// This operator is used to represent an empty query.
|
|
// e.g. SELECT * from foo WHERE 0 will eventually be optimized to Nothing.
|
|
Nothing {
|
|
id: usize,
|
|
},
|
|
}
|
|
|
|
/// What a `TableReference` points at: a B-tree table or a subquery.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum TableReferenceType {
    /// A table that is stored on disk in a B-tree index.
    BTreeTable,
    /// A subquery.
    Subquery {
        /// Index of the first register in the query plan that holds the
        /// result columns of the subquery.
        result_columns_start_reg: usize,
    },
}
|
|
|
|
/// A query plan has a list of TableReference objects, each of which represents a table or subquery.
|
|
#[derive(Clone, Debug)]
|
|
pub struct TableReference {
|
|
/// Table object, which contains metadata about the table, e.g. columns.
|
|
pub table: Table,
|
|
/// The name of the table as referred to in the query, either the literal name or an alias e.g. "users" or "u"
|
|
pub table_identifier: String,
|
|
/// The index of this reference in the list of TableReference objects in the query plan
|
|
/// The reference at index 0 is the first table in the FROM clause, the reference at index 1 is the second table in the FROM clause, etc.
|
|
/// So, the index is relevant for determining when predicates (WHERE, ON filters etc.) should be evaluated.
|
|
pub table_index: usize,
|
|
/// The type of the table reference, either BTreeTable or Subquery
|
|
pub reference_type: TableReferenceType,
|
|
}
|
|
|
|
impl TableReference {
|
|
pub fn new_subquery(identifier: String, table_index: usize, plan: &SelectPlan) -> Self {
|
|
Self {
|
|
table: Table::Pseudo(Rc::new(PseudoTable::new_with_columns(
|
|
plan.result_columns
|
|
.iter()
|
|
.map(|rc| Column {
|
|
name: rc.name.clone(),
|
|
ty: Type::Text, // FIXME: infer proper type
|
|
is_rowid_alias: false,
|
|
primary_key: false,
|
|
})
|
|
.collect(),
|
|
))),
|
|
table_identifier: identifier.clone(),
|
|
table_index,
|
|
reference_type: TableReferenceType::Subquery {
|
|
result_columns_start_reg: 0, // Will be set in the bytecode emission phase
|
|
},
|
|
}
|
|
}
|
|
|
|
pub fn columns(&self) -> &[Column] {
|
|
self.table.columns()
|
|
}
|
|
}
|
|
|
|
/// An enum that represents a search operation that can be used to search for a row in a table using an index
|
|
/// (i.e. a primary key or a secondary index)
|
|
#[allow(clippy::enum_variant_names)]
|
|
#[derive(Clone, Debug)]
|
|
pub enum Search {
|
|
/// A rowid equality point lookup. This is a special case that uses the SeekRowid bytecode instruction and does not loop.
|
|
RowidEq { cmp_expr: ast::Expr },
|
|
/// A rowid search. Uses bytecode instructions like SeekGT, SeekGE etc.
|
|
RowidSearch {
|
|
cmp_op: ast::Operator,
|
|
cmp_expr: ast::Expr,
|
|
},
|
|
/// A secondary index search. Uses bytecode instructions like SeekGE, SeekGT etc.
|
|
IndexSearch {
|
|
index: Rc<Index>,
|
|
cmp_op: ast::Operator,
|
|
cmp_expr: ast::Expr,
|
|
},
|
|
}
|
|
|
|
impl SourceOperator {
|
|
pub fn id(&self) -> usize {
|
|
match self {
|
|
SourceOperator::Join { id, .. } => *id,
|
|
SourceOperator::Scan { id, .. } => *id,
|
|
SourceOperator::Search { id, .. } => *id,
|
|
SourceOperator::Subquery { id, .. } => *id,
|
|
SourceOperator::Nothing { id } => *id,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Sort direction of an ORDER BY term.
///
/// `Eq` is derived alongside `PartialEq`: equality on this field-less enum is
/// total, and this matches the other plain enums in this file.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Direction {
    /// Displayed as `ASC`.
    Ascending,
    /// Displayed as `DESC`.
    Descending,
}
|
|
|
|
impl Display for Direction {
|
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
|
match self {
|
|
Direction::Ascending => write!(f, "ASC"),
|
|
Direction::Descending => write!(f, "DESC"),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug, PartialEq)]
|
|
pub struct Aggregate {
|
|
pub func: AggFunc,
|
|
pub args: Vec<ast::Expr>,
|
|
pub original_expr: ast::Expr,
|
|
}
|
|
|
|
impl Display for Aggregate {
|
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
|
let args_str = self
|
|
.args
|
|
.iter()
|
|
.map(|arg| arg.to_string())
|
|
.collect::<Vec<String>>()
|
|
.join(", ");
|
|
write!(f, "{:?}({})", self.func, args_str)
|
|
}
|
|
}
|
|
|
|
// For EXPLAIN QUERY PLAN
|
|
impl Display for SourceOperator {
|
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
|
fn fmt_operator(
|
|
operator: &SourceOperator,
|
|
f: &mut Formatter,
|
|
level: usize,
|
|
last: bool,
|
|
) -> fmt::Result {
|
|
let indent = if level == 0 {
|
|
if last { "`--" } else { "|--" }.to_string()
|
|
} else {
|
|
format!(
|
|
" {}{}",
|
|
"| ".repeat(level - 1),
|
|
if last { "`--" } else { "|--" }
|
|
)
|
|
};
|
|
|
|
match operator {
|
|
SourceOperator::Join {
|
|
left,
|
|
right,
|
|
predicates,
|
|
outer,
|
|
..
|
|
} => {
|
|
let join_name = if *outer { "OUTER JOIN" } else { "JOIN" };
|
|
match predicates
|
|
.as_ref()
|
|
.and_then(|ps| if ps.is_empty() { None } else { Some(ps) })
|
|
{
|
|
Some(ps) => {
|
|
let predicates_string = ps
|
|
.iter()
|
|
.map(|p| p.to_string())
|
|
.collect::<Vec<String>>()
|
|
.join(" AND ");
|
|
writeln!(f, "{}{} ON {}", indent, join_name, predicates_string)?;
|
|
}
|
|
None => writeln!(f, "{}{}", indent, join_name)?,
|
|
}
|
|
fmt_operator(left, f, level + 1, false)?;
|
|
fmt_operator(right, f, level + 1, true)
|
|
}
|
|
SourceOperator::Scan {
|
|
table_reference,
|
|
predicates: filter,
|
|
..
|
|
} => {
|
|
let table_name =
|
|
if table_reference.table.get_name() == table_reference.table_identifier {
|
|
table_reference.table_identifier.clone()
|
|
} else {
|
|
format!(
|
|
"{} AS {}",
|
|
&table_reference.table.get_name(),
|
|
&table_reference.table_identifier
|
|
)
|
|
};
|
|
let filter_string = filter.as_ref().map(|f| {
|
|
let filters_string = f
|
|
.iter()
|
|
.map(|p| p.to_string())
|
|
.collect::<Vec<String>>()
|
|
.join(" AND ");
|
|
format!("FILTER {}", filters_string)
|
|
});
|
|
match filter_string {
|
|
Some(fs) => writeln!(f, "{}SCAN {} {}", indent, table_name, fs),
|
|
None => writeln!(f, "{}SCAN {}", indent, table_name),
|
|
}?;
|
|
Ok(())
|
|
}
|
|
SourceOperator::Search {
|
|
table_reference,
|
|
search,
|
|
..
|
|
} => {
|
|
match search {
|
|
Search::RowidEq { .. } | Search::RowidSearch { .. } => {
|
|
writeln!(
|
|
f,
|
|
"{}SEARCH {} USING INTEGER PRIMARY KEY (rowid=?)",
|
|
indent, table_reference.table_identifier
|
|
)?;
|
|
}
|
|
Search::IndexSearch { index, .. } => {
|
|
writeln!(
|
|
f,
|
|
"{}SEARCH {} USING INDEX {}",
|
|
indent, table_reference.table_identifier, index.name
|
|
)?;
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
SourceOperator::Subquery { plan, .. } => {
|
|
fmt_operator(&plan.source, f, level + 1, last)
|
|
}
|
|
SourceOperator::Nothing { .. } => Ok(()),
|
|
}
|
|
}
|
|
writeln!(f, "QUERY PLAN")?;
|
|
fmt_operator(self, f, 0, true)
|
|
}
|
|
}
|
|
|
|
/**
|
|
Returns a bitmask where each bit corresponds to a table in the `tables` vector.
|
|
If a table is referenced in the given Operator, the corresponding bit is set to 1.
|
|
Example:
|
|
if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")],
|
|
and the Operator is a join between table2 and table3,
|
|
then the return value will be (in bits): 110
|
|
*/
|
|
pub fn get_table_ref_bitmask_for_operator<'a>(
|
|
tables: &'a Vec<TableReference>,
|
|
operator: &'a SourceOperator,
|
|
) -> Result<usize> {
|
|
let mut table_refs_mask = 0;
|
|
match operator {
|
|
SourceOperator::Join { left, right, .. } => {
|
|
table_refs_mask |= get_table_ref_bitmask_for_operator(tables, left)?;
|
|
table_refs_mask |= get_table_ref_bitmask_for_operator(tables, right)?;
|
|
}
|
|
SourceOperator::Scan {
|
|
table_reference, ..
|
|
} => {
|
|
table_refs_mask |= 1
|
|
<< tables
|
|
.iter()
|
|
.position(|t| t.table_identifier == table_reference.table_identifier)
|
|
.unwrap();
|
|
}
|
|
SourceOperator::Search {
|
|
table_reference, ..
|
|
} => {
|
|
table_refs_mask |= 1
|
|
<< tables
|
|
.iter()
|
|
.position(|t| t.table_identifier == table_reference.table_identifier)
|
|
.unwrap();
|
|
}
|
|
SourceOperator::Subquery { .. } => {}
|
|
SourceOperator::Nothing { .. } => {}
|
|
}
|
|
Ok(table_refs_mask)
|
|
}
|
|
|
|
/**
|
|
Returns a bitmask where each bit corresponds to a table in the `tables` vector.
|
|
If a table is referenced in the given AST expression, the corresponding bit is set to 1.
|
|
Example:
|
|
if tables = [(table1, "t1"), (table2, "t2"), (table3, "t3")],
|
|
and predicate = "t1.a = t2.b"
|
|
then the return value will be (in bits): 011
|
|
*/
|
|
#[allow(clippy::only_used_in_recursion)]
|
|
pub fn get_table_ref_bitmask_for_ast_expr<'a>(
|
|
tables: &'a Vec<TableReference>,
|
|
predicate: &'a ast::Expr,
|
|
) -> Result<usize> {
|
|
let mut table_refs_mask = 0;
|
|
match predicate {
|
|
ast::Expr::Binary(e1, _, e2) => {
|
|
table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e1)?;
|
|
table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, e2)?;
|
|
}
|
|
ast::Expr::Column { table, .. } => {
|
|
table_refs_mask |= 1 << table;
|
|
}
|
|
ast::Expr::Id(_) => unreachable!("Id should be resolved to a Column before optimizer"),
|
|
ast::Expr::Qualified(_, _) => {
|
|
unreachable!("Qualified should be resolved to a Column before optimizer")
|
|
}
|
|
ast::Expr::Literal(_) => {}
|
|
ast::Expr::Like { lhs, rhs, .. } => {
|
|
table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, lhs)?;
|
|
table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, rhs)?;
|
|
}
|
|
ast::Expr::FunctionCall {
|
|
args: Some(args), ..
|
|
} => {
|
|
for arg in args {
|
|
table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, arg)?;
|
|
}
|
|
}
|
|
ast::Expr::InList { lhs, rhs, .. } => {
|
|
table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, lhs)?;
|
|
if let Some(rhs_list) = rhs {
|
|
for rhs_expr in rhs_list {
|
|
table_refs_mask |= get_table_ref_bitmask_for_ast_expr(tables, rhs_expr)?;
|
|
}
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
Ok(table_refs_mask)
|
|
}
|