Merge 'core/translate/optimizer: eliminate unnecessary ORDER BY if result set is already ordered' from Jussi Saurio

Built on top of, and currently targets, #350, _not_ main
Closes #365
Examples:
```
limbo> explain select u.first_name, p.name from users u join products p on u.id = p.id order by u.id;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     15    0                    0   Start at 15
1     OpenReadAsync      0     2     0                    0   table=u, root=2
2     OpenReadAwait      0     0     0                    0
3     OpenReadAsync      1     3     0                    0   table=p, root=3
4     OpenReadAwait      0     0     0                    0
5     RewindAsync        0     0     0                    0
6     RewindAwait        0     14    0                    0   Rewind table u
7       RowId            0     1     0                    0   r[1]=u.rowid
8       SeekRowid        1     1     12                   0   if (r[1]!=p.rowid) goto 12
9       Column           0     1     2                    0   r[2]=u.first_name
10      Column           1     1     3                    0   r[3]=p.name
11      ResultRow        2     2     0                    0   output=r[2..3]
12    NextAsync          0     0     0                    0
13    NextAwait          0     7     0                    0
14    Halt               0     0     0                    0
15    Transaction        0     0     0                    0
16    Goto               0     1     0                    0
```
```
limbo> explain select * from users where age > 80 order by age limit 5;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     23    0                    0   Start at 23
1     OpenReadAsync      0     2     0                    0   table=users, root=2
2     OpenReadAwait      0     0     0                    0
3     OpenReadAsync      1     274   0                    0   table=age_idx, root=274
4     OpenReadAwait      0     0     0                    0
5     Integer            80    1     0                    0   r[1]=80
6     SeekGT             1     22    1                    0
7       DeferredSeek     1     0     0                    0
8       RowId            0     2     0                    0   r[2]=users.rowid
9       Column           0     1     3                    0   r[3]=users.first_name
10      Column           0     2     4                    0   r[4]=users.last_name
11      Column           0     3     5                    0   r[5]=users.email
12      Column           0     4     6                    0   r[6]=users.phone_number
13      Column           0     5     7                    0   r[7]=users.address
14      Column           0     6     8                    0   r[8]=users.city
15      Column           0     7     9                    0   r[9]=users.state
16      Column           0     8     10                   0   r[10]=users.zipcode
17      Column           0     9     11                   0   r[11]=users.age
18      ResultRow        2     10    0                    0   output=r[2..11]
19      DecrJumpZero     12    22    0                    0   if (--r[12]==0) goto 22
20    NextAsync          1     0     0                    0
21    NextAwait          1     7     0                    0
22    Halt               0     0     0                    0
23    Transaction        0     0     0                    0
24    Integer            5     12    0                    0   r[12]=5
25    Goto               0     1     0                    0
```

Closes #366
This commit is contained in:
Pekka Enberg
2024-10-13 10:12:28 +03:00

View File

@@ -9,12 +9,14 @@ use crate::{
};
use super::plan::{
get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, Operator, Plan,
ProjectionColumn, Search,
get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, Direction, Operator,
Plan, ProjectionColumn, Search,
};
/**
* Make a few passes over the plan to optimize it.
* TODO: these could probably be done in fewer passes,
* but having them separate makes them easier to understand
*/
pub fn optimize_plan(mut select_plan: Plan) -> Result<(Plan, ExpressionResultCache)> {
let mut expr_result_cache = ExpressionResultCache::new();
@@ -39,12 +41,96 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result<(Plan, ExpressionResultCac
&select_plan.referenced_tables,
&select_plan.available_indexes,
)?;
eliminate_unnecessary_orderby(
&mut select_plan.root_operator,
&select_plan.referenced_tables,
&select_plan.available_indexes,
)?;
find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(&select_plan.root_operator, &mut expr_result_cache);
Ok((select_plan, expr_result_cache))
}
/**
 * Reports whether the rows produced by `operator` already come out ordered by `key`,
 * which would make a separate sort on that key redundant.
 *
 * - Scan, and Search by primary key (`PrimaryKeyEq` / `PrimaryKeySearch`):
 *   true when `key.is_primary_key_of` the operator's table.
 * - Search via an index: true when `key.check_index_scan` selects the very same
 *   index (compared by `Rc` pointer identity) that the search is using.
 * - Join: the answer is delegated to the left input only.
 * - Aggregate / Projection: the answer is delegated to the source operator.
 * - Any other operator: false.
 *
 * Errors returned by `check_index_scan` are propagated to the caller.
 */
fn _operator_is_already_ordered_by(
    operator: &mut Operator,
    key: &mut ast::Expr,
    available_indexes: &Vec<Rc<Index>>,
) -> Result<bool> {
    match operator {
        Operator::Scan {
            table,
            table_identifier,
            ..
        } => {
            let table_ref = (table.clone(), table_identifier.clone());
            Ok(key.is_primary_key_of(&table_ref))
        }
        Operator::Search {
            table,
            table_identifier,
            search,
            ..
        } => {
            // Every search flavour needs the same (table, identifier) pair, so build it once.
            let table_ref = (table.clone(), table_identifier.clone());
            match search {
                Search::PrimaryKeyEq { .. } | Search::PrimaryKeySearch { .. } => {
                    Ok(key.is_primary_key_of(&table_ref))
                }
                Search::IndexSearch { index, .. } => {
                    // The key only counts as "already ordered" if the index it would
                    // scan is the exact index this search walks.
                    let uses_same_index = key
                        .check_index_scan(&table_ref, available_indexes)?
                        .map_or(false, |pos| Rc::ptr_eq(&available_indexes[pos], index));
                    Ok(uses_same_index)
                }
            }
        }
        // These operators defer to the input they read from (the left side for joins).
        Operator::Join { left: input, .. }
        | Operator::Aggregate { source: input, .. }
        | Operator::Projection { source: input, .. } => {
            _operator_is_already_ordered_by(input, key, available_indexes)
        }
        _ => Ok(false),
    }
}
/**
 * Removes an `Operator::Order` node when its source already produces rows in
 * the requested order.
 *
 * - `Operator::Order`: if there is exactly one sort key and it is ascending, and
 *   `_operator_is_already_ordered_by` says the source is ordered by that key, the
 *   Order node is replaced by the value returned from `source.take_ownership()`.
 * - `Operator::Limit`: the check is applied to the limit's source.
 * - Any other operator is left untouched.
 *
 * `referenced_tables` is only forwarded to the recursive call.
 */
fn eliminate_unnecessary_orderby(
    operator: &mut Operator,
    referenced_tables: &Vec<(Rc<BTreeTable>, String)>,
    available_indexes: &Vec<Rc<Index>>,
) -> Result<()> {
    match operator {
        Operator::Limit { source, .. } => {
            eliminate_unnecessary_orderby(source, referenced_tables, available_indexes)
        }
        Operator::Order { source, key, .. } => {
            // TODO: handle multiple order by keys and descending order
            let order_expr = match key.as_mut_slice() {
                [(expr, direction)] if *direction == Direction::Ascending => expr,
                _ => return Ok(()),
            };
            if _operator_is_already_ordered_by(source, order_expr, available_indexes)? {
                // The sort is redundant: splice the source in where the Order node was.
                *operator = source.take_ownership();
            }
            Ok(())
        }
        _ => Ok(()),
    }
}
/**
* Use indexes where possible (currently just primary key lookups)
* Use indexes where possible
*/
fn use_indexes(
operator: &mut Operator,