Merge 'GROUP BY support' from Jussi Saurio

This PR implements GROUP BY (and also ordering by the groups or the
aggregate expressions). See `groupby.test` for the kinds of things that
are now supported.
This PR is a rabbit hole and insanely big, sorry.
---
I thought about how to explain how GROUP BY works in SQLite bytecode,
and opted to go for just adding a bunch of Insn comments, so here's an
example that uses both GROUP BY and ORDER BY:
**LIMBO**
```
limbo> explain select u.first_name, sum(u.age) from users u group by u.first_name order by sum(u.age);
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     56    0                    0   Start at 56
1     SorterOpen         0     1     0     k(1,B)         0   cursor=0
2     SorterOpen         1     2     0     k(1,B)         0   cursor=1
3     Integer            0     2     0                    0   r[2]=0; clear group by abort flag
4     Null               0     4     0                    0   r[4]=NULL; initialize group by comparison registers to NULL
5     Gosub              8     45    0                    0   ; go to clear accumulator subroutine
6     OpenReadAsync      2     2     0                    0   table=u, root=2
7     OpenReadAwait      0     0     0                    0
8     RewindAsync        2     0     0                    0
9     RewindAwait        2     38    0                    0   Rewind table u
10      Column           2     1     10                   0   r[10]=u.first_name
11      Column           2     9     11                   0   r[11]=u.age
12      MakeRecord       10    2     7                    0   r[7]=mkrec(r[10..11])
13      SorterInsert     1     7     0     0              0   key=r[7]
14    NextAsync          2     0     0                    0
15    NextAwait          2     9     0                    0
16    OpenPseudo         3     7     2                    0   2 columns in r[7]
17    SorterSort         1     32    0                    0
18      SorterData       1     7     3                    0   r[7]=data
19      Column           3     0     12                   0   r[12]=cursor 3.u.first_name
20      Compare          4     12    1                    0   r[4..4]==r[12..12]
21      Jump             22    26    22                   0   ; start new group if comparison is not equal
22      Move             12    4     1                    0   r[4..4]=r[12..12]
23      Gosub            9     36    0                    0   ; check if ended group had data, and output if so
24      IfPos            2     55    0                    0   r[2]>0 -> r[2]-=0, goto 55; check abort flag
25      Gosub            8     45    0                    0   ; goto clear accumulator subroutine
26      Column           3     1     13                   0   r[13]=cursor 3.u.age
27      AggStep          0     13    6     sum            0   accum=r[6] step(r[13])
28      If               3     30    0                    0   if r[3] goto 30; don't emit group columns if continuing existing group
29      Column           3     0     5                    0   r[5]=cursor 3.u.first_name
30      Integer          1     3     0                    0   r[3]=1; indicate data in accumulator
31    SorterNext         1     18    0                    0
32    Gosub              9     36    0                    0   ; emit row for final group
33    Goto               0     48    0                    0   ; group by finished
34    Integer            1     2     0                    0   r[2]=1
35    Return             9     0     0                    0
36    IfPos              3     38    0                    0   r[3]>0 -> r[3]-=0, goto 38; output group by row subroutine start
37    Return             9     0     0                    0
38    AggFinal           0     6     0     sum            0   accum=r[6]
39    Copy               5     15    0                    0   r[15]=r[5]
40    Copy               6     16    0                    0   r[16]=r[6]
41    Copy               6     14    0                    0   r[14]=r[6]
42    MakeRecord         14    3     1                    0   r[1]=mkrec(r[14..16])
43    SorterInsert       0     1     0     0              0   key=r[1]
44    Return             9     0     0                    0
45    Null               0     5     6                    0   r[5..6]=NULL; clear accumulator subroutine start
46    Integer            0     3     0                    0   r[3]=0
47    Return             8     0     0                    0
48    OpenPseudo         4     1     3                    0   3 columns in r[1]
49    SorterSort         0     55    0                    0
50      SorterData       0     1     4                    0   r[1]=data
51      Column           4     1     17                   0   r[17]=cursor 4.sum
52      Column           4     2     18                   0   r[18]=cursor 4.u.first_name
53      ResultRow        17    2     0                    0   output=r[17..18]
54    SorterNext         0     50    0                    0
55    Halt               0     0     0                    0
56    Transaction        0     0     0                    0
57    Goto               0     1     0                    0
```
**SQLITE3**:
```
sqlite> explain select u.first_name, sum(u.age) from users u group by u.first_name order by sum(u.age);
addr  opcode         p1    p2    p3    p4             p5  comment
----  -------------  ----  ----  ----  -------------  --  -------------
0     Init           0     52    0                    0   Start at 52
1     SorterOpen     1     4     0     k(1,B)         0
2     SorterOpen     2     2     0     k(1,B)         0
3     Integer        0     2     0                    0   r[2]=0; clear abort flag
4     Null           0     5     5                    0   r[5..5]=NULL
5     Gosub          4     41    0                    0
6     OpenRead       0     2     0     10             0   root=2 iDb=0; users
7     Rewind         0     13    0                    0
8       Column         0     1     10                   0   r[10]= cursor 0 column 1
9       Column         0     9     11                   0   r[11]= cursor 0 column 9
10      MakeRecord     10    2     12                   0   r[12]=mkrec(r[10..11])
11      SorterInsert   2     12    0                    0   key=r[12]
12    Next           0     8     0                    1
13    OpenPseudo     3     12    2                    0   2 columns in r[12]
14    SorterSort     2     44    0                    0   GROUP BY sort
15      SorterData     2     12    3                    0   r[12]=data
16      Column         3     0     6                    0   r[6]= cursor 3 column 0
17      Compare        5     6     1     k(1,B)         0   r[5] <-> r[6]
18      Jump           19    23    19                   0
19      Move           6     5     1                    0   r[5]=r[6]
20      Gosub          3     33    0                    0   output one row
21      IfPos          2     44    0                    0   if r[2]>0 then r[2]-=0, goto 44; check abort flag
22      Gosub          4     41    0                    0   reset accumulator
23      Column         3     1     13                   0   r[13]=users.age
24      AggStep        0     13    9     sum(1)         1   accum=r[9] step(r[13])
25      If             1     27    0                    0
26      Column         3     0     7                    0   r[7]=users.first_name
27      Integer        1     1     0                    0   r[1]=1; indicate data in accumulator
28    SorterNext     2     15    0                    0
29    Gosub          3     33    0                    0   output final row
30    Goto           0     44    0                    0
31    Integer        1     2     0                    0   r[2]=1; set abort flag
32    Return         3     0     0                    0
33    IfPos          1     35    0                    0   if r[1]>0 then r[1]-=0, goto 35; Groupby result generator entry point
34    Return         3     0     0                    0
35    AggFinal       9     1     0     sum(1)         0   accum=r[9] N=1
36    Copy           7     15    0                    0   r[15]=r[7]
37    Copy           9     14    0                    0   r[14]=r[9]
38    MakeRecord     14    2     17                   0   r[17]=mkrec(r[14..15])
39    SorterInsert   1     17    14    2              0   key=r[17]
40    Return         3     0     0                    0   end groupby result generator
41    Null           0     7     9                    0   r[7..9]=NULL
42    Integer        0     1     0                    0   r[1]=0; indicate accumulator empty
43    Return         4     0     0                    0
44    OpenPseudo     4     18    4                    0   4 columns in r[18]
45    SorterSort     1     51    0                    0
46      SorterData     1     18    4                    0   r[18]=data
47      Column         4     0     16                   0   r[16]=sum(u.age)
48      Column         4     1     15                   0   r[15]=u.first_name
49      ResultRow      15    2     0                    0   output=r[15..16]
50    SorterNext     1     46    0                    0
51    Halt           0     0     0                    0
52    Transaction    0     0     2     0              1   usesStmtJournal=0
53    Goto           0     1     0                    0
```
As you can see, the bytecode is fairly close in this scenario.
SQLite opts to use an ephemeral index in certain cases (e.g. when you
use `LIMIT` or have multiple grouping columns). This PR does not
implement those branching strategies.
---
Example operator tree:
```
limbo> explain query plan select u.first_name, p.name, sum(u.age) from users u join products p on u.id = p.id group by u.first_name, p.name order by p.name limit 10;

QUERY PLAN
`--TAKE 10
   `--SORT p.name ASC
   |  `--PROJECT u.first_name, p.name, sum (u.age)
   |  |  `--AGGREGATE Sum(u.age)
   |  |  |  `--JOIN
   |  |  |  |  |--SCAN users AS u
   |  |  |  |  `--SEEK products.rowid ON rowid=u.id
```

Reviewed-by: Pere Diaz Bou <pere-altea@hotmail.com>

Closes #309
This commit is contained in:
Pekka Enberg
2024-09-14 16:33:09 +03:00
18 changed files with 1915 additions and 227 deletions

View File

@@ -45,7 +45,7 @@ This document describes the SQLite compatibility status of Limbo:
| SELECT ... WHERE ... LIKE | Yes | |
| SELECT ... LIMIT | Yes | |
| SELECT ... ORDER BY | Partial | |
| SELECT ... GROUP BY | No | |
| SELECT ... GROUP BY | Partial | |
| SELECT ... JOIN | Partial | |
| SELECT ... CROSS JOIN | Partial | |
| SELECT ... INNER JOIN | Partial | |

View File

@@ -198,7 +198,7 @@ impl Connection {
match stmt {
ast::Stmt::Select(select) => {
let plan = prepare_select_plan(&self.schema, select)?;
let plan = optimize_plan(plan)?;
let (plan, _) = optimize_plan(plan)?;
println!("{}", plan);
}
_ => todo!(),

View File

@@ -41,7 +41,9 @@ impl Cursor for PseudoCursor {
.as_ref()
.map(|record| match record.values[0] {
OwnedValue::Integer(rowid) => rowid as u64,
_ => panic!("Expected integer value"),
ref ov => {
panic!("Expected integer value, got {:?}", ov);
}
});
Ok(x)
}

View File

@@ -95,6 +95,13 @@ impl Table {
}
}
/// Returns a reference to the column at position `index` of this table,
/// for both the `BTree` and the `Pseudo` table variants.
///
/// # Panics
/// Panics when `index` is out of range, because the `Option` returned by
/// `columns.get(index)` is unwrapped.
pub fn get_column_at(&self, index: usize) -> &Column {
match self {
Table::BTree(table) => table.columns.get(index).unwrap(),
Table::Pseudo(table) => table.columns.get(index).unwrap(),
}
}
pub fn columns(&self) -> &Vec<Column> {
match self {
Table::BTree(table) => &table.columns,

File diff suppressed because it is too large Load Diff

View File

@@ -2,8 +2,9 @@ use crate::{function::JsonFunc, Result};
use sqlite3_parser::ast::{self, UnaryOperator};
use std::rc::Rc;
use super::optimizer::CachedResult;
use crate::function::{AggFunc, Func, FuncCtx, ScalarFunc};
use crate::schema::Type;
use crate::schema::{Table, Type};
use crate::util::normalize_ident;
use crate::{
schema::BTreeTable,
@@ -74,13 +75,27 @@ pub fn translate_condition_expr(
}
ast::Expr::Binary(lhs, op, rhs) => {
let lhs_reg = program.alloc_register();
let _ = translate_expr(program, Some(referenced_tables), lhs, lhs_reg, cursor_hint);
let _ = translate_expr(
program,
Some(referenced_tables),
lhs,
lhs_reg,
cursor_hint,
None,
);
match lhs.as_ref() {
ast::Expr::Literal(_) => program.mark_last_insn_constant(),
_ => {}
}
let rhs_reg = program.alloc_register();
let _ = translate_expr(program, Some(referenced_tables), rhs, rhs_reg, cursor_hint);
let _ = translate_expr(
program,
Some(referenced_tables),
rhs,
rhs_reg,
cursor_hint,
None,
);
match rhs.as_ref() {
ast::Expr::Literal(_) => program.mark_last_insn_constant(),
_ => {}
@@ -323,7 +338,14 @@ pub fn translate_condition_expr(
// The left hand side only needs to be evaluated once we have a list of values to compare against.
let lhs_reg = program.alloc_register();
let _ = translate_expr(program, Some(referenced_tables), lhs, lhs_reg, cursor_hint)?;
let _ = translate_expr(
program,
Some(referenced_tables),
lhs,
lhs_reg,
cursor_hint,
None,
)?;
let rhs = rhs.as_ref().unwrap();
@@ -352,6 +374,7 @@ pub fn translate_condition_expr(
expr,
rhs_reg,
cursor_hint,
None,
)?;
// If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true.
if !last_condition {
@@ -395,6 +418,7 @@ pub fn translate_condition_expr(
expr,
rhs_reg,
cursor_hint,
None,
)?;
program.emit_insn_with_label_dependency(
Insn::Eq {
@@ -444,6 +468,7 @@ pub fn translate_condition_expr(
rhs,
pattern_reg,
cursor_hint,
None,
)?;
program.mark_last_insn_constant();
let _ = translate_expr(
@@ -452,6 +477,7 @@ pub fn translate_condition_expr(
lhs,
column_reg,
cursor_hint,
None,
)?;
program.emit_insn(Insn::Function {
// Only constant patterns for LIKE are supported currently, so this
@@ -516,20 +542,72 @@ pub fn translate_condition_expr(
Ok(())
}
/// Returns a register that holds the value of `expr`.
///
/// When `cached_results` contains an entry whose `source_expr` equals `expr`,
/// that entry's register is returned and nothing is emitted. Otherwise a new
/// register is allocated, `expr` is translated into it with `translate_expr`
/// (which receives the same `cached_results`), and the new register is returned.
pub fn get_cached_or_translate(
    program: &mut ProgramBuilder,
    referenced_tables: Option<&[(Rc<BTreeTable>, String)]>,
    expr: &ast::Expr,
    cursor_hint: Option<usize>,
    cached_results: Option<&Vec<&CachedResult>>,
) -> Result<usize> {
    // Look for a cache entry that was computed from exactly this expression.
    let cache_hit = cached_results
        .and_then(|entries| entries.iter().find(|entry| entry.source_expr == *expr));

    match cache_hit {
        Some(entry) => Ok(entry.register_idx),
        None => {
            let fresh_reg = program.alloc_register();
            translate_expr(
                program,
                referenced_tables,
                expr,
                fresh_reg,
                cursor_hint,
                cached_results,
            )?;
            Ok(fresh_reg)
        }
    }
}
pub fn translate_expr(
program: &mut ProgramBuilder,
referenced_tables: Option<&[(Rc<BTreeTable>, String)]>,
expr: &ast::Expr,
target_register: usize,
cursor_hint: Option<usize>,
cached_results: Option<&Vec<&CachedResult>>,
) -> Result<usize> {
if let Some(cached_results) = &cached_results {
if let Some(cached_result) = cached_results
.iter()
.find(|cached_result| cached_result.source_expr == *expr)
{
program.emit_insn(Insn::Copy {
src_reg: cached_result.register_idx,
dst_reg: target_register,
amount: 0,
});
return Ok(target_register);
}
}
match expr {
ast::Expr::Between { .. } => todo!(),
ast::Expr::Binary(e1, op, e2) => {
let e1_reg = program.alloc_register();
let _ = translate_expr(program, referenced_tables, e1, e1_reg, cursor_hint)?;
let e2_reg = program.alloc_register();
let _ = translate_expr(program, referenced_tables, e2, e2_reg, cursor_hint)?;
let e1_reg = get_cached_or_translate(
program,
referenced_tables,
e1,
cursor_hint,
cached_results,
)?;
let e2_reg = get_cached_or_translate(
program,
referenced_tables,
e2,
cursor_hint,
cached_results,
)?;
match op {
ast::Operator::NotEquals => {
@@ -617,6 +695,13 @@ pub fn translate_expr(
dest: target_register,
});
}
ast::Operator::Multiply => {
program.emit_insn(Insn::Multiply {
lhs: e1_reg,
rhs: e2_reg,
dest: target_register,
});
}
other_unimplemented => todo!("{:?}", other_unimplemented),
}
Ok(target_register)
@@ -667,7 +752,14 @@ pub fn translate_expr(
);
};
let regs = program.alloc_register();
translate_expr(program, referenced_tables, &args[0], regs, cursor_hint)?;
translate_expr(
program,
referenced_tables,
&args[0],
regs,
cursor_hint,
cached_results,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: regs,
@@ -684,7 +776,14 @@ pub fn translate_expr(
for arg in args.iter() {
let reg = program.alloc_register();
translate_expr(program, referenced_tables, arg, reg, cursor_hint)?;
translate_expr(
program,
referenced_tables,
arg,
reg,
cursor_hint,
cached_results,
)?;
}
program.emit_insn(Insn::Function {
@@ -721,6 +820,7 @@ pub fn translate_expr(
arg,
target_register,
cursor_hint,
cached_results,
)?;
if index < args.len() - 1 {
program.emit_insn_with_label_dependency(
@@ -747,7 +847,14 @@ pub fn translate_expr(
};
for arg in args.iter() {
let reg = program.alloc_register();
translate_expr(program, referenced_tables, arg, reg, cursor_hint)?;
translate_expr(
program,
referenced_tables,
arg,
reg,
cursor_hint,
cached_results,
)?;
}
program.emit_insn(Insn::Function {
constant_mask: 0,
@@ -777,6 +884,7 @@ pub fn translate_expr(
&args[0],
temp_reg,
cursor_hint,
cached_results,
)?;
program.emit_insn(Insn::NotNull {
reg: temp_reg,
@@ -789,6 +897,7 @@ pub fn translate_expr(
&args[1],
temp_reg,
cursor_hint,
cached_results,
)?;
program.emit_insn(Insn::Copy {
src_reg: temp_reg,
@@ -821,6 +930,7 @@ pub fn translate_expr(
arg,
reg,
cursor_hint,
cached_results,
)?;
match arg {
ast::Expr::Literal(_) => program.mark_last_insn_constant(),
@@ -865,6 +975,7 @@ pub fn translate_expr(
&args[0],
regs,
cursor_hint,
cached_results,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
@@ -901,6 +1012,7 @@ pub fn translate_expr(
arg,
target_reg,
cursor_hint,
cached_results,
)?;
}
}
@@ -938,6 +1050,7 @@ pub fn translate_expr(
&args[0],
str_reg,
cursor_hint,
cached_results,
)?;
translate_expr(
program,
@@ -945,6 +1058,7 @@ pub fn translate_expr(
&args[1],
start_reg,
cursor_hint,
cached_results,
)?;
if args.len() == 3 {
translate_expr(
@@ -953,6 +1067,7 @@ pub fn translate_expr(
&args[2],
length_reg,
cursor_hint,
cached_results,
)?;
}
@@ -977,6 +1092,7 @@ pub fn translate_expr(
&args[0],
arg_reg,
cursor_hint,
cached_results,
)?;
start_reg = arg_reg;
}
@@ -1000,6 +1116,7 @@ pub fn translate_expr(
arg,
target_reg,
cursor_hint,
cached_results,
)?;
}
}
@@ -1032,7 +1149,14 @@ pub fn translate_expr(
for arg in args.iter() {
let reg = program.alloc_register();
translate_expr(program, referenced_tables, arg, reg, cursor_hint)?;
translate_expr(
program,
referenced_tables,
arg,
reg,
cursor_hint,
cached_results,
)?;
if let ast::Expr::Literal(_) = arg {
program.mark_last_insn_constant();
}
@@ -1064,6 +1188,7 @@ pub fn translate_expr(
arg,
reg,
cursor_hint,
cached_results,
)?;
match arg {
ast::Expr::Literal(_) => program.mark_last_insn_constant(),
@@ -1098,6 +1223,7 @@ pub fn translate_expr(
arg,
reg,
cursor_hint,
cached_results,
)?;
match arg {
ast::Expr::Literal(_) => program.mark_last_insn_constant(),
@@ -1132,6 +1258,7 @@ pub fn translate_expr(
&args[0],
first_reg,
cursor_hint,
cached_results,
)?;
let second_reg = program.alloc_register();
translate_expr(
@@ -1140,6 +1267,7 @@ pub fn translate_expr(
&args[1],
second_reg,
cursor_hint,
cached_results,
)?;
program.emit_insn(Insn::Function {
constant_mask: 0,
@@ -1208,6 +1336,7 @@ pub fn translate_expr(
ast::Literal::Null => {
program.emit_insn(Insn::Null {
dest: target_register,
dest_end: None,
});
Ok(target_register)
}
@@ -1389,16 +1518,15 @@ pub fn maybe_apply_affinity(col_type: Type, target_register: usize, program: &mu
pub fn translate_table_columns(
program: &mut ProgramBuilder,
table: &Rc<BTreeTable>,
table_identifier: &str,
cursor_override: Option<usize>,
cursor_id: usize,
table: &Table,
start_column_offset: usize,
start_reg: usize,
) -> usize {
let mut cur_reg = start_reg;
let cursor_id = cursor_override.unwrap_or(program.resolve_cursor_id(table_identifier, None));
for i in 0..table.columns.len() {
let is_rowid = table.column_is_rowid_alias(&table.columns[i]);
let col_type = &table.columns[i].ty;
for i in start_column_offset..table.columns().len() {
let is_rowid = table.column_is_rowid_alias(&table.get_column_at(i));
let col_type = &table.get_column_at(i).ty;
if is_rowid {
program.emit_insn(Insn::RowId {
cursor_id,
@@ -1437,6 +1565,7 @@ pub fn translate_aggregation(
expr,
expr_reg,
cursor_hint,
None,
)?;
program.emit_insn(Insn::AggStep {
acc_reg: target_register,
@@ -1458,6 +1587,7 @@ pub fn translate_aggregation(
expr,
expr_reg,
cursor_hint,
None,
);
expr_reg
};
@@ -1505,6 +1635,7 @@ pub fn translate_aggregation(
expr,
expr_reg,
cursor_hint,
None,
)?;
translate_expr(
program,
@@ -1512,6 +1643,7 @@ pub fn translate_aggregation(
&delimiter_expr,
delimiter_reg,
cursor_hint,
None,
)?;
program.emit_insn(Insn::AggStep {
@@ -1535,6 +1667,7 @@ pub fn translate_aggregation(
expr,
expr_reg,
cursor_hint,
None,
)?;
program.emit_insn(Insn::AggStep {
acc_reg: target_register,
@@ -1556,6 +1689,7 @@ pub fn translate_aggregation(
expr,
expr_reg,
cursor_hint,
None,
)?;
program.emit_insn(Insn::AggStep {
acc_reg: target_register,
@@ -1596,6 +1730,7 @@ pub fn translate_aggregation(
expr,
expr_reg,
cursor_hint,
None,
)?;
translate_expr(
program,
@@ -1603,6 +1738,7 @@ pub fn translate_aggregation(
&delimiter_expr,
delimiter_reg,
cursor_hint,
None,
)?;
program.emit_insn(Insn::AggStep {
@@ -1626,6 +1762,7 @@ pub fn translate_aggregation(
expr,
expr_reg,
cursor_hint,
None,
)?;
program.emit_insn(Insn::AggStep {
acc_reg: target_register,
@@ -1647,6 +1784,7 @@ pub fn translate_aggregation(
expr,
expr_reg,
cursor_hint,
None,
)?;
program.emit_insn(Insn::AggStep {
acc_reg: target_register,

View File

@@ -93,6 +93,7 @@ pub fn translate_insert(
expr,
column_registers_start + col,
None,
None,
)?;
}
program.emit_insn(Insn::Yield {

View File

@@ -1,4 +1,4 @@
use std::rc::Rc;
use std::{collections::HashMap, rc::Rc};
use sqlite3_parser::ast;
@@ -6,12 +6,14 @@ use crate::{schema::BTreeTable, util::normalize_ident, Result};
use super::plan::{
get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, Operator, Plan,
ProjectionColumn,
};
/**
* Make a few passes over the plan to optimize it.
*/
pub fn optimize_plan(mut select_plan: Plan) -> Result<Plan> {
pub fn optimize_plan(mut select_plan: Plan) -> Result<(Plan, ExpressionResultCache)> {
let mut expr_result_cache = ExpressionResultCache::new();
push_predicates(
&mut select_plan.root_operator,
&select_plan.referenced_tables,
@@ -19,16 +21,20 @@ pub fn optimize_plan(mut select_plan: Plan) -> Result<Plan> {
if eliminate_constants(&mut select_plan.root_operator)?
== ConstantConditionEliminationResult::ImpossibleCondition
{
return Ok(Plan {
root_operator: Operator::Nothing,
referenced_tables: vec![],
});
return Ok((
Plan {
root_operator: Operator::Nothing,
referenced_tables: vec![],
},
expr_result_cache,
));
}
use_indexes(
&mut select_plan.root_operator,
&select_plan.referenced_tables,
)?;
Ok(select_plan)
find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(&select_plan.root_operator, &mut expr_result_cache);
Ok((select_plan, expr_result_cache))
}
/**
@@ -523,6 +529,383 @@ fn push_predicate(
}
}
/// Bookkeeping that lets a parent operator reuse results that a child
/// operator has already computed into a register.
#[derive(Debug)]
pub struct ExpressionResultCache {
// Composite key (operator id * OPERATOR_ID_MULTIPLIER + result column index)
// -> the cached register and source expression for that result column.
resultmap: HashMap<usize, CachedResult>,
// Composite key of a parent operator's result column -> composite keys of the
// child operator result columns (keys into `resultmap`) it can reuse.
keymap: HashMap<usize, Vec<usize>>,
}
/// One cached expression result: the register that holds it and the
/// expression it was computed from.
#[derive(Debug)]
pub struct CachedResult {
// Register in which the result of `source_expr` is stored.
pub register_idx: usize,
// Copy of the AST node that produced the result; consumers compare their own
// expressions against it with `==` to detect a cache hit.
pub source_expr: ast::Expr,
}
// Factor used to fold an operator id and a result column index into a single
// map key: key = operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx.
// NOTE(review): keys are only unique while result column indexes stay below
// this value.
const OPERATOR_ID_MULTIPLIER: usize = 10000;
/**
  ExpressionResultCache is a cache for the results of expressions that are computed in the query plan,
  or more precisely, the VM registers that hold the results of these expressions.

  Right now the cache is mainly used to avoid recomputing e.g. the result of an aggregation expression
  e.g. SELECT t.a, SUM(t.b) FROM t GROUP BY t.a ORDER BY SUM(t.b)
*/
impl ExpressionResultCache {
    /**
      Create an empty cache.
    */
    pub fn new() -> Self {
        ExpressionResultCache {
            resultmap: HashMap::new(),
            keymap: HashMap::new(),
        }
    }

    /**
      Combine an operator id and a result column index into the single key
      used by both internal maps.
    */
    fn composite_key(operator_id: usize, result_column_idx: usize) -> usize {
        operator_id * OPERATOR_ID_MULTIPLIER + result_column_idx
    }

    /**
      Store the result of an expression that is computed in the query plan.
      The result is stored in a VM register. A copy of the expression AST node is
      stored as well, so that parent operators can use it to compare their own expressions
      with the one that was computed in a child operator.

      This is a weakness of our current reliance on a 3rd party AST library, as we can't
      e.g. modify the AST to add identifiers to nodes or replace nodes with some kind of
      reference to a register, etc.
    */
    pub fn cache_result_register(
        &mut self,
        operator_id: usize,
        result_column_idx: usize,
        register_idx: usize,
        expr: ast::Expr,
    ) {
        let key = Self::composite_key(operator_id, result_column_idx);
        self.resultmap.insert(
            key,
            CachedResult {
                register_idx,
                source_expr: expr,
            },
        );
    }

    /**
      Set a mapping from a parent operator to a child operator, so that the parent operator
      can look up the register of a result that was computed in the child operator.
      E.g. "Parent operator's result column 3 is computed in child operator 5, result column 2"

      `child_operator_result_column_idx_mask` is a bitmask: bit `i` set means that result
      column `i` of the child operator is relevant for the parent's result column.
    */
    pub fn set_precomputation_key(
        &mut self,
        operator_id: usize,
        result_column_idx: usize,
        child_operator_id: usize,
        child_operator_result_column_idx_mask: usize,
    ) {
        let key = Self::composite_key(operator_id, result_column_idx);
        // Walk exactly as many bits as the mask type has. A hard-coded 64 would
        // shift past the width of `usize` on targets where it is narrower.
        let values: Vec<usize> = (0..usize::BITS as usize)
            .filter(|&bit| (child_operator_result_column_idx_mask >> bit) & 1 == 1)
            .map(|bit| Self::composite_key(child_operator_id, bit))
            .collect();
        self.keymap.insert(key, values);
    }

    /**
      Get the cache entries for a given operator and result column index.
      There may be multiple cached entries, e.g. a binary operator's both
      arms may have been cached.

      Returns None when no mapping was registered for the result column, or when
      none of the mapped child results has been stored in the cache.
    */
    pub fn get_cached_result_registers(
        &self,
        operator_id: usize,
        result_column_idx: usize,
    ) -> Option<Vec<&CachedResult>> {
        let key = Self::composite_key(operator_id, result_column_idx);
        let results: Vec<&CachedResult> = self
            .keymap
            .get(&key)?
            .iter()
            .filter_map(|child_key| self.resultmap.get(child_key))
            .collect();
        if results.is_empty() {
            None
        } else {
            Some(results)
        }
    }
}
type ResultColumnIndexBitmask = usize;

/**
  Return the bitmask bit that represents result column `idx`.

  Columns whose index does not fit into the bitmask yield 0, i.e. they are simply
  not reported as matches. Shifting by `idx` unconditionally would panic in debug
  builds and set an unrelated bit in release builds.
*/
fn result_column_bit(idx: usize) -> ResultColumnIndexBitmask {
    if idx < ResultColumnIndexBitmask::BITS as usize {
        1 << idx
    } else {
        0
    }
}

/**
  Find all result columns in an operator that match an expression, either fully or partially.
  This is used to find the result columns that are computed in an operator and that are used
  in a parent operator, so that the parent operator can look up the register that holds the result
  of the child operator's expression.

  An exact match of the whole expression takes precedence. Only when there is none are the
  sub-expressions of `expr` searched recursively and their matches OR-ed together.

  For an Aggregate operator the result columns are the aggregates followed by the GROUP BY
  expressions; for a Projection operator they are the projected expressions.

  The result is returned as a bitmask due to performance neuroticism. A limitation of this is that
  we can only handle as many result columns per operator as the bitmask has bits
  (64 on 64-bit targets).
*/
fn find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(
    expr: &ast::Expr,
    operator: &Operator,
) -> ResultColumnIndexBitmask {
    let exact_match = match operator {
        Operator::Aggregate {
            aggregates,
            group_by,
            ..
        } => {
            let mut mask = 0;
            for (idx, agg) in aggregates.iter().enumerate() {
                if agg.original_expr == *expr {
                    mask |= result_column_bit(idx);
                }
            }
            if let Some(group_by) = group_by {
                // GROUP BY columns are numbered right after the aggregates.
                let first_group_by_idx = aggregates.len();
                for (offset, g) in group_by.iter().enumerate() {
                    if g == expr {
                        mask |= result_column_bit(first_group_by_idx + offset);
                    }
                }
            }
            mask
        }
        Operator::Projection { expressions, .. } => {
            let mut mask = 0;
            for (idx, e) in expressions.iter().enumerate() {
                // Star / TableStar entries still occupy an index but never match.
                if let super::plan::ProjectionColumn::Column(c) = e {
                    if c == expr {
                        mask |= result_column_bit(idx);
                    }
                }
            }
            mask
        }
        Operator::Filter { .. }
        | Operator::SeekRowid { .. }
        | Operator::Limit { .. }
        | Operator::Join { .. }
        | Operator::Order { .. }
        | Operator::Scan { .. }
        | Operator::Nothing => 0,
    };

    if exact_match != 0 {
        return exact_match;
    }

    // No exact match: look for matches of the sub-expressions instead.
    match expr {
        ast::Expr::Between {
            lhs, start, end, ..
        } => {
            let mut mask = 0;
            mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator);
            mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(start, operator);
            mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(end, operator);
            mask
        }
        ast::Expr::Binary(lhs, _, rhs) => {
            let mut mask = 0;
            mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator);
            mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(rhs, operator);
            mask
        }
        ast::Expr::Case {
            base,
            when_then_pairs,
            else_expr,
        } => {
            let mut mask = 0;
            if let Some(base) = base {
                mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(base, operator);
            }
            for (w, t) in when_then_pairs.iter() {
                mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(w, operator);
                mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(t, operator);
            }
            if let Some(e) = else_expr {
                mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(e, operator);
            }
            mask
        }
        ast::Expr::Cast { expr, .. } => {
            find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(
                expr, operator,
            )
        }
        ast::Expr::Collate(expr, _) => {
            find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(
                expr, operator,
            )
        }
        ast::Expr::FunctionCall { args, .. } => {
            let mut mask = 0;
            if let Some(args) = args {
                for a in args.iter() {
                    mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(a, operator);
                }
            }
            mask
        }
        ast::Expr::InList { lhs, rhs, .. } => {
            let mut mask = 0;
            mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator);
            if let Some(rhs) = rhs {
                for r in rhs.iter() {
                    mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(r, operator);
                }
            }
            mask
        }
        ast::Expr::InSelect { lhs, .. } => {
            find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(
                lhs, operator,
            )
        }
        ast::Expr::IsNull(expr) => {
            find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(
                expr, operator,
            )
        }
        ast::Expr::Like { lhs, rhs, .. } => {
            let mut mask = 0;
            mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(lhs, operator);
            mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(rhs, operator);
            mask
        }
        ast::Expr::NotNull(expr) => {
            find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(
                expr, operator,
            )
        }
        ast::Expr::Parenthesized(expr) => {
            let mut mask = 0;
            for e in expr.iter() {
                mask |= find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(e, operator);
            }
            mask
        }
        ast::Expr::Unary(_, expr) => {
            find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(
                expr, operator,
            )
        }
        // Expressions that are not searched for sub-expression matches.
        ast::Expr::DoublyQualified(..)
        | ast::Expr::Exists(_)
        | ast::Expr::FunctionCallStar { .. }
        | ast::Expr::Id(_)
        | ast::Expr::InTable { .. }
        | ast::Expr::Literal(_)
        | ast::Expr::Name(_)
        | ast::Expr::Qualified(_, _)
        | ast::Expr::Raise(_, _)
        | ast::Expr::Subquery(_)
        | ast::Expr::Variable(_) => 0,
    }
}
/**
  Walk the operator tree and record, for every sort key of an Order operator and every
  expression column of a Projection operator, which result columns of the operator's
  source already compute that expression (fully or partially).

  The findings are registered in `expr_result_cache` via `set_precomputation_key`, so that
  the parent operator can reuse the child's result instead of recomputing the expression.
  Aggregate and Limit operators only forward the walk to their source; SeekRowid, Join,
  Scan and Nothing end it. A Filter operator is not expected to be encountered here.
*/
fn find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(
    operator: &Operator,
    expr_result_cache: &mut ExpressionResultCache,
) {
    match operator {
        Operator::Filter { .. } => unreachable!(),
        // Nothing is recorded for these operators and the walk stops here.
        Operator::SeekRowid { .. }
        | Operator::Join { .. }
        | Operator::Scan { .. }
        | Operator::Nothing => {}
        Operator::Aggregate { source, .. } => {
            find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(
                source,
                expr_result_cache,
            )
        }
        Operator::Limit { source, .. } => {
            find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(
                source,
                expr_result_cache,
            )
        }
        Operator::Order { source, key, .. } => {
            for (key_idx, (sort_expr, _)) in key.iter().enumerate() {
                let child_columns = find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(sort_expr, source);
                if child_columns == 0 {
                    continue;
                }
                expr_result_cache.set_precomputation_key(
                    operator.id(),
                    key_idx,
                    source.id(),
                    child_columns,
                );
            }
            find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(
                source,
                expr_result_cache,
            )
        }
        Operator::Projection {
            source,
            expressions,
            ..
        } => {
            for (column_idx, projection) in expressions.iter().enumerate() {
                // Only plain expression columns can be matched; other kinds are skipped
                // but still occupy a column index.
                if let ProjectionColumn::Column(column_expr) = projection {
                    let child_columns = find_indexes_of_all_result_columns_in_operator_that_match_expr_either_fully_or_partially(column_expr, source);
                    if child_columns != 0 {
                        expr_result_cache.set_precomputation_key(
                            operator.id(),
                            column_idx,
                            source.id(),
                            child_columns,
                        );
                    }
                }
            }
            find_shared_expressions_in_child_operators_and_mark_them_so_that_the_parent_operator_doesnt_recompute_them(
                source,
                expr_result_cache,
            )
        }
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstantPredicate {
AlwaysTrue,
@@ -762,7 +1145,3 @@ impl TakeOwnership for Operator {
std::mem::replace(self, Operator::Nothing)
}
}
// Overwrites `*expr` with the value returned by `replacement.take_ownership()`.
fn replace_with<T: TakeOwnership>(expr: &mut T, mut replacement: T) {
    let taken = replacement.take_ownership();
    *expr = taken;
}

View File

@@ -43,6 +43,7 @@ pub enum Operator {
id: usize,
source: Box<Operator>,
aggregates: Vec<Aggregate>,
group_by: Option<Vec<ast::Expr>>,
step: usize,
},
// Filter operator
@@ -154,7 +155,11 @@ impl ProjectionColumn {
impl Operator {
pub fn column_count(&self, referenced_tables: &[(Rc<BTreeTable>, String)]) -> usize {
match self {
Operator::Aggregate { aggregates, .. } => aggregates.len(),
Operator::Aggregate {
group_by,
aggregates,
..
} => aggregates.len() + group_by.as_ref().map_or(0, |g| g.len()),
Operator::Filter { source, .. } => source.column_count(referenced_tables),
Operator::SeekRowid { table, .. } => table.columns.len(),
Operator::Limit { source, .. } => source.column_count(referenced_tables),
@@ -173,8 +178,29 @@ impl Operator {
pub fn column_names(&self) -> Vec<String> {
match self {
Operator::Aggregate { .. } => {
todo!();
Operator::Aggregate {
aggregates,
group_by,
..
} => {
let mut names = vec![];
for agg in aggregates.iter() {
names.push(agg.func.to_string().to_string());
}
if let Some(group_by) = group_by {
for expr in group_by.iter() {
match expr {
ast::Expr::Id(ident) => names.push(ident.0.clone()),
ast::Expr::Qualified(tbl, ident) => {
names.push(format!("{}.{}", tbl.0, ident.0))
}
e => names.push(e.to_string()),
}
}
}
names
}
Operator::Filter { source, .. } => source.column_names(),
Operator::SeekRowid { table, .. } => {
@@ -238,6 +264,7 @@ impl Display for Direction {
// Describes one aggregate function call collected while planning a SELECT.
pub struct Aggregate {
// The aggregate function being called.
pub func: AggFunc,
// The argument expressions of the call.
pub args: Vec<ast::Expr>,
// The expression this aggregate was created from.
pub original_expr: ast::Expr,
}
impl Display for Aggregate {

View File

@@ -14,7 +14,7 @@ pub struct OperatorIdCounter {
impl OperatorIdCounter {
pub fn new() -> Self {
Self { id: 0 }
Self { id: 1 }
}
pub fn get_next_id(&mut self) -> usize {
let id = self.id;
@@ -23,12 +23,54 @@ impl OperatorIdCounter {
}
}
fn resolve_aggregates(expr: &ast::Expr, aggs: &mut Vec<Aggregate>) {
match expr {
ast::Expr::FunctionCall { name, args, .. } => {
let args_count = if let Some(args) = &args {
args.len()
} else {
0
};
match Func::resolve_function(normalize_ident(name.0.as_str()).as_str(), args_count) {
Ok(Func::Agg(f)) => aggs.push(Aggregate {
func: f,
args: args.clone().unwrap_or_default(),
original_expr: expr.clone(),
}),
_ => {
if let Some(args) = args {
for arg in args.iter() {
resolve_aggregates(&arg, aggs);
}
}
}
}
}
ast::Expr::FunctionCallStar { name, .. } => {
match Func::resolve_function(normalize_ident(name.0.as_str()).as_str(), 0) {
Ok(Func::Agg(f)) => aggs.push(Aggregate {
func: f,
args: vec![],
original_expr: expr.clone(),
}),
_ => {}
}
}
ast::Expr::Binary(lhs, _, rhs) => {
resolve_aggregates(lhs, aggs);
resolve_aggregates(rhs, aggs);
}
_ => {}
}
}
pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result<Plan> {
match select.body.select {
ast::OneSelect::Select {
columns,
from,
where_clause,
group_by,
..
} => {
let col_count = columns.len();
@@ -53,21 +95,17 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result<P
};
}
// Parse the SELECT clause to either a projection or an aggregation
// depending on the presence of aggregate functions.
// Since GROUP BY is not supported yet, mixing aggregate and non-aggregate
// columns is not allowed.
//
// If there are aggregate functions, we aggregate + project the columns.
// If there are no aggregate functions, we can simply project the columns.
// For a simple SELECT *, the projection operator is skipped.
// For a simple SELECT *, the projection operator is skipped as well.
let is_select_star = col_count == 1 && matches!(columns[0], ast::ResultColumn::Star);
if !is_select_star {
let mut aggregate_expressions = Vec::new();
let mut scalar_expressions = Vec::with_capacity(col_count);
let mut projection_expressions = Vec::with_capacity(col_count);
for column in columns.clone() {
match column {
ast::ResultColumn::Star => {
scalar_expressions.push(ProjectionColumn::Star);
projection_expressions.push(ProjectionColumn::Star);
}
ast::ResultColumn::TableStar(name) => {
let name_normalized = normalize_ident(name.0.as_str());
@@ -79,89 +117,98 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result<P
crate::bail_parse_error!("Table {} not found", name.0);
}
let (table, identifier) = referenced_table.unwrap();
scalar_expressions.push(ProjectionColumn::TableStar(
projection_expressions.push(ProjectionColumn::TableStar(
table.clone(),
identifier.clone(),
));
}
ast::ResultColumn::Expr(expr, _) => match expr {
ast::Expr::FunctionCall {
name,
distinctness,
args,
filter_over,
order_by,
} => {
let args_count = if let Some(args) = &args {
args.len()
} else {
0
};
match Func::resolve_function(
normalize_ident(name.0.as_str()).as_str(),
args_count,
) {
Ok(Func::Agg(f)) => aggregate_expressions.push(Aggregate {
func: f,
args: args.unwrap(),
}),
Ok(_) => {
scalar_expressions.push(ProjectionColumn::Column(
ast::Expr::FunctionCall {
name,
distinctness,
args,
filter_over,
order_by,
},
));
ast::ResultColumn::Expr(expr, _) => {
projection_expressions.push(ProjectionColumn::Column(expr.clone()));
match expr.clone() {
ast::Expr::FunctionCall {
name,
distinctness,
args,
filter_over,
order_by,
} => {
let args_count = if let Some(args) = &args {
args.len()
} else {
0
};
match Func::resolve_function(
normalize_ident(name.0.as_str()).as_str(),
args_count,
) {
Ok(Func::Agg(f)) => {
aggregate_expressions.push(Aggregate {
func: f,
args: args.unwrap(),
original_expr: expr.clone(),
});
}
Ok(_) => {
resolve_aggregates(&expr, &mut aggregate_expressions);
}
_ => {}
}
_ => {}
}
}
ast::Expr::FunctionCallStar { name, filter_over } => {
match Func::resolve_function(
normalize_ident(name.0.as_str()).as_str(),
0,
) {
Ok(Func::Agg(f)) => aggregate_expressions.push(Aggregate {
func: f,
args: vec![],
}),
Ok(Func::Scalar(_)) => {
scalar_expressions.push(ProjectionColumn::Column(
ast::Expr::FunctionCallStar { name, filter_over },
));
ast::Expr::FunctionCallStar { name, filter_over } => {
match Func::resolve_function(
normalize_ident(name.0.as_str()).as_str(),
0,
) {
Ok(Func::Agg(f)) => {
aggregate_expressions.push(Aggregate {
func: f,
args: vec![],
original_expr: expr.clone(),
});
}
_ => {}
}
_ => {}
}
ast::Expr::Binary(lhs, _, rhs) => {
resolve_aggregates(&lhs, &mut aggregate_expressions);
resolve_aggregates(&rhs, &mut aggregate_expressions);
}
_ => {}
}
_ => {
scalar_expressions.push(ProjectionColumn::Column(expr));
}
},
}
}
}
let mixing_aggregate_and_non_aggregate_columns =
!aggregate_expressions.is_empty() && aggregate_expressions.len() != col_count;
if mixing_aggregate_and_non_aggregate_columns {
crate::bail_parse_error!(
"mixing aggregate and non-aggregate columns is not allowed (GROUP BY is not supported)"
);
if let Some(group_by) = group_by.as_ref() {
if aggregate_expressions.is_empty() {
crate::bail_parse_error!(
"GROUP BY clause without aggregate functions is not allowed"
);
}
for scalar in projection_expressions.iter() {
match scalar {
ProjectionColumn::Column(_) => {}
_ => {
crate::bail_parse_error!(
"Only column references are allowed in the SELECT clause when using GROUP BY"
);
}
}
}
}
if !aggregate_expressions.is_empty() {
operator = Operator::Aggregate {
source: Box::new(operator),
aggregates: aggregate_expressions,
group_by: group_by.map(|g| g.exprs), // TODO: support HAVING
id: operator_id_counter.get_next_id(),
step: 0,
}
} else if !scalar_expressions.is_empty() {
}
if !projection_expressions.is_empty() {
operator = Operator::Projection {
source: Box::new(operator),
expressions: scalar_expressions,
expressions: projection_expressions,
id: operator_id_counter.get_next_id(),
step: 0,
};
@@ -171,17 +218,18 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result<P
// Parse the ORDER BY clause
if let Some(order_by) = select.order_by {
let mut key = Vec::new();
for o in order_by {
// if the ORDER BY expression is a number, interpret it as an 1-indexed column number
// otherwise, interpret it normally as an expression
let expr = if let ast::Expr::Literal(ast::Literal::Numeric(num)) = o.expr {
let expr = if let ast::Expr::Literal(ast::Literal::Numeric(num)) = &o.expr {
let column_number = num.parse::<usize>()?;
if column_number == 0 {
crate::bail_parse_error!("invalid column index: {}", column_number);
}
let maybe_result_column = columns.get(column_number - 1);
match maybe_result_column {
Some(ResultColumn::Expr(expr, _)) => expr.clone(),
Some(ResultColumn::Expr(e, _)) => e.clone(),
None => {
crate::bail_parse_error!("invalid column index: {}", column_number)
}
@@ -190,6 +238,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result<P
} else {
o.expr
};
key.push((
expr,
o.order.map_or(Direction::Ascending, |o| match o {

View File

@@ -14,6 +14,6 @@ pub fn translate_select(
database_header: Rc<RefCell<DatabaseHeader>>,
) -> Result<Program> {
let select_plan = prepare_select_plan(schema, select)?;
let optimized_plan = optimize_plan(select_plan)?;
emit_program(database_header, optimized_plan)
let (optimized_plan, expr_result_cache) = optimize_plan(select_plan)?;
emit_program(database_header, optimized_plan, expr_result_cache)
}

View File

@@ -69,6 +69,21 @@ pub enum AggContext {
GroupConcat(OwnedValue),
}
const NULL: OwnedValue = OwnedValue::Null;
impl AggContext {
    // Returns a reference to the value held by this aggregate context.
    //
    // `Avg`, `Sum`, `Count` and `GroupConcat` return their stored value (for `Avg`
    // this is the first field; the second is ignored). `Max` and `Min` return their
    // stored value when one is present and a reference to the `NULL` constant otherwise.
    pub fn final_value(&self) -> &OwnedValue {
        match self {
            AggContext::Avg(value, _)
            | AggContext::Sum(value)
            | AggContext::Count(value)
            | AggContext::GroupConcat(value) => value,
            AggContext::Max(slot) | AggContext::Min(slot) => match slot {
                Some(value) => value,
                None => &NULL,
            },
        }
    }
}
impl std::cmp::PartialOrd<OwnedValue> for OwnedValue {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
match (self, other) {
@@ -93,6 +108,21 @@ impl std::cmp::PartialOrd<OwnedValue> for OwnedValue {
(OwnedValue::Null, OwnedValue::Null) => Some(std::cmp::Ordering::Equal),
(OwnedValue::Null, _) => Some(std::cmp::Ordering::Less),
(_, OwnedValue::Null) => Some(std::cmp::Ordering::Greater),
(OwnedValue::Agg(a), OwnedValue::Agg(b)) => a.partial_cmp(b),
_ => None,
}
}
}
impl std::cmp::PartialOrd<AggContext> for AggContext {
fn partial_cmp(&self, other: &AggContext) -> Option<std::cmp::Ordering> {
match (self, other) {
(AggContext::Avg(a, _), AggContext::Avg(b, _)) => a.partial_cmp(b),
(AggContext::Sum(a), AggContext::Sum(b)) => a.partial_cmp(b),
(AggContext::Count(a), AggContext::Count(b)) => a.partial_cmp(b),
(AggContext::Max(a), AggContext::Max(b)) => a.partial_cmp(b),
(AggContext::Min(a), AggContext::Min(b)) => a.partial_cmp(b),
(AggContext::GroupConcat(a), AggContext::GroupConcat(b)) => a.partial_cmp(b),
_ => None,
}
}

View File

@@ -288,6 +288,16 @@ impl ProgramBuilder {
assert!(*target_pc < 0);
*target_pc = to_offset;
}
Insn::Gosub { target_pc, .. } => {
assert!(*target_pc < 0);
*target_pc = to_offset;
}
Insn::Jump { target_pc_eq, .. } => {
// FIXME: this current implementation doesn't scale for insns that
// have potentially multiple label dependencies.
assert!(*target_pc_eq < 0);
*target_pc_eq = to_offset;
}
_ => {
todo!("missing resolve_label for {:?}", insn);
}
@@ -315,6 +325,10 @@ impl ProgramBuilder {
.unwrap()
}
// Returns a clone of the second field of the `cursor_ref` entry for `cursor_id`.
// Indexing panics if `cursor_id` is out of range.
pub fn resolve_cursor_to_table(&self, cursor_id: CursorID) -> Option<Table> {
    let entry = &self.cursor_ref[cursor_id];
    entry.1.clone()
}
pub fn resolve_deferred_labels(&mut self) {
for i in 0..self.deferred_label_resolutions.len() {
let (label, insn_reference) = self.deferred_label_resolutions[i];

View File

@@ -28,14 +28,25 @@ pub fn insn_to_str(
0,
format!("r[{}]=r[{}]+r[{}]", dest, lhs, rhs),
),
Insn::Null { dest } => (
"Null",
Insn::Multiply { lhs, rhs, dest } => (
"Multiply",
*lhs as i32,
*rhs as i32,
*dest as i32,
0,
0,
OwnedValue::Text(Rc::new("".to_string())),
0,
format!("r[{}]=NULL", dest),
format!("r[{}]=r[{}]*r[{}]", dest, lhs, rhs),
),
Insn::Null { dest, dest_end } => (
"Null",
0,
*dest as i32,
dest_end.map_or(0, |end| end as i32),
OwnedValue::Text(Rc::new("".to_string())),
0,
dest_end.map_or(format!("r[{}]=NULL", dest), |end| {
format!("r[{}..{}]=NULL", dest, end)
}),
),
Insn::NullRow { cursor_id } => (
"NullRow",
@@ -55,6 +66,57 @@ pub fn insn_to_str(
0,
format!("r[{}]!=NULL -> goto {}", reg, target_pc),
),
Insn::Compare {
start_reg_a,
start_reg_b,
count,
} => (
"Compare",
*start_reg_a as i32,
*start_reg_b as i32,
*count as i32,
OwnedValue::Text(Rc::new("".to_string())),
0,
format!(
"r[{}..{}]==r[{}..{}]",
start_reg_a,
start_reg_a + (count - 1),
start_reg_b,
start_reg_b + (count - 1)
),
),
Insn::Jump {
target_pc_lt,
target_pc_eq,
target_pc_gt,
} => (
"Jump",
*target_pc_lt as i32,
*target_pc_eq as i32,
*target_pc_gt as i32,
OwnedValue::Text(Rc::new("".to_string())),
0,
"".to_string(),
),
Insn::Move {
source_reg,
dest_reg,
count,
} => (
"Move",
*source_reg as i32,
*dest_reg as i32,
*count as i32,
OwnedValue::Text(Rc::new("".to_string())),
0,
format!(
"r[{}..{}]=r[{}..{}]",
dest_reg,
dest_reg + (count - 1),
source_reg,
source_reg + (count - 1)
),
),
Insn::IfPos {
reg,
target_pc,
@@ -348,6 +410,27 @@ pub fn insn_to_str(
0,
"".to_string(),
),
Insn::Gosub {
target_pc,
return_reg,
} => (
"Gosub",
*return_reg as i32,
*target_pc as i32,
0,
OwnedValue::Text(Rc::new("".to_string())),
0,
"".to_string(),
),
Insn::Return { return_reg } => (
"Return",
*return_reg as i32,
0,
0,
OwnedValue::Text(Rc::new("".to_string())),
0,
"".to_string(),
),
Insn::Integer { value, dest } => (
"Integer",
*value as i32,
@@ -478,7 +561,11 @@ pub fn insn_to_str(
*cursor_id as i32,
*columns as i32,
0,
OwnedValue::Text(Rc::new(format!("k({},{})", columns, to_print.join(",")))),
OwnedValue::Text(Rc::new(format!(
"k({},{})",
order.values.len(),
to_print.join(",")
))),
0,
format!("cursor={}", cursor_id),
)

View File

@@ -70,9 +70,10 @@ pub enum Insn {
Init {
target_pc: BranchOffset,
},
// Set NULL in the given register.
// Write a NULL into register dest. If dest_end is Some, then also write NULL into register dest_end and every register in between dest and dest_end. If dest_end is not set, then only register dest is set to NULL.
Null {
dest: usize,
dest_end: Option<usize>,
},
// Move the cursor P1 to a null row. Any Column operations that occur while the cursor is on the null row will always write a NULL.
NullRow {
@@ -84,6 +85,30 @@ pub enum Insn {
rhs: usize,
dest: usize,
},
// Multiply two registers and store the result in a third register.
Multiply {
lhs: usize,
rhs: usize,
dest: usize,
},
// Compare two vectors of registers in reg(P1)..reg(P1+P3-1) (call this vector "A") and in reg(P2)..reg(P2+P3-1) ("B"). Save the result of the comparison for use by the next Jump instruction.
Compare {
start_reg_a: usize,
start_reg_b: usize,
count: usize,
},
// Jump to the instruction at address P1, P2, or P3 depending on whether in the most recent Compare instruction the P1 vector was less than, equal to, or greater than the P2 vector, respectively.
Jump {
target_pc_lt: BranchOffset,
target_pc_eq: BranchOffset,
target_pc_gt: BranchOffset,
},
// Move the P3 values in register P1..P1+P3-1 over into registers P2..P2+P3-1. Registers P1..P1+P3-1 are left holding a NULL. It is an error for register ranges P1..P1+P3-1 and P2..P2+P3-1 to overlap. It is an error for P3 to be less than 1.
Move {
source_reg: usize,
dest_reg: usize,
count: usize,
},
// If the given register is a positive integer, decrement it by decrement_by and jump to the given PC.
IfPos {
reg: usize,
@@ -214,6 +239,17 @@ pub enum Insn {
target_pc: BranchOffset,
},
// Stores the current program counter into register 'return_reg' then jumps to address target_pc.
Gosub {
target_pc: BranchOffset,
return_reg: usize,
},
// Returns to the program counter stored in register 'return_reg'.
Return {
return_reg: usize,
},
// Write an integer value into a register.
Integer {
value: i64,
@@ -382,6 +418,7 @@ pub struct ProgramState {
pub pc: BranchOffset,
cursors: RefCell<BTreeMap<CursorID, Box<dyn Cursor>>>,
registers: Vec<OwnedValue>,
last_compare: Option<std::cmp::Ordering>,
ended_coroutine: bool, // flag to notify yield coroutine finished
regex_cache: HashMap<String, Regex>,
}
@@ -395,6 +432,7 @@ impl ProgramState {
pc: 0,
cursors,
registers,
last_compare: None,
ended_coroutine: false,
regex_cache: HashMap::new(),
}
@@ -464,14 +502,123 @@ impl Program {
(OwnedValue::Null, _) | (_, OwnedValue::Null) => {
state.registers[dest] = OwnedValue::Null;
}
(OwnedValue::Agg(aggctx), other) | (other, OwnedValue::Agg(aggctx)) => {
match other {
OwnedValue::Null => {
state.registers[dest] = OwnedValue::Null;
}
OwnedValue::Integer(i) => match aggctx.final_value() {
OwnedValue::Float(acc) => {
state.registers[dest] = OwnedValue::Float(acc + *i as f64);
}
OwnedValue::Integer(acc) => {
state.registers[dest] = OwnedValue::Integer(acc + i);
}
_ => {
todo!("{:?}", aggctx);
}
},
OwnedValue::Float(f) => match aggctx.final_value() {
OwnedValue::Float(acc) => {
state.registers[dest] = OwnedValue::Float(acc + f);
}
OwnedValue::Integer(acc) => {
state.registers[dest] = OwnedValue::Float(*acc as f64 + f);
}
_ => {
todo!("{:?}", aggctx);
}
},
OwnedValue::Agg(aggctx2) => {
let acc = aggctx.final_value();
let acc2 = aggctx2.final_value();
match (acc, acc2) {
(OwnedValue::Integer(acc), OwnedValue::Integer(acc2)) => {
state.registers[dest] = OwnedValue::Integer(acc + acc2);
}
(OwnedValue::Float(acc), OwnedValue::Float(acc2)) => {
state.registers[dest] = OwnedValue::Float(acc + acc2);
}
(OwnedValue::Integer(acc), OwnedValue::Float(acc2)) => {
state.registers[dest] =
OwnedValue::Float(*acc as f64 + acc2);
}
(OwnedValue::Float(acc), OwnedValue::Integer(acc2)) => {
state.registers[dest] =
OwnedValue::Float(acc + *acc2 as f64);
}
_ => {
todo!("{:?} {:?}", acc, acc2);
}
}
}
rest => unimplemented!("{:?}", rest),
}
}
_ => {
todo!();
}
}
state.pc += 1;
}
Insn::Null { dest } => {
state.registers[*dest] = OwnedValue::Null;
Insn::Multiply { lhs, rhs, dest } => {
let lhs = *lhs;
let rhs = *rhs;
let dest = *dest;
match (&state.registers[lhs], &state.registers[rhs]) {
(OwnedValue::Integer(lhs), OwnedValue::Integer(rhs)) => {
state.registers[dest] = OwnedValue::Integer(lhs * rhs);
}
(OwnedValue::Float(lhs), OwnedValue::Float(rhs)) => {
state.registers[dest] = OwnedValue::Float(lhs * rhs);
}
(OwnedValue::Null, _) | (_, OwnedValue::Null) => {
state.registers[dest] = OwnedValue::Null;
}
(OwnedValue::Agg(aggctx), other) | (other, OwnedValue::Agg(aggctx)) => {
match other {
OwnedValue::Null => {
state.registers[dest] = OwnedValue::Null;
}
OwnedValue::Integer(i) => match aggctx.final_value() {
OwnedValue::Float(acc) => {
state.registers[dest] = OwnedValue::Float(acc * *i as f64);
}
OwnedValue::Integer(acc) => {
state.registers[dest] = OwnedValue::Integer(acc * i);
}
_ => {
todo!("{:?}", aggctx);
}
},
OwnedValue::Float(f) => match aggctx.final_value() {
OwnedValue::Float(acc) => {
state.registers[dest] = OwnedValue::Float(acc * f);
}
OwnedValue::Integer(acc) => {
state.registers[dest] = OwnedValue::Float(*acc as f64 * f);
}
_ => {
todo!("{:?}", aggctx);
}
},
rest => unimplemented!("{:?}", rest),
}
}
others => {
todo!("{:?}", others);
}
}
state.pc += 1;
}
Insn::Null { dest, dest_end } => {
if let Some(dest_end) = dest_end {
for i in *dest..=*dest_end {
state.registers[i] = OwnedValue::Null;
}
} else {
state.registers[*dest] = OwnedValue::Null;
}
state.pc += 1;
}
Insn::NullRow { cursor_id } => {
@@ -479,6 +626,68 @@ impl Program {
cursor.set_null_flag(true);
state.pc += 1;
}
Insn::Compare {
start_reg_a,
start_reg_b,
count,
} => {
let start_reg_a = *start_reg_a;
let start_reg_b = *start_reg_b;
let count = *count;
if start_reg_a + count > start_reg_b {
return Err(LimboError::InternalError(
"Compare registers overlap".to_string(),
));
}
let mut cmp = None;
for i in 0..count {
let a = &state.registers[start_reg_a + i];
let b = &state.registers[start_reg_b + i];
cmp = Some(a.cmp(b));
if cmp != Some(std::cmp::Ordering::Equal) {
break;
}
}
state.last_compare = cmp;
state.pc += 1;
}
Insn::Jump {
target_pc_lt,
target_pc_eq,
target_pc_gt,
} => {
let cmp = state.last_compare.take();
if cmp.is_none() {
return Err(LimboError::InternalError(
"Jump without compare".to_string(),
));
}
let target_pc = match cmp.unwrap() {
std::cmp::Ordering::Less => *target_pc_lt,
std::cmp::Ordering::Equal => *target_pc_eq,
std::cmp::Ordering::Greater => *target_pc_gt,
};
assert!(target_pc >= 0);
state.pc = target_pc;
}
Insn::Move {
source_reg,
dest_reg,
count,
} => {
let source_reg = *source_reg;
let dest_reg = *dest_reg;
let count = *count;
for i in 0..count {
state.registers[dest_reg + i] = std::mem::replace(
&mut state.registers[source_reg + i],
OwnedValue::Null,
);
}
state.pc += 1;
}
Insn::IfPos {
reg,
target_pc,
@@ -788,6 +997,28 @@ impl Program {
assert!(*target_pc >= 0);
state.pc = *target_pc;
}
Insn::Gosub {
target_pc,
return_reg,
} => {
assert!(*target_pc >= 0);
state.registers[*return_reg] = OwnedValue::Integer(state.pc as i64 + 1);
state.pc = *target_pc;
}
Insn::Return { return_reg } => {
if let OwnedValue::Integer(pc) = state.registers[*return_reg] {
if pc < 0 {
return Err(LimboError::InternalError(
"Return register is negative".to_string(),
));
}
state.pc = pc;
} else {
return Err(LimboError::InternalError(
"Return register is not an integer".to_string(),
));
}
}
Insn::Integer { value, dest } => {
state.registers[*dest] = OwnedValue::Integer(*value);
state.pc += 1;
@@ -1572,6 +1803,7 @@ fn exec_length(reg: &OwnedValue) -> OwnedValue {
OwnedValue::Integer(reg.to_string().len() as i64)
}
OwnedValue::Blob(blob) => OwnedValue::Integer(blob.len() as i64),
OwnedValue::Agg(aggctx) => exec_length(&aggctx.final_value()),
_ => reg.to_owned(),
}
}

View File

@@ -97,7 +97,7 @@ impl Cursor for Sorter {
let _ = moved_before;
let key_fields = self.order.len();
let key = OwnedRecord::new(record.values[0..key_fields].to_vec());
self.insert(key, OwnedRecord::new(record.values[key_fields..].to_vec()));
self.insert(key, OwnedRecord::new(record.values.to_vec()));
Ok(CursorResult::Ok(()))
}

View File

@@ -11,8 +11,9 @@ source $testdir/join.test
source $testdir/json.test
source $testdir/like.test
source $testdir/orderby.test
source $testdir/groupby.test
source $testdir/pragma.test
source $testdir/scalar-functions.test
source $testdir/scalar-functions-datetime.test
source $testdir/select.test
source $testdir/where.test
source $testdir/where.test

107
testing/groupby.test Normal file
View File

@@ -0,0 +1,107 @@
#!/usr/bin/env tclsh
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Basic GROUP BY on a single column with SUM, limited to the first 10 groups.
# NOTE(review): the query has no ORDER BY; the expected rows are listed in
# ascending first_name order, so this relies on the order groups are emitted in.
do_execsql_test group_by {
select u.first_name, sum(u.age) from users u group by u.first_name limit 10;
} {Aaron|2271
Abigail|890
Adam|1642
Adrian|439
Adriana|83
Adrienne|318
Aimee|33
Alan|551
Albert|369
Alec|247}
# GROUP BY on two columns, one from each side of a join (users and products).
# NOTE(review): no ORDER BY here either; the expected rows follow the order in
# which groups are emitted.
do_execsql_test group_by_two_joined_columns {
select u.first_name, p.name, sum(u.age) from users u join products p on u.id = p.id group by u.first_name, p.name limit 10;
} {Aimee|jeans|24
Cindy|cap|37
Daniel|coat|13
Edward|sweatshirt|15
Jamie|hat|94
Jennifer|sweater|33
Matthew|boots|77
Nicholas|shorts|89
Rachel|sneakers|63
Tommy|shirt|18}
# GROUP BY on two joined columns, with the groups ordered by one of the grouping
# columns (p.name).
do_execsql_test group_by_order_by {
select u.first_name, p.name, sum(u.age) from users u join products p on u.id = p.id group by u.first_name, p.name order by p.name limit 10;
} {Travis|accessories|22
Matthew|boots|77
Cindy|cap|37
Daniel|coat|13
Jamie|hat|94
Aimee|jeans|24
Tommy|shirt|18
Nicholas|shorts|89
Rachel|sneakers|63
Jennifer|sweater|33}
# GROUP BY with the groups ordered by the aggregate result itself (sum(u.age)).
do_execsql_test group_by_order_by_aggregate {
select u.first_name, p.name, sum(u.age) from users u join products p on u.id = p.id group by u.first_name, p.name order by sum(u.age) limit 10;
} {Daniel|coat|13
Edward|sweatshirt|15
Tommy|shirt|18
Travis|accessories|22
Aimee|jeans|24
Jennifer|sweater|33
Cindy|cap|37
Rachel|sneakers|63
Matthew|boots|77
Nicholas|shorts|89}
# Two aggregates (SUM and COUNT) over the same column in one grouped query,
# ordered by the SUM.
# NOTE(review): the expected output contains ties on sum(u.age) (1 and 8); the
# order of tied rows is not fixed by the ORDER BY clause.
do_execsql_test group_by_multiple_aggregates {
select u.first_name, sum(u.age), count(u.age) from users u group by u.first_name order by sum(u.age) limit 10;
} {Jaclyn|1|1
Mia|1|1
Kirsten|7|1
Kellie|8|1
Makayla|8|1
Yvette|9|1
Mckenzie|12|1
Grant|14|1
Mackenzie|15|1
Cesar|17|1}
# SUM and GROUP_CONCAT over the same column in one grouped query, ordered by
# the grouping column.
do_execsql_test group_by_multiple_aggregates_2 {
select u.first_name, sum(u.age), group_concat(u.age) from users u group by u.first_name order by u.first_name limit 10;
} {Aaron|2271|52,46,17,69,71,91,34,30,97,81,47,98,45,69,97,18,38,26,98,60,33,97,42,43,43,22,18,75,56,67,83,58,82,28,22,72,5,58,96,32,55
Abigail|890|17,82,62,57,55,5,9,83,93,22,23,57,56,100,74,95
Adam|1642|34,23,10,11,46,40,2,57,51,80,65,24,15,84,59,6,34,100,32,79,57,5,77,34,30,19,54,74,89,98,72,91,90
Adrian|439|37,28,94,76,69,60,34,41
Adriana|83|83
Adrienne|318|79,74,82,33,50
Aimee|33|24,9
Alan|551|18,52,30,62,96,13,85,97,98
Albert|369|99,80,41,7,64,7,26,41,4
Alec|247|55,48,53,91}
# ORDER BY an expression that wraps an aggregate: -1 * length(group_concat(...))
# sorts the group with the longest concatenated string first; only that row is returned.
do_execsql_test group_by_complex_order_by {
select u.first_name, group_concat(u.last_name) from users u group by u.first_name order by -1 * length(group_concat(u.last_name)) limit 1;
} {Michael|Love,Finley,Hurst,Molina,Williams,Brown,King,Whitehead,Ochoa,Davis,Rhodes,Mcknight,Reyes,Johnston,Smith,Young,Lopez,Roberts,Green,Cole,Lane,Wagner,Allen,Simpson,Schultz,Perry,Mendez,Gibson,Hale,Williams,Bradford,Johnson,Weber,Nunez,Walls,Gonzalez,Park,Blake,Vazquez,Garcia,Mathews,Pacheco,Johnson,Perez,Gibson,Sparks,Chapman,Tate,Dudley,Miller,Alvarado,Ward,Nguyen,Rosales,Flynn,Ball,Jones,Hoffman,Clarke,Rivera,Moore,Hardin,Dillon,Montgomery,Rodgers,Payne,Williams,Mueller,Hernandez,Ware,Yates,Grimes,Gilmore,Johnson,Clark,Rodriguez,Walters,Powell,Colon,Mccoy,Allen,Quinn,Dunn,Wilson,Thompson,Bradford,Hunter,Gilmore,Woods,Bennett,Collier,Ali,Herrera,Lawson,Garner,Perez,Brown,Pena,Allen,Davis,Washington,Jackson,Khan,Martinez,Blackwell,Lee,Parker,Lynn,Johnson,Benton,Leonard,Munoz,Alvarado,Mathews,Salazar,Nelson,Jones,Carpenter,Walter,Young,Coleman,Berry,Clark,Powers,Meyer,Lewis,Barton,Guzman,Schneider,Hernandez,Mclaughlin,Allen,Atkinson,Woods,Rivera,Jones,Gordon,Dennis,Yoder,Hunt,Vance,Nelson,Park,Barnes,Lang,Williams,Cervantes,Tran,Anderson,Todd,Gonzalez,Lowery,Sanders,Mccullough,Haley,Rogers,Perez,Watson,Weaver,Wise,Walter,Summers,Long,Chan,Williams,Mccoy,Duncan,Roy,West,Christensen,Cuevas,Garcia,Williams,Butler,Anderson,Armstrong,Villarreal,Boyer,Johnson,Dyer,Hurst,Wilkins,Mercer,Taylor,Montes,Mccarty,Gill,Rodriguez,Williams,Copeland,Hansen,Palmer,Alexander,White,Taylor,Bowers,Hughes,Gibbs,Myers,Kennedy,Sanchez,Bell,Wilson,Berry,Spears,Patton,Rose,Smith,Bowen,Nicholson,Stewart,Quinn,Powell,Delgado,Mills,Duncan,Phillips,Grant,Hatfield,Russell,Anderson,Reed,Mahoney,Mcguire,Ortega,Logan,Schmitt,Walker}
# ORDER BY -1 * sum(u.age): the groups are returned in descending order of the sum.
do_execsql_test group_by_complex_order_by_2 {
select u.first_name, sum(u.age) from users u group by u.first_name order by -1 * sum(u.age) limit 10;
} {Michael|11204
David|8758
Robert|8109
Jennifer|7700
John|7299
Christopher|6397
James|5921
Joseph|5711
Brian|5059
William|5047}
# A result column that is a binary expression over two aggregates
# (sum(u.age) + count(1)) in a grouped query.
# NOTE(review): no ORDER BY; the expected rows follow the order in which groups
# are emitted.
do_execsql_test group_by_and_binary_expression_that_depends_on_two_aggregates {
select u.first_name, sum(u.age) + count(1) from users u group by u.first_name limit 5;
} {Aaron|2312
Abigail|906
Adam|1675
Adrian|447
Adriana|84}