Merge 'Add basic support for ANALYZE statement' from Alex Miller

This is part 1 of N implementing #656
This permits only `ANALYZE <table_name>` to work, and all other forms
fail with a parse error (as documented in the tests).
On SQLite, ANALYZE generates:
```
sqlite> CREATE TABLE sqlite_stat1(tbl,idx,stat);
sqlite> CREATE TABLE iiftest(a int, b int, c int); sqlite> EXPLAIN ANALYZE iiftest;
addr  opcode         p1    p2    p3    p4             p5  comment
----  -------------  ----  ----  ----  -------------  --  -------------
0     Init           0     21    0                    0   Start at 21
1     Null           0     1     0                    0   r[1]=NULL
2     OpenWrite      3     4     0     3              0   root=4 iDb=0; sqlite_stat1
3     Rewind         3     9     0                    0
4       Column         3     0     2                    0   r[2]= cursor 3 column 0
5       Ne             3     8     2     BINARY-8       81  if r[2]!=r[3] goto 8
6       Rowid          3     4     0                    0   r[4]=sqlite_stat1.rowid
7       Delete         3     0     0     sqlite_stat1   2
8     Next           3     4     0                    1
9     OpenWrite      0     4     0     3              0   root=4 iDb=0; sqlite_stat1
10    OpenRead       4     2     0     3              0   root=2 iDb=0; iiftest
11    String8        0     11    0     iiftest        0   r[11]='iiftest'; iiftest
12    Count          4     13    0                    0   r[13]=count()
13    IfNot          13    18    0                    0
14    Null           0     12    0                    0   r[12]=NULL
15    MakeRecord     11    3     9     BBB            0   r[9]=mkrec(r[11..13])
16    NewRowid       0     5     0                    0   r[5]=rowid
17    Insert         0     9     5                    8   intkey=r[5] data=r[9]
18    LoadAnalysis   0     0     0                    0
19    Expire         0     0     0                    0
20    Halt           0     0     0                    0
21    Transaction    0     1     9     0              1   usesStmtJournal=0
22    String8        0     3     0     iiftest        0   r[3]='iiftest'
23    Goto           0     1     0                    0
```
Turso can now generate:
```
turso> create table sqlite_stat1(tbl,idx,stat);
turso> create table iiftest(a int, b int, c int);
turso> explain analyze iiftest;
addr  opcode             p1    p2    p3    p4             p5  comment
----  -----------------  ----  ----  ----  -------------  --  -------
0     Init               0     19    0                    0   Start at 19
1     Null               0     1     0                    0   r[1]=NULL
2     OpenWrite          0     2     0                    0   root=2; iDb=0
3     Rewind             0     9     0                    0   Rewind  sqlite_stat1
4       Column           0     0     2                    0   r[2]=sqlite_stat1.tbl
5       Ne               2     3     9                    0   if r[2]!=r[3] goto 9
6       RowId            0     4     0                    0   r[4]=sqlite_stat1.rowid
7       Delete           0     0     0     sqlite_stat1   0
8     Next               0     4     0                    0
9     OpenWrite          1     2     0                    0   root=2; iDb=0
10    OpenRead           2     3     0                    0   =iiftest, root=3, iDb=0
11    String8            0     7     0     iiftest        0   r[7]='iiftest'
12    Count              2     9     0                    0
13    IfNot              9     18    0                    0   if !r[9] goto 18
14    Null               0     8     0                    0   r[8]=NULL
15    MakeRecord         7     3     6                    0   r[6]=mkrec(r[7..9])
16    NewRowid           1     5     0                    0   r[5]=rowid
17    Insert             1     6     5     sqlite_stat1   0   intkey=r[5] data=r[6]
18    Halt               0     0     0                    0
19    String8            0     3     0     iiftest        0   r[3]='iiftest'
20    Goto               0     1     0                    0
```
Note the missing support for LoadAnalysis and Expire, but there's no
optimizer work done yet to leverage any gathered statistics yet anyway.

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #2740
This commit is contained in:
Pekka Enberg
2025-08-23 10:50:57 +03:00
committed by GitHub
3 changed files with 232 additions and 1 deletions

166
core/translate/analyze.rs Normal file
View File

@@ -0,0 +1,166 @@
use turso_parser::ast;
use crate::{
bail_parse_error,
schema::Schema,
util::normalize_ident,
vdbe::{
builder::{CursorType, ProgramBuilder},
insn::{Insn, RegisterOrLiteral::*},
},
Result,
};
pub fn translate_analyze(
target_opt: Option<ast::QualifiedName>,
schema: &Schema,
mut program: ProgramBuilder,
) -> Result<ProgramBuilder> {
let Some(target) = target_opt else {
bail_parse_error!("ANALYZE with no target is not supported");
};
let normalized = normalize_ident(target.name.as_str());
let Some(target_schema) = schema.get_table(&normalized) else {
bail_parse_error!("ANALYZE <schema_name> is not supported");
};
let Some(target_btree) = target_schema.btree() else {
bail_parse_error!("ANALYZE on index is not supported");
};
// This is emitted early because SQLite does, and thus generated VDBE matches a bit closer.
let null_reg = program.alloc_register();
program.emit_insn(Insn::Null {
dest: null_reg,
dest_end: None,
});
if let Some(sqlite_stat1) = schema.get_btree_table("sqlite_stat1") {
// sqlite_stat1 already exists, so we need to remove the row
// corresponding to the stats for the table which we're about to
// ANALYZE. SQLite implements this as a full table scan over
// sqlite_stat1 deleting any rows where the first column (table_name)
// is the targeted table.
let cursor_id = program.alloc_cursor_id(CursorType::BTreeTable(sqlite_stat1.clone()));
program.emit_insn(Insn::OpenWrite {
cursor_id,
root_page: Literal(sqlite_stat1.root_page),
db: 0,
});
let after_loop = program.allocate_label();
program.emit_insn(Insn::Rewind {
cursor_id,
pc_if_empty: after_loop,
});
let loophead = program.allocate_label();
program.preassign_label_to_next_insn(loophead);
let column_reg = program.alloc_register();
program.emit_insn(Insn::Column {
cursor_id,
column: 0,
dest: column_reg,
default: None,
});
let tablename_reg = program.alloc_register();
program.emit_insn(Insn::String8 {
value: target_schema.get_name().to_string(),
dest: tablename_reg,
});
program.mark_last_insn_constant();
// FIXME: The SQLite instruction says p4=BINARY-8 and p5=81. Neither are currently supported in Turso.
program.emit_insn(Insn::Ne {
lhs: column_reg,
rhs: tablename_reg,
target_pc: after_loop,
flags: Default::default(),
collation: None,
});
let rowid_reg = program.alloc_register();
program.emit_insn(Insn::RowId {
cursor_id,
dest: rowid_reg,
});
program.emit_insn(Insn::Delete {
cursor_id,
table_name: "sqlite_stat1".to_string(),
});
program.emit_insn(Insn::Next {
cursor_id,
pc_if_next: loophead,
});
program.preassign_label_to_next_insn(after_loop);
} else {
bail_parse_error!("ANALYZE without an existing sqlite_stat1 is not supported");
};
if target_schema.columns().iter().any(|c| c.primary_key) {
bail_parse_error!("ANALYZE on tables with primary key is not supported");
}
if !target_btree.has_rowid {
bail_parse_error!("ANALYZE on tables without rowid is not supported");
}
// Count the number of rows in the target table, and insert it into sqlite_stat1.
let sqlite_stat1 = schema
.get_btree_table("sqlite_stat1")
.expect("sqlite_stat1 either pre-existed or was just created");
let stat_cursor = program.alloc_cursor_id(CursorType::BTreeTable(sqlite_stat1.clone()));
program.emit_insn(Insn::OpenWrite {
cursor_id: stat_cursor,
root_page: Literal(sqlite_stat1.root_page),
db: 0,
});
let target_cursor = program.alloc_cursor_id(CursorType::BTreeTable(target_btree.clone()));
program.emit_insn(Insn::OpenRead {
cursor_id: target_cursor,
root_page: target_btree.root_page,
db: 0,
});
let rowid_reg = program.alloc_register();
let record_reg = program.alloc_register();
let tablename_reg = program.alloc_register();
let indexname_reg = program.alloc_register();
let count_reg = program.alloc_register();
program.emit_insn(Insn::String8 {
value: target_schema.get_name().to_string(),
dest: tablename_reg,
});
program.emit_insn(Insn::Count {
cursor_id: target_cursor,
target_reg: count_reg,
exact: true,
});
let after_insert = program.allocate_label();
program.emit_insn(Insn::IfNot {
reg: count_reg,
target_pc: after_insert,
jump_if_null: false,
});
program.emit_insn(Insn::Null {
dest: indexname_reg,
dest_end: None,
});
program.emit_insn(Insn::MakeRecord {
start_reg: tablename_reg,
count: 3,
dest_reg: record_reg,
index_name: None,
});
program.emit_insn(Insn::NewRowid {
cursor: stat_cursor,
rowid_reg,
prev_largest_reg: 0,
});
// FIXME: SQLite sets OPFLAG_APPEND on the insert, but that's not supported in turso right now.
// SQLite doesn't emit the table name, but like... why not?
program.emit_insn(Insn::Insert {
cursor: stat_cursor,
key_reg: rowid_reg,
record_reg,
flag: Default::default(),
table_name: "sqlite_stat1".to_string(),
});
program.preassign_label_to_next_insn(after_insert);
// FIXME: Emit LoadAnalysis
// FIXME: Emit Expire
Ok(program)
}

View File

@@ -9,6 +9,7 @@
pub(crate) mod aggregation;
pub(crate) mod alter;
pub(crate) mod analyze;
pub(crate) mod attach;
pub(crate) mod collate;
mod compound_select;
@@ -43,6 +44,7 @@ use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts, QueryMode};
use crate::vdbe::Program;
use crate::{bail_parse_error, Connection, Result, SymbolTable};
use alter::translate_alter_table;
use analyze::translate_analyze;
use index::{translate_create_index, translate_drop_index};
use insert::translate_insert;
use rollback::translate_rollback;
@@ -146,7 +148,7 @@ pub fn translate_inner(
ast::Stmt::AlterTable(alter) => {
translate_alter_table(alter, syms, schema, program, connection, input)?
}
ast::Stmt::Analyze { .. } => bail_parse_error!("ANALYZE not supported yet"),
ast::Stmt::Analyze { name } => translate_analyze(name, schema, program)?,
ast::Stmt::Attach { expr, db_name, key } => {
attach::translate_attach(&expr, &db_name, &key, schema, syms, program)?
}

63
testing/analyze.test Executable file
View File

@@ -0,0 +1,63 @@
#!/usr/bin/env tclsh
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Things that do work:
do_execsql_test_on_specific_db {:memory:} empty-table {
CREATE TABLE sqlite_stat1(tbl,idx,stat);
CREATE TABLE temp (a integer);
ANALYZE temp;
SELECT * FROM sqlite_stat1;
} {}
do_execsql_test_on_specific_db {:memory:} one-row-table {
CREATE TABLE sqlite_stat1(tbl,idx,stat);
CREATE TABLE temp (a integer);
INSERT INTO temp VALUES (1);
ANALYZE temp;
SELECT * FROM sqlite_stat1;
} {temp||1}
do_execsql_test_on_specific_db {:memory:} analyze-deletes {
CREATE TABLE sqlite_stat1(tbl,idx,stat);
INSERT INTO sqlite_stat1 VALUES ('temp', NULL, 10);
CREATE TABLE temp (a integer);
INSERT INTO temp VALUES (1);
ANALYZE temp;
SELECT * FROM sqlite_stat1;
} {temp||1}
# Things that don't work:
do_execsql_test_in_memory_error analyze-all-databases-fails {
ANALYZE;
} {.*ANALYZE.*not supported.*}
do_execsql_test_in_memory_error analyze-one-database-fails {
ANALYZE main;
} {.*ANALYZE.*not supported.*}
do_execsql_test_in_memory_error analyze-without-stat-table-fails {
CREATE TABLE temp (a integer);
ANALYZE temp;
} {.*ANALYZE.*not supported.*}
do_execsql_test_in_memory_error analyze-table-with-pk-fails {
CREATE TABLE sqlite_stat1(tbl,idx,stat);
CREATE TABLE temp (a integer primary key);
ANALYZE temp;
} {.*ANALYZE.*not supported.*}
do_execsql_test_in_memory_error analyze-table-without-rowid-fails {
CREATE TABLE sqlite_stat1(tbl,idx,stat);
CREATE TABLE temp (a integer primary key) WITHOUT ROWID;
ANALYZE temp;
} {.*ANALYZE.*not supported.*}
do_execsql_test_in_memory_error analyze-index-fails {
CREATE TABLE sqlite_stat1(tbl,idx,stat);
CREATE TABLE temp (a integer, b integer);
CREATE INDEX temp_b ON temp (b);
ANALYZE temp_b;
} {.*ANALYZE.*not supported.*}