Merge 'parse_schema_rows optimizations' from Levy A.

- Added a benchmark for opening databases; the main thing slowing down
`Database::open_file` is `parse_schema_rows`.
- `to_uppercase` was being called multiple times, leaving a noticeable
mark on stack traces due to multiple allocations. `make_ascii_uppercase`
reuses the memory and is faster because it does not handle Unicode
characters (still compatible with SQLite).
- Update `Schema` through direct btree calls (`Schema::make_from_btree`)
instead of creating a program.
- Faster type substring comparison using fixed-size `u8` slices.
<img width="952" height="507" alt="image" src="https://github.com/user-attachments/assets/0d0c52ff-05a1-431e-a93d-e333b53c0bb8" />

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #2042
This commit is contained in:
Pekka Enberg
2025-07-12 09:11:20 +03:00
4 changed files with 247 additions and 88 deletions

View File

@@ -11,6 +11,47 @@ fn rusqlite_open() -> rusqlite::Connection {
sqlite_conn
}
/// Benchmarks opening a database file with a large schema and connecting to it.
///
/// If the fixture database is missing it is created first and filled with
/// 5000 `CREATE TABLE` statements. The "Open/Connect" group then measures
/// `Database::open_file` + `connect`, and (unless the
/// `DISABLE_RUSQLITE_BENCHMARK` environment variable is set)
/// `rusqlite::Connection::open` on the same file.
fn bench_open(criterion: &mut Criterion) {
    /// Fixture database shared by the setup step and both benchmark functions.
    const SCHEMA_DB_PATH: &str = "../testing/schema_5k.db";

    // https://github.com/tursodatabase/turso/issues/174
    // The rusqlite benchmark crashes on Mac M1 when using the flamegraph features
    let run_rusqlite = std::env::var("DISABLE_RUSQLITE_BENCHMARK").is_err();

    // One-time setup: build the fixture database when it does not exist yet.
    let fixture_present = std::fs::exists(SCHEMA_DB_PATH).unwrap();
    if !fixture_present {
        #[allow(clippy::arc_with_non_send_sync)]
        let setup_io = Arc::new(PlatformIO::new().unwrap());
        let setup_db = Database::open_file(setup_io.clone(), SCHEMA_DB_PATH, false, false).unwrap();
        let setup_conn = setup_db.connect().unwrap();

        for i in 0..5000 {
            let create_stmt = format!("CREATE TABLE table_{i} ( id INTEGER PRIMARY KEY, name TEXT, value INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP )");
            setup_conn.execute(create_stmt).unwrap();
        }
    }

    let mut open_group = criterion.benchmark_group("Open/Connect");

    // Each iteration builds a fresh IO handle, opens the file and connects.
    open_group.bench_function(BenchmarkId::new("limbo_schema", ""), |bencher| {
        bencher.iter(|| {
            #[allow(clippy::arc_with_non_send_sync)]
            let bench_io = Arc::new(PlatformIO::new().unwrap());
            let opened = Database::open_file(bench_io.clone(), SCHEMA_DB_PATH, false, false).unwrap();
            black_box(opened.connect().unwrap());
        });
    });

    if run_rusqlite {
        open_group.bench_function(BenchmarkId::new("sqlite_schema", ""), |bencher| {
            bencher.iter(|| {
                black_box(rusqlite::Connection::open(SCHEMA_DB_PATH).unwrap());
            });
        });
    }

    open_group.finish();
}
fn bench_prepare_query(criterion: &mut Criterion) {
// https://github.com/tursodatabase/turso/issues/174
// The rusqlite benchmark crashes on Mac M1 when using the flamegraph features
@@ -233,6 +274,6 @@ fn bench_execute_select_count(criterion: &mut Criterion) {
criterion_group! {
name = benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
targets = bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count
targets = bench_open, bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count
}
criterion_main!(benches);

View File

@@ -220,13 +220,15 @@ impl Database {
let conn = db.connect()?;
let schema_version = get_schema_version(&conn)?;
schema.write().schema_version = schema_version;
let rows = conn.query("SELECT * FROM sqlite_schema")?;
let mut schema = schema
.try_write()
.expect("lock on schema should succeed first try");
let syms = conn.syms.borrow();
if let Err(LimboError::ExtensionError(e)) =
parse_schema_rows(rows, &mut schema, &syms, None)
schema.make_from_btree(None, conn.pager.clone(), &syms)
{
// this means that a vtab exists and we no longer have the module loaded. we print
// a warning to the user to load the module

View File

@@ -1,9 +1,15 @@
use crate::result::LimboResult;
use crate::storage::btree::BTreeCursor;
use crate::translate::collate::CollationSeq;
use crate::translate::plan::SelectPlan;
use crate::types::CursorResult;
use crate::util::{module_args_from_sql, module_name_from_sql, UnparsedFromSqlIndex};
use crate::{util::normalize_ident, Result};
use crate::{LimboError, VirtualTable};
use crate::{LimboError, MvCursor, Pager, SymbolTable, VirtualTable};
use core::fmt;
use fallible_iterator::FallibleIterator;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::{BTreeSet, HashMap};
use std::rc::Rc;
use std::sync::Arc;
@@ -134,6 +140,148 @@ impl Schema {
/// Returns the current value of this schema's `indexes_enabled` flag.
pub fn indexes_enabled(&self) -> bool {
self.indexes_enabled
}
/// Update [Schema] by scanning the first root page (sqlite_schema).
///
/// Rows are read directly through a [BTreeCursor] opened on root page 1.
/// Each row is interpreted as `type | name | tbl_name | rootpage | sql`:
///
/// * `"table"` rows are registered immediately, either as a virtual table
///   (root page `0` and SQL starting with `create virtual`, compared
///   ASCII-case-insensitively) or as a btree table parsed from the SQL.
/// * `"index"` rows are only collected during the scan and are resolved after
///   it, once every table is known. Rows with SQL text are parsed with
///   `Index::from_sql`; rows without it are treated as automatic indexes.
/// * Any other row type is ignored.
///
/// # Arguments
/// * `mv_cursor` - forwarded unchanged to `BTreeCursor::new_table`.
/// * `pager` - used to open/close the read transaction and to drive pending
///   I/O whenever a cursor call reports `CursorResult::IO`.
/// * `syms` - symbol table consulted for already-registered virtual tables and
///   handed to `VirtualTable::table` when one has to be recreated.
///
/// # Errors
/// Returns [LimboError::Busy] when `begin_read_tx` reports busy, and propagates
/// any error raised by the pager, the cursor, row decoding or SQL parsing.
///
/// # Panics
/// Panics if indexes are enabled and an index row names a table that
/// `get_btree_table` cannot find.
pub fn make_from_btree(
    &mut self,
    mv_cursor: Option<Rc<RefCell<MvCursor>>>,
    pager: Rc<Pager>,
    syms: &SymbolTable,
) -> Result<()> {
    let mut cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), 1);

    let mut from_sql_indexes = Vec::with_capacity(10);
    let mut automatic_indices: HashMap<String, Vec<(String, usize)>> =
        HashMap::with_capacity(10);

    // NOTE(review): every `?` below returns without calling `end_read_tx`, so an
    // error mid-scan leaves the read transaction open — confirm callers cope.
    match pager.begin_read_tx()? {
        CursorResult::Ok(v) => {
            if matches!(v, LimboResult::Busy) {
                return Err(LimboError::Busy);
            }
        }
        // NOTE(review): `begin_read_tx` is not retried after the I/O step; left
        // as-is because its re-entrancy is not visible here — TODO confirm.
        CursorResult::IO => pager.io.run_once()?,
    }

    // Keep driving I/O until the cursor is actually positioned; a single
    // `run_once` does not guarantee that `rewind` has completed.
    while matches!(cursor.rewind()?, CursorResult::IO) {
        pager.io.run_once()?;
    }

    loop {
        let Some(row) = (loop {
            match cursor.record()? {
                CursorResult::Ok(v) => break v,
                CursorResult::IO => pager.io.run_once()?,
            }
        }) else {
            break;
        };

        let ty = row.get::<&str>(0)?;
        match ty {
            "table" => {
                let root_page = row.get::<i64>(3)?;
                let sql = row.get::<&str>(4)?;
                let create_virtual = "create virtual";
                // Checked byte-prefix comparison: slicing the `&str` directly would
                // panic on SQL shorter than the prefix or on a non-char-boundary cut.
                let is_virtual_table = root_page == 0
                    && sql
                        .as_bytes()
                        .get(..create_virtual.len())
                        .is_some_and(|prefix| {
                            prefix.eq_ignore_ascii_case(create_virtual.as_bytes())
                        });

                if is_virtual_table {
                    let name: &str = row.get::<&str>(1)?;

                    // a virtual table is found in the sqlite_schema, but it's no
                    // longer in the in-memory schema. We need to recreate it if
                    // the module is loaded in the symbol table.
                    let vtab = if let Some(vtab) = syms.vtabs.get(name) {
                        vtab.clone()
                    } else {
                        let mod_name = module_name_from_sql(sql)?;
                        crate::VirtualTable::table(
                            Some(name),
                            mod_name,
                            module_args_from_sql(sql)?,
                            syms,
                        )?
                    };
                    self.add_virtual_table(vtab);
                } else {
                    let table = BTreeTable::from_sql(sql, root_page as usize)?;
                    self.add_btree_table(Rc::new(table));
                }
            }
            "index" => {
                let root_page = row.get::<i64>(3)?;
                match row.get::<&str>(4) {
                    Ok(sql) => {
                        from_sql_indexes.push(UnparsedFromSqlIndex {
                            table_name: row.get::<&str>(2)?.to_string(),
                            root_page: root_page as usize,
                            sql: sql.to_string(),
                        });
                    }
                    _ => {
                        // Automatic index on primary key and/or unique constraint, e.g.
                        // table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b)
                        // index|sqlite_autoindex_foo_1|foo|3|
                        let index_name = row.get::<&str>(1)?.to_string();
                        let table_name = row.get::<&str>(2)?.to_string();
                        match automatic_indices.entry(table_name) {
                            Entry::Vacant(e) => {
                                e.insert(vec![(index_name, root_page as usize)]);
                            }
                            Entry::Occupied(mut e) => {
                                e.get_mut().push((index_name, root_page as usize));
                            }
                        };
                    }
                }
            }
            _ => {}
        };

        // Release the row before the cursor is advanced.
        drop(row);

        // Retry `next` until it completes; otherwise a pending I/O would leave the
        // cursor on the same row for the following `record` call.
        while matches!(cursor.next()?, CursorResult::IO) {
            pager.io.run_once()?;
        }
    }

    pager.end_read_tx()?;

    // Indexes are resolved only now, after every table row has been registered.
    for unparsed_sql_from_index in from_sql_indexes {
        if !self.indexes_enabled() {
            self.table_set_has_index(&unparsed_sql_from_index.table_name);
        } else {
            let table = self
                .get_btree_table(&unparsed_sql_from_index.table_name)
                .unwrap();
            let index = Index::from_sql(
                &unparsed_sql_from_index.sql,
                unparsed_sql_from_index.root_page,
                table.as_ref(),
            )?;
            self.add_index(Arc::new(index));
        }
    }

    for automatic_index in automatic_indices {
        if !self.indexes_enabled() {
            self.table_set_has_index(&automatic_index.0);
        } else {
            let table = self.get_btree_table(&automatic_index.0).unwrap();
            let ret_index = Index::automatic_from_primary_key_and_unique(
                table.as_ref(),
                automatic_index.1,
            )?;
            for index in ret_index {
                self.add_index(Arc::new(index));
            }
        }
    }

    Ok(())
}
}
#[derive(Clone, Debug)]
@@ -261,10 +409,11 @@ impl BTreeTable {
sql.push_str(", ");
}
sql.push_str(column.name.as_ref().expect("column name is None"));
if !matches!(column.ty, Type::Null) {
if !column.ty_str.is_empty() {
sql.push(' ');
sql.push_str(&column.ty_str);
}
sql.push_str(&column.ty.to_string());
if column.unique {
sql.push_str(" UNIQUE");
@@ -419,43 +568,47 @@ fn create_table(
// A column defined as exactly INTEGER PRIMARY KEY is a rowid alias, meaning that the rowid
// and the value of this column are the same.
// https://www.sqlite.org/lang_createtable.html#rowids_and_the_integer_primary_key
let mut typename_exactly_integer = false;
let (ty, ty_str) = match col_def.col_type {
Some(data_type) => {
let s = data_type.name.as_str();
let ty_str = if matches!(
s.to_uppercase().as_str(),
"TEXT" | "INT" | "INTEGER" | "BLOB" | "REAL"
) {
s.to_uppercase().to_string()
} else {
s.to_string()
};
let ty_str = col_def
.col_type
.as_ref()
.map(|ast::Type { name, .. }| name.clone())
.unwrap_or_default();
let mut typename_exactly_integer = false;
let ty = match col_def.col_type {
Some(data_type) => 'ty: {
// https://www.sqlite.org/datatype3.html
let type_name = ty_str.to_uppercase();
if type_name.contains("INT") {
typename_exactly_integer = type_name == "INTEGER";
(Type::Integer, ty_str)
} else if type_name.contains("CHAR")
|| type_name.contains("CLOB")
|| type_name.contains("TEXT")
{
(Type::Text, ty_str)
} else if type_name.contains("BLOB") {
(Type::Blob, ty_str)
} else if type_name.is_empty() {
(Type::Blob, "".to_string())
} else if type_name.contains("REAL")
|| type_name.contains("FLOA")
|| type_name.contains("DOUB")
{
(Type::Real, ty_str)
} else {
(Type::Numeric, ty_str)
let mut type_name = data_type.name;
type_name.make_ascii_uppercase();
if type_name.is_empty() {
break 'ty Type::Blob;
}
if type_name == "INTEGER" {
typename_exactly_integer = true;
break 'ty Type::Integer;
}
if let Some(ty) = type_name.as_bytes().windows(3).find_map(|s| match s {
b"INT" => Some(Type::Integer),
_ => None,
}) {
break 'ty ty;
}
if let Some(ty) = type_name.as_bytes().windows(4).find_map(|s| match s {
b"CHAR" | b"CLOB" | b"TEXT" => Some(Type::Text),
b"BLOB" => Some(Type::Blob),
b"REAL" | b"FLOA" | b"DOUB" => Some(Type::Real),
_ => None,
}) {
break 'ty ty;
}
Type::Numeric
}
None => (Type::Null, "".to_string()),
None => Type::Null,
};
let mut default = None;
@@ -464,22 +617,22 @@ fn create_table(
let mut order = SortOrder::Asc;
let mut unique = false;
let mut collation = None;
for c_def in &col_def.constraints {
match &c_def.constraint {
for c_def in col_def.constraints {
match c_def.constraint {
turso_sqlite3_parser::ast::ColumnConstraint::PrimaryKey {
order: o,
..
} => {
primary_key = true;
if let Some(o) = o {
order = *o;
order = o;
}
}
turso_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } => {
notnull = true;
}
turso_sqlite3_parser::ast::ColumnConstraint::Default(expr) => {
default = Some(expr.clone())
default = Some(expr)
}
// TODO: for now we don't check Resolve type of unique
turso_sqlite3_parser::ast::ColumnConstraint::Unique(on_conflict) => {
@@ -491,7 +644,6 @@ fn create_table(
turso_sqlite3_parser::ast::ColumnConstraint::Collate { collation_name } => {
collation = Some(CollationSeq::new(collation_name.0.as_str())?);
}
// Collate
_ => {}
}
}
@@ -1453,49 +1605,13 @@ mod tests {
let sql = r#"CREATE TABLE t1 (a InTeGeR);"#;
let table = BTreeTable::from_sql(sql, 0)?;
let column = table.get_column("a").unwrap().1;
assert_eq!(column.ty_str, "INTEGER");
Ok(())
}
#[test]
pub fn test_col_type_string_int() -> Result<()> {
let sql = r#"CREATE TABLE t1 (a InT);"#;
let table = BTreeTable::from_sql(sql, 0)?;
let column = table.get_column("a").unwrap().1;
assert_eq!(column.ty_str, "INT");
Ok(())
}
#[test]
pub fn test_col_type_string_blob() -> Result<()> {
let sql = r#"CREATE TABLE t1 (a bLoB);"#;
let table = BTreeTable::from_sql(sql, 0)?;
let column = table.get_column("a").unwrap().1;
assert_eq!(column.ty_str, "BLOB");
Ok(())
}
#[test]
pub fn test_col_type_string_empty() -> Result<()> {
let sql = r#"CREATE TABLE t1 (a);"#;
let table = BTreeTable::from_sql(sql, 0)?;
let column = table.get_column("a").unwrap().1;
assert_eq!(column.ty_str, "");
Ok(())
}
#[test]
pub fn test_col_type_string_some_nonsense() -> Result<()> {
let sql = r#"CREATE TABLE t1 (a someNonsenseName);"#;
let table = BTreeTable::from_sql(sql, 0)?;
let column = table.get_column("a").unwrap().1;
assert_eq!(column.ty_str, "someNonsenseName");
assert_eq!(column.ty_str, "InTeGeR");
Ok(())
}
#[test]
pub fn test_sqlite_schema() {
let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INTEGER, sql TEXT)"#;
let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INT, sql TEXT)"#;
let actual = sqlite_schema_table().to_sql();
assert_eq!(expected, actual);
}

View File

@@ -42,10 +42,10 @@ pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_";
/// Unparsed index that comes from a sql query, i.e not an automatic index
///
/// CREATE INDEX idx ON table_name(sql)
struct UnparsedFromSqlIndex {
table_name: String,
root_page: usize,
sql: String,
pub struct UnparsedFromSqlIndex {
pub table_name: String,
pub root_page: usize,
pub sql: String,
}
pub fn parse_schema_rows(
@@ -188,7 +188,7 @@ pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool {
strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2))
}
fn module_name_from_sql(sql: &str) -> Result<&str> {
pub fn module_name_from_sql(sql: &str) -> Result<&str> {
if let Some(start) = sql.find("USING") {
let start = start + 6;
// stop at the first space, semicolon, or parenthesis
@@ -206,7 +206,7 @@ fn module_name_from_sql(sql: &str) -> Result<&str> {
// CREATE VIRTUAL TABLE table_name USING module_name(arg1, arg2, ...);
// CREATE VIRTUAL TABLE table_name USING module_name;
fn module_args_from_sql(sql: &str) -> Result<Vec<turso_ext::Value>> {
pub fn module_args_from_sql(sql: &str) -> Result<Vec<turso_ext::Value>> {
if !sql.contains('(') {
return Ok(vec![]);
}