From 9c0e850622d578a66be8d510dc844fc1baa89c7e Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Thu, 10 Jul 2025 14:53:22 -0300 Subject: [PATCH 1/8] add connection benchmark --- core/benches/benchmark.rs | 49 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index 5ff69cba1..e74cc0508 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -11,6 +11,53 @@ fn rusqlite_open() -> rusqlite::Connection { sqlite_conn } +fn bench_open(criterion: &mut Criterion) { + // https://github.com/tursodatabase/turso/issues/174 + // The rusqlite benchmark crashes on Mac M1 when using the flamegraph features + let enable_rusqlite = std::env::var("DISABLE_RUSQLITE_BENCHMARK").is_err(); + + if !std::fs::exists("../testing/schema_5k.db").unwrap() { + #[allow(clippy::arc_with_non_send_sync)] + let io = Arc::new(PlatformIO::new().unwrap()); + let db = Database::open_file(io.clone(), "../testing/schema_5k.db", false, false).unwrap(); + let conn = db.connect().unwrap(); + + for i in 0..5000 { + conn.execute( + format!("CREATE TABLE table_{i} ( id INTEGER PRIMARY KEY, name TEXT, value INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP )") + ).unwrap(); + } + } + + let mut group = criterion.benchmark_group("Open/Connect"); + + group.bench_function( + BenchmarkId::new("limbo_schema", ""), + |b| { + b.iter(|| { + #[allow(clippy::arc_with_non_send_sync)] + let io = Arc::new(PlatformIO::new().unwrap()); + let db = Database::open_file(io.clone(), "../testing/schema_5k.db", false, false) + .unwrap(); + black_box(db.connect().unwrap()); + }); + }, + ); + + if enable_rusqlite { + group.bench_function( + BenchmarkId::new("sqlite_schema", ""), + |b| { + b.iter(|| { + black_box(rusqlite::Connection::open("../testing/schema_5k.db").unwrap()); + }); + }, + ); + } + + group.finish(); +} + fn bench_prepare_query(criterion: &mut Criterion) { // https://github.com/tursodatabase/turso/issues/174 // The rusqlite benchmark crashes on Mac M1 when using the flamegraph features @@ -233,6 +280,6 @@ fn bench_execute_select_count(criterion: &mut Criterion) { criterion_group! { name = benches; config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); - targets = bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count + targets = bench_open, bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count } criterion_main!(benches); From a479d0d5e86a28bc040273864ad0af0d92b9fa42 Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Wed, 9 Jul 2025 18:55:43 -0300 Subject: [PATCH 2/8] prevent calling `to_uppercase` --- core/benches/benchmark.rs | 34 ++++++++++++++-------------------- core/schema.rs | 39 +++++++++++++++++---------------------- 2 files changed, 31 insertions(+), 42 deletions(-) diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index e74cc0508..1486a84b9 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -31,28 +31,22 @@ fn bench_open(criterion: &mut Criterion) { let mut group = criterion.benchmark_group("Open/Connect"); - group.bench_function( - BenchmarkId::new("limbo_schema", ""), - |b| { - b.iter(|| { - #[allow(clippy::arc_with_non_send_sync)] - let io = Arc::new(PlatformIO::new().unwrap()); - let db = Database::open_file(io.clone(), "../testing/schema_5k.db", false, false) - .unwrap(); - black_box(db.connect().unwrap()); - }); - }, - ); + group.bench_function(BenchmarkId::new("limbo_schema", ""), |b| { + b.iter(|| { + #[allow(clippy::arc_with_non_send_sync)] + let io = Arc::new(PlatformIO::new().unwrap()); + let db = + Database::open_file(io.clone(), "../testing/schema_5k.db", false, false).unwrap(); + black_box(db.connect().unwrap()); + }); + }); if enable_rusqlite { - group.bench_function( - BenchmarkId::new("sqlite_schema", ""), - |b| { - b.iter(|| { - black_box(rusqlite::Connection::open("../testing/schema_5k.db").unwrap()); - }); - }, - ); + group.bench_function(BenchmarkId::new("sqlite_schema", ""), |b| { + b.iter(|| { + black_box(rusqlite::Connection::open("../testing/schema_5k.db").unwrap()); + }); + }); } group.finish(); diff --git a/core/schema.rs b/core/schema.rs index 55e453678..57ae6e7e0 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -419,43 +419,38 @@ fn create_table( // A column defined as exactly INTEGER PRIMARY KEY is a rowid alias, meaning that the rowid // and the value of this column are the same. // https://www.sqlite.org/lang_createtable.html#rowids_and_the_integer_primary_key - let mut typename_exactly_integer = false; - let (ty, ty_str) = match col_def.col_type { - Some(data_type) => { - let s = data_type.name.as_str(); - let ty_str = if matches!( - s.to_uppercase().as_str(), - "TEXT" | "INT" | "INTEGER" | "BLOB" | "REAL" - ) { - s.to_uppercase().to_string() - } else { - s.to_string() - }; + let ty_str = col_def + .col_type + .as_ref() + .map(|ast::Type { name, .. }| name.clone()) + .unwrap_or_default(); + let mut typename_exactly_integer = false; + let ty = match col_def.col_type { + Some(data_type) => { // https://www.sqlite.org/datatype3.html - let type_name = ty_str.to_uppercase(); + let mut type_name = data_type.name; + type_name.make_ascii_uppercase(); if type_name.contains("INT") { typename_exactly_integer = type_name == "INTEGER"; - (Type::Integer, ty_str) + Type::Integer } else if type_name.contains("CHAR") || type_name.contains("CLOB") || type_name.contains("TEXT") { - (Type::Text, ty_str) - } else if type_name.contains("BLOB") { - (Type::Blob, ty_str) - } else if type_name.is_empty() { - (Type::Blob, "".to_string()) + Type::Text + } else if type_name.contains("BLOB") || type_name.is_empty() { + Type::Blob } else if type_name.contains("REAL") || type_name.contains("FLOA") || type_name.contains("DOUB") { - (Type::Real, ty_str) + Type::Real } else { - (Type::Numeric, ty_str) + Type::Numeric } } - None => (Type::Null, "".to_string()), + None => Type::Null, }; let mut default = None; From c145577bce54f05034af1e3ca0a37a4f064bfaa4 Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Thu, 10 Jul 2025 16:55:56 -0300 Subject: [PATCH 3/8] fix: use `ty_str` for SQL conversion --- core/schema.rs | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 57ae6e7e0..2fb1860c5 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -261,10 +261,7 @@ impl BTreeTable { sql.push_str(", "); } sql.push_str(column.name.as_ref().expect("column name is None")); - if !matches!(column.ty, Type::Null) { - sql.push(' '); - } - sql.push_str(&column.ty.to_string()); + sql.push_str(&column.ty_str.to_string()); if column.unique { sql.push_str(" UNIQUE"); @@ -1452,42 +1449,6 @@ mod tests { Ok(()) } - #[test] - pub fn test_col_type_string_int() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a InT);"#; - let table = BTreeTable::from_sql(sql, 0)?; - let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, "INT"); - Ok(()) - } - - #[test] - pub fn test_col_type_string_blob() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a bLoB);"#; - let table = BTreeTable::from_sql(sql, 0)?; - let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, "BLOB"); - Ok(()) - } - - #[test] - pub fn test_col_type_string_empty() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a);"#; - let table = BTreeTable::from_sql(sql, 0)?; - let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, ""); - Ok(()) - } - - #[test] - pub fn test_col_type_string_some_nonsense() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a someNonsenseName);"#; - let table = BTreeTable::from_sql(sql, 0)?; - let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, "someNonsenseName"); - Ok(()) - } - #[test] pub fn test_sqlite_schema() { let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INTEGER, sql TEXT)"#; From cc17211189edf23a3256e6f19f68103fd4c7c309 Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Thu, 10 Jul 2025 19:21:32 -0300 Subject: [PATCH 4/8] direct btree calls --- core/lib.rs | 6 +- core/schema.rs | 151 ++++++++++++++++++++++++++++++++++++++++++++++++- core/util.rs | 12 ++-- 3 files changed, 160 insertions(+), 9 deletions(-) diff --git a/core/lib.rs b/core/lib.rs index f5cb80b57..5af423ac6 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -220,13 +220,15 @@ impl Database { let conn = db.connect()?; let schema_version = get_schema_version(&conn)?; schema.write().schema_version = schema_version; - let rows = conn.query("SELECT * FROM sqlite_schema")?; + let mut schema = schema .try_write() .expect("lock on schema should succeed first try"); + let syms = conn.syms.borrow(); + if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, &syms, None) + schema.make_from_btree(None, conn.pager.clone(), &syms) { // this means that a vtab exists and we no longer have the module loaded. we print // a warning to the user to load the module diff --git a/core/schema.rs b/core/schema.rs index 2fb1860c5..9738a25d2 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1,9 +1,15 @@ +use crate::result::LimboResult; +use crate::storage::btree::BTreeCursor; use crate::translate::collate::CollationSeq; use crate::translate::plan::SelectPlan; +use crate::types::CursorResult; +use crate::util::{module_args_from_sql, module_name_from_sql, UnparsedFromSqlIndex}; use crate::{util::normalize_ident, Result}; -use crate::{LimboError, VirtualTable}; +use crate::{LimboError, MvCursor, Pager, SymbolTable, VirtualTable}; use core::fmt; use fallible_iterator::FallibleIterator; +use std::cell::RefCell; +use std::collections::hash_map::Entry; use std::collections::{BTreeSet, HashMap}; use std::rc::Rc; use std::sync::Arc; @@ -134,6 +140,149 @@ impl Schema { pub fn indexes_enabled(&self) -> bool { self.indexes_enabled } + + /// Update [Schema] by scanning the first root page (sqlite_schema) + pub fn make_from_btree( + &mut self, + mv_cursor: Option>>, + pager: Rc, + syms: &SymbolTable, + ) -> Result<()> { + let mut cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), 1); + + let mut from_sql_indexes = Vec::with_capacity(10); + let mut automatic_indices: HashMap> = + HashMap::with_capacity(10); + + match pager.begin_read_tx()? { + CursorResult::Ok(v) => { + if matches!(v, LimboResult::Busy) { + return Err(LimboError::Busy); + } + } + CursorResult::IO => pager.io.run_once()?, + } + + match cursor.rewind()? { + CursorResult::Ok(v) => v, + CursorResult::IO => pager.io.run_once()?, + }; + + loop { + let Some(row) = (loop { + match cursor.record()? { + CursorResult::Ok(v) => break v, + CursorResult::IO => pager.io.run_once()?, + } + }) else { + break; + }; + + let ty = row.get::<&str>(0)?; + match ty { + "table" => { + let root_page = row.get::(3)?; + let sql = row.get::<&str>(4)?; + let create_virtual = "create virtual"; + if root_page == 0 + && sql[0..create_virtual.len()].eq_ignore_ascii_case(create_virtual) + { + let name: &str = row.get::<&str>(1)?; + // a virtual table is found in the sqlite_schema, but it's no + // longer in the in-memory schema. We need to recreate it if + // the module is loaded in the symbol table. + let vtab = if let Some(vtab) = syms.vtabs.get(name) { + vtab.clone() + } else { + let mod_name = module_name_from_sql(sql)?; + crate::VirtualTable::table( + Some(name), + mod_name, + module_args_from_sql(sql)?, + syms, + )? + }; + self.add_virtual_table(vtab); + continue; + } + + let table = BTreeTable::from_sql(sql, root_page as usize)?; + self.add_btree_table(Rc::new(table)); + } + "index" => { + let root_page = row.get::(3)?; + match row.get::<&str>(4) { + Ok(sql) => { + from_sql_indexes.push(UnparsedFromSqlIndex { + table_name: row.get::<&str>(2)?.to_string(), + root_page: root_page as usize, + sql: sql.to_string(), + }); + } + _ => { + // Automatic index on primary key and/or unique constraint, e.g. + // table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b) + // index|sqlite_autoindex_foo_1|foo|3| + let index_name = row.get::<&str>(1)?.to_string(); + let table_name = row.get::<&str>(2)?.to_string(); + let root_page = row.get::(3)?; + match automatic_indices.entry(table_name) { + Entry::Vacant(e) => { + e.insert(vec![(index_name, root_page as usize)]); + } + Entry::Occupied(mut e) => { + e.get_mut().push((index_name, root_page as usize)); + } + }; + } + } + } + _ => {} + }; + + drop(row); + + match cursor.next()? { + CursorResult::IO => pager.io.run_once()?, + _ => {} + }; + } + + pager.end_read_tx()?; + + for unparsed_sql_from_index in from_sql_indexes { + if !self.indexes_enabled() { + self.table_set_has_index(&unparsed_sql_from_index.table_name); + } else { + let table = self + .get_btree_table(&unparsed_sql_from_index.table_name) + .unwrap(); + let index = Index::from_sql( + &unparsed_sql_from_index.sql, + unparsed_sql_from_index.root_page, + table.as_ref(), + )?; + self.add_index(Arc::new(index)); + } + } + + for automatic_index in automatic_indices { + if !self.indexes_enabled() { + self.table_set_has_index(&automatic_index.0); + } else { + let table = self.get_btree_table(&automatic_index.0).unwrap(); + let ret_index = Index::automatic_from_primary_key_and_unique( + table.as_ref(), + automatic_index.1, + )?; + for index in ret_index { + self.add_index(Arc::new(index)); + } + } + } + + Ok(()) + } } #[derive(Clone, Debug)] diff --git a/core/util.rs b/core/util.rs index a89620950..f96256e82 100644 --- a/core/util.rs +++ b/core/util.rs @@ -42,10 +42,10 @@ pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_"; /// Unparsed index that comes from a sql query, i.e not an automatic index /// /// CREATE INDEX idx ON table_name(sql) -struct UnparsedFromSqlIndex { - table_name: String, - root_page: usize, - sql: String, +pub struct UnparsedFromSqlIndex { + pub table_name: String, + pub root_page: usize, + pub sql: String, } pub fn parse_schema_rows( @@ -188,7 +188,7 @@ pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool { strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2)) } -fn module_name_from_sql(sql: &str) -> Result<&str> { +pub fn module_name_from_sql(sql: &str) -> Result<&str> { if let Some(start) = sql.find("USING") { let start = start + 6; // stop at the first space, semicolon, or parenthesis @@ -206,7 +206,7 @@ fn module_name_from_sql(sql: &str) -> Result<&str> { // CREATE VIRTUAL TABLE table_name USING module_name(arg1, arg2, ...); // CREATE VIRTUAL TABLE table_name USING module_name; -fn module_args_from_sql(sql: &str) -> Result> { +pub fn module_args_from_sql(sql: &str) -> Result> { if !sql.contains('(') { return Ok(vec![]); } From c300a01120e9b9c10b5ae5374c9aea310e47c3c1 Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Fri, 11 Jul 2025 02:05:56 -0300 Subject: [PATCH 5/8] fix: add space between column name and type --- core/schema.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/schema.rs b/core/schema.rs index 9738a25d2..cd5bbfb5b 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -410,7 +410,11 @@ impl BTreeTable { sql.push_str(", "); } sql.push_str(column.name.as_ref().expect("column name is None")); - sql.push_str(&column.ty_str.to_string()); + + if !column.ty_str.is_empty() { + sql.push(' '); + sql.push_str(&column.ty_str); + } if column.unique { sql.push_str(" UNIQUE"); From b008c787b71d3635b372db1877cb24fa727601ca Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Fri, 11 Jul 2025 05:11:43 -0300 Subject: [PATCH 6/8] faster type substr comparison --- core/schema.rs | 54 +++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index cd5bbfb5b..db6225df7 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -577,28 +577,37 @@ fn create_table( let mut typename_exactly_integer = false; let ty = match col_def.col_type { - Some(data_type) => { + Some(data_type) => 'ty: { // https://www.sqlite.org/datatype3.html let mut type_name = data_type.name; type_name.make_ascii_uppercase(); - if type_name.contains("INT") { - typename_exactly_integer = type_name == "INTEGER"; - Type::Integer - } else if type_name.contains("CHAR") - || type_name.contains("CLOB") - || type_name.contains("TEXT") - { - Type::Text - } else if type_name.contains("BLOB") || type_name.is_empty() { - Type::Blob - } else if type_name.contains("REAL") - || type_name.contains("FLOA") - || type_name.contains("DOUB") - { - Type::Real - } else { - Type::Numeric + + if type_name.is_empty() { + break 'ty Type::Blob; } + + if type_name == "INTEGER" { + typename_exactly_integer = true; + break 'ty Type::Integer; + } + + if let Some(ty) = type_name.as_bytes().windows(3).find_map(|s| match s { + b"INT" => Some(Type::Integer), + _ => None, + }) { + break 'ty ty; + } + + if let Some(ty) = type_name.as_bytes().windows(4).find_map(|s| match s { + b"CHAR" | b"CLOB" | b"TEXT" => Some(Type::Text), + b"BLOB" => Some(Type::Blob), + b"REAL" | b"FLOA" | b"DOUB" => Some(Type::Real), + _ => None, + }) { + break 'ty ty; + } + + Type::Numeric } None => Type::Null, }; @@ -609,22 +618,22 @@ fn create_table( let mut order = SortOrder::Asc; let mut unique = false; let mut collation = None; - for c_def in &col_def.constraints { - match &c_def.constraint { + for c_def in col_def.constraints { + match c_def.constraint { turso_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { order: o, .. } => { primary_key = true; if let Some(o) = o { - order = *o; + order = o; } } turso_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } => { notnull = true; } turso_sqlite3_parser::ast::ColumnConstraint::Default(expr) => { - default = Some(expr.clone()) + default = Some(expr) } // TODO: for now we don't check Resolve type of unique turso_sqlite3_parser::ast::ColumnConstraint::Unique(on_conflict) => { @@ -636,7 +645,6 @@ fn create_table( turso_sqlite3_parser::ast::ColumnConstraint::Collate { collation_name } => { collation = Some(CollationSeq::new(collation_name.0.as_str())?); } - // Collate _ => {} } } From b1341113d7174813ec59bc4ebcf734f4b1611149 Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Fri, 11 Jul 2025 05:26:14 -0300 Subject: [PATCH 7/8] clippy --- core/schema.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index db6225df7..0c3f8ab95 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -242,9 +242,8 @@ impl Schema { drop(row); - match cursor.next()? { - CursorResult::IO => pager.io.run_once()?, - _ => {} + if matches!(cursor.next()?, CursorResult::IO) { + pager.io.run_once()?; }; } From a1e418c999ec2259333f37914da2ace4a9e97f2b Mon Sep 17 00:00:00 2001 From: "Levy A." Date: Fri, 11 Jul 2025 05:32:18 -0300 Subject: [PATCH 8/8] fix tests --- core/schema.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/schema.rs b/core/schema.rs index 0c3f8ab95..9fe86851b 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1605,13 +1605,13 @@ mod tests { let sql = r#"CREATE TABLE t1 (a InTeGeR);"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, "INTEGER"); + assert_eq!(column.ty_str, "InTeGeR"); Ok(()) } #[test] pub fn test_sqlite_schema() { - let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INTEGER, sql TEXT)"#; + let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INT, sql TEXT)"#; let actual = sqlite_schema_table().to_sql(); assert_eq!(expected, actual); }