diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index 5ff69cba1..1486a84b9 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -11,6 +11,47 @@ fn rusqlite_open() -> rusqlite::Connection { sqlite_conn } +fn bench_open(criterion: &mut Criterion) { + // https://github.com/tursodatabase/turso/issues/174 + // The rusqlite benchmark crashes on Mac M1 when using the flamegraph features + let enable_rusqlite = std::env::var("DISABLE_RUSQLITE_BENCHMARK").is_err(); + + if !std::fs::exists("../testing/schema_5k.db").unwrap() { + #[allow(clippy::arc_with_non_send_sync)] + let io = Arc::new(PlatformIO::new().unwrap()); + let db = Database::open_file(io.clone(), "../testing/schema_5k.db", false, false).unwrap(); + let conn = db.connect().unwrap(); + + for i in 0..5000 { + conn.execute( + format!("CREATE TABLE table_{i} ( id INTEGER PRIMARY KEY, name TEXT, value INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP )") + ).unwrap(); + } + } + + let mut group = criterion.benchmark_group("Open/Connect"); + + group.bench_function(BenchmarkId::new("limbo_schema", ""), |b| { + b.iter(|| { + #[allow(clippy::arc_with_non_send_sync)] + let io = Arc::new(PlatformIO::new().unwrap()); + let db = + Database::open_file(io.clone(), "../testing/schema_5k.db", false, false).unwrap(); + black_box(db.connect().unwrap()); + }); + }); + + if enable_rusqlite { + group.bench_function(BenchmarkId::new("sqlite_schema", ""), |b| { + b.iter(|| { + black_box(rusqlite::Connection::open("../testing/schema_5k.db").unwrap()); + }); + }); + } + + group.finish(); +} + fn bench_prepare_query(criterion: &mut Criterion) { // https://github.com/tursodatabase/turso/issues/174 // The rusqlite benchmark crashes on Mac M1 when using the flamegraph features @@ -233,6 +274,6 @@ fn bench_execute_select_count(criterion: &mut Criterion) { criterion_group! { name = benches; config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); - targets = bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count + targets = bench_open, bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count } criterion_main!(benches); diff --git a/core/lib.rs b/core/lib.rs index f5cb80b57..5af423ac6 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -220,13 +220,15 @@ impl Database { let conn = db.connect()?; let schema_version = get_schema_version(&conn)?; schema.write().schema_version = schema_version; - let rows = conn.query("SELECT * FROM sqlite_schema")?; + let mut schema = schema .try_write() .expect("lock on schema should succeed first try"); + let syms = conn.syms.borrow(); + if let Err(LimboError::ExtensionError(e)) = - parse_schema_rows(rows, &mut schema, &syms, None) + schema.make_from_btree(None, conn.pager.clone(), &syms) { // this means that a vtab exists and we no longer have the module loaded. we print // a warning to the user to load the module diff --git a/core/schema.rs b/core/schema.rs index 55e453678..9fe86851b 100644 --- a/core/schema.rs +++ b/core/schema.rs @@ -1,9 +1,15 @@ +use crate::result::LimboResult; +use crate::storage::btree::BTreeCursor; use crate::translate::collate::CollationSeq; use crate::translate::plan::SelectPlan; +use crate::types::CursorResult; +use crate::util::{module_args_from_sql, module_name_from_sql, UnparsedFromSqlIndex}; use crate::{util::normalize_ident, Result}; -use crate::{LimboError, VirtualTable}; +use crate::{LimboError, MvCursor, Pager, SymbolTable, VirtualTable}; use core::fmt; use fallible_iterator::FallibleIterator; +use std::cell::RefCell; +use std::collections::hash_map::Entry; use std::collections::{BTreeSet, HashMap}; use std::rc::Rc; use std::sync::Arc; @@ -134,6 +140,148 @@ impl Schema { pub fn indexes_enabled(&self) -> bool { self.indexes_enabled } + + /// Update [Schema] by scanning the first root page (sqlite_schema) + pub fn make_from_btree( + &mut self, + mv_cursor: Option>>, + pager: Rc, + syms: &SymbolTable, + ) -> Result<()> { + let mut cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), 1); + + let mut from_sql_indexes = Vec::with_capacity(10); + let mut automatic_indices: HashMap> = + HashMap::with_capacity(10); + + match pager.begin_read_tx()? { + CursorResult::Ok(v) => { + if matches!(v, LimboResult::Busy) { + return Err(LimboError::Busy); + } + } + CursorResult::IO => pager.io.run_once()?, + } + + match cursor.rewind()? { + CursorResult::Ok(v) => v, + CursorResult::IO => pager.io.run_once()?, + }; + + loop { + let Some(row) = (loop { + match cursor.record()? { + CursorResult::Ok(v) => break v, + CursorResult::IO => pager.io.run_once()?, + } + }) else { + break; + }; + + let ty = row.get::<&str>(0)?; + match ty { + "table" => { + let root_page = row.get::(3)?; + let sql = row.get::<&str>(4)?; + let create_virtual = "create virtual"; + if root_page == 0 + && sql[0..create_virtual.len()].eq_ignore_ascii_case(create_virtual) + { + let name: &str = row.get::<&str>(1)?; + // a virtual table is found in the sqlite_schema, but it's no + // longer in the in-memory schema. We need to recreate it if + // the module is loaded in the symbol table. + let vtab = if let Some(vtab) = syms.vtabs.get(name) { + vtab.clone() + } else { + let mod_name = module_name_from_sql(sql)?; + crate::VirtualTable::table( + Some(name), + mod_name, + module_args_from_sql(sql)?, + syms, + )? + }; + self.add_virtual_table(vtab); + continue; + } + + let table = BTreeTable::from_sql(sql, root_page as usize)?; + self.add_btree_table(Rc::new(table)); + } + "index" => { + let root_page = row.get::(3)?; + match row.get::<&str>(4) { + Ok(sql) => { + from_sql_indexes.push(UnparsedFromSqlIndex { + table_name: row.get::<&str>(2)?.to_string(), + root_page: root_page as usize, + sql: sql.to_string(), + }); + } + _ => { + // Automatic index on primary key and/or unique constraint, e.g. + // table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b) + // index|sqlite_autoindex_foo_1|foo|3| + let index_name = row.get::<&str>(1)?.to_string(); + let table_name = row.get::<&str>(2)?.to_string(); + let root_page = row.get::(3)?; + match automatic_indices.entry(table_name) { + Entry::Vacant(e) => { + e.insert(vec![(index_name, root_page as usize)]); + } + Entry::Occupied(mut e) => { + e.get_mut().push((index_name, root_page as usize)); + } + }; + } + } + } + _ => {} + }; + + drop(row); + + if matches!(cursor.next()?, CursorResult::IO) { + pager.io.run_once()?; + }; + } + + pager.end_read_tx()?; + + for unparsed_sql_from_index in from_sql_indexes { + if !self.indexes_enabled() { + self.table_set_has_index(&unparsed_sql_from_index.table_name); + } else { + let table = self + .get_btree_table(&unparsed_sql_from_index.table_name) + .unwrap(); + let index = Index::from_sql( + &unparsed_sql_from_index.sql, + unparsed_sql_from_index.root_page, + table.as_ref(), + )?; + self.add_index(Arc::new(index)); + } + } + + for automatic_index in automatic_indices { + if !self.indexes_enabled() { + self.table_set_has_index(&automatic_index.0); + } else { + let table = self.get_btree_table(&automatic_index.0).unwrap(); + let ret_index = Index::automatic_from_primary_key_and_unique( + table.as_ref(), + automatic_index.1, + )?; + for index in ret_index { + self.add_index(Arc::new(index)); + } + } + } + + Ok(()) + } } #[derive(Clone, Debug)] @@ -261,10 +409,11 @@ impl BTreeTable { sql.push_str(", "); } sql.push_str(column.name.as_ref().expect("column name is None")); - if !matches!(column.ty, Type::Null) { + + if !column.ty_str.is_empty() { sql.push(' '); + sql.push_str(&column.ty_str); } - sql.push_str(&column.ty.to_string()); if column.unique { sql.push_str(" UNIQUE"); @@ -419,43 +568,47 @@ fn create_table( // A column defined as exactly INTEGER PRIMARY KEY is a rowid alias, meaning that the rowid // and the value of this column are the same. // https://www.sqlite.org/lang_createtable.html#rowids_and_the_integer_primary_key - let mut typename_exactly_integer = false; - let (ty, ty_str) = match col_def.col_type { - Some(data_type) => { - let s = data_type.name.as_str(); - let ty_str = if matches!( - s.to_uppercase().as_str(), - "TEXT" | "INT" | "INTEGER" | "BLOB" | "REAL" - ) { - s.to_uppercase().to_string() - } else { - s.to_string() - }; + let ty_str = col_def + .col_type + .as_ref() + .map(|ast::Type { name, .. }| name.clone()) + .unwrap_or_default(); + let mut typename_exactly_integer = false; + let ty = match col_def.col_type { + Some(data_type) => 'ty: { // https://www.sqlite.org/datatype3.html - let type_name = ty_str.to_uppercase(); - if type_name.contains("INT") { - typename_exactly_integer = type_name == "INTEGER"; - (Type::Integer, ty_str) - } else if type_name.contains("CHAR") - || type_name.contains("CLOB") - || type_name.contains("TEXT") - { - (Type::Text, ty_str) - } else if type_name.contains("BLOB") { - (Type::Blob, ty_str) - } else if type_name.is_empty() { - (Type::Blob, "".to_string()) - } else if type_name.contains("REAL") - || type_name.contains("FLOA") - || type_name.contains("DOUB") - { - (Type::Real, ty_str) - } else { - (Type::Numeric, ty_str) + let mut type_name = data_type.name; + type_name.make_ascii_uppercase(); + + if type_name.is_empty() { + break 'ty Type::Blob; } + + if type_name == "INTEGER" { + typename_exactly_integer = true; + break 'ty Type::Integer; + } + + if let Some(ty) = type_name.as_bytes().windows(3).find_map(|s| match s { + b"INT" => Some(Type::Integer), + _ => None, + }) { + break 'ty ty; + } + + if let Some(ty) = type_name.as_bytes().windows(4).find_map(|s| match s { + b"CHAR" | b"CLOB" | b"TEXT" => Some(Type::Text), + b"BLOB" => Some(Type::Blob), + b"REAL" | b"FLOA" | b"DOUB" => Some(Type::Real), + _ => None, + }) { + break 'ty ty; + } + + Type::Numeric } - None => (Type::Null, "".to_string()), + None => Type::Null, }; let mut default = None; @@ -464,22 +617,22 @@ fn create_table( let mut order = SortOrder::Asc; let mut unique = false; let mut collation = None; - for c_def in &col_def.constraints { - match &c_def.constraint { + for c_def in col_def.constraints { + match c_def.constraint { turso_sqlite3_parser::ast::ColumnConstraint::PrimaryKey { order: o, .. } => { primary_key = true; if let Some(o) = o { - order = *o; + order = o; } } turso_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } => { notnull = true; } turso_sqlite3_parser::ast::ColumnConstraint::Default(expr) => { - default = Some(expr.clone()) + default = Some(expr) } // TODO: for now we don't check Resolve type of unique turso_sqlite3_parser::ast::ColumnConstraint::Unique(on_conflict) => { @@ -491,7 +644,6 @@ fn create_table( turso_sqlite3_parser::ast::ColumnConstraint::Collate { collation_name } => { collation = Some(CollationSeq::new(collation_name.0.as_str())?); } - // Collate _ => {} } } @@ -1453,49 +1605,13 @@ mod tests { let sql = r#"CREATE TABLE t1 (a InTeGeR);"#; let table = BTreeTable::from_sql(sql, 0)?; let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, "INTEGER"); - Ok(()) - } - - #[test] - pub fn test_col_type_string_int() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a InT);"#; - let table = BTreeTable::from_sql(sql, 0)?; - let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, "INT"); - Ok(()) - } - - #[test] - pub fn test_col_type_string_blob() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a bLoB);"#; - let table = BTreeTable::from_sql(sql, 0)?; - let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, "BLOB"); - Ok(()) - } - - #[test] - pub fn test_col_type_string_empty() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a);"#; - let table = BTreeTable::from_sql(sql, 0)?; - let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, ""); - Ok(()) - } - - #[test] - pub fn test_col_type_string_some_nonsense() -> Result<()> { - let sql = r#"CREATE TABLE t1 (a someNonsenseName);"#; - let table = BTreeTable::from_sql(sql, 0)?; - let column = table.get_column("a").unwrap().1; - assert_eq!(column.ty_str, "someNonsenseName"); + assert_eq!(column.ty_str, "InTeGeR"); Ok(()) } #[test] pub fn test_sqlite_schema() { - let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INTEGER, sql TEXT)"#; + let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INT, sql TEXT)"#; let actual = sqlite_schema_table().to_sql(); assert_eq!(expected, actual); } diff --git a/core/util.rs b/core/util.rs index a89620950..f96256e82 100644 --- a/core/util.rs +++ b/core/util.rs @@ -42,10 +42,10 @@ pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_"; /// Unparsed index that comes from a sql query, i.e not an automatic index /// /// CREATE INDEX idx ON table_name(sql) -struct UnparsedFromSqlIndex { - table_name: String, - root_page: usize, - sql: String, +pub struct UnparsedFromSqlIndex { + pub table_name: String, + pub root_page: usize, + pub sql: String, } pub fn parse_schema_rows( @@ -188,7 +188,7 @@ pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool { strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2)) } -fn module_name_from_sql(sql: &str) -> Result<&str> { +pub fn module_name_from_sql(sql: &str) -> Result<&str> { if let Some(start) = sql.find("USING") { let start = start + 6; // stop at the first space, semicolon, or parenthesis @@ -206,7 +206,7 @@ fn module_name_from_sql(sql: &str) -> Result<&str> { // CREATE VIRTUAL TABLE table_name USING module_name(arg1, arg2, ...); // CREATE VIRTUAL TABLE table_name USING module_name; -fn module_args_from_sql(sql: &str) -> Result> { +pub fn module_args_from_sql(sql: &str) -> Result> { if !sql.contains('(') { return Ok(vec![]); }