mirror of
https://github.com/aljazceru/turso.git
synced 2025-12-30 14:34:22 +01:00
Merge 'parse_schema_rows optimizations' from Levy A.
- Also added a benchmark for opening databases; the main thing slowing down `Database::open_file` is `parse_schema_rows`. - `to_uppercase` was being called multiple times, leaving a noticeable mark on stack traces due to the repeated allocations. `make_ascii_uppercase` reuses the existing memory and is faster because it does not handle Unicode characters (still compatible with SQLite). - Do direct btree calls with `Schema::make_from_btree` instead of creating a program to update `Schema`. - Faster type substring comparison using fixed-size `u8` slices. <img width="952" height="507" alt="image" src="https://github.com/user-attachments/assets/0d0c52ff-05a1-431e-a93d-e333b53c0bb8" /> Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com> Closes #2042
This commit is contained in:
@@ -11,6 +11,47 @@ fn rusqlite_open() -> rusqlite::Connection {
|
||||
sqlite_conn
|
||||
}
|
||||
|
||||
fn bench_open(criterion: &mut Criterion) {
|
||||
// https://github.com/tursodatabase/turso/issues/174
|
||||
// The rusqlite benchmark crashes on Mac M1 when using the flamegraph features
|
||||
let enable_rusqlite = std::env::var("DISABLE_RUSQLITE_BENCHMARK").is_err();
|
||||
|
||||
if !std::fs::exists("../testing/schema_5k.db").unwrap() {
|
||||
#[allow(clippy::arc_with_non_send_sync)]
|
||||
let io = Arc::new(PlatformIO::new().unwrap());
|
||||
let db = Database::open_file(io.clone(), "../testing/schema_5k.db", false, false).unwrap();
|
||||
let conn = db.connect().unwrap();
|
||||
|
||||
for i in 0..5000 {
|
||||
conn.execute(
|
||||
format!("CREATE TABLE table_{i} ( id INTEGER PRIMARY KEY, name TEXT, value INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP )")
|
||||
).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
let mut group = criterion.benchmark_group("Open/Connect");
|
||||
|
||||
group.bench_function(BenchmarkId::new("limbo_schema", ""), |b| {
|
||||
b.iter(|| {
|
||||
#[allow(clippy::arc_with_non_send_sync)]
|
||||
let io = Arc::new(PlatformIO::new().unwrap());
|
||||
let db =
|
||||
Database::open_file(io.clone(), "../testing/schema_5k.db", false, false).unwrap();
|
||||
black_box(db.connect().unwrap());
|
||||
});
|
||||
});
|
||||
|
||||
if enable_rusqlite {
|
||||
group.bench_function(BenchmarkId::new("sqlite_schema", ""), |b| {
|
||||
b.iter(|| {
|
||||
black_box(rusqlite::Connection::open("../testing/schema_5k.db").unwrap());
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_prepare_query(criterion: &mut Criterion) {
|
||||
// https://github.com/tursodatabase/turso/issues/174
|
||||
// The rusqlite benchmark crashes on Mac M1 when using the flamegraph features
|
||||
@@ -233,6 +274,6 @@ fn bench_execute_select_count(criterion: &mut Criterion) {
|
||||
criterion_group! {
|
||||
name = benches;
|
||||
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
|
||||
targets = bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count
|
||||
targets = bench_open, bench_prepare_query, bench_execute_select_1, bench_execute_select_rows, bench_execute_select_count
|
||||
}
|
||||
criterion_main!(benches);
|
||||
|
||||
@@ -220,13 +220,15 @@ impl Database {
|
||||
let conn = db.connect()?;
|
||||
let schema_version = get_schema_version(&conn)?;
|
||||
schema.write().schema_version = schema_version;
|
||||
let rows = conn.query("SELECT * FROM sqlite_schema")?;
|
||||
|
||||
let mut schema = schema
|
||||
.try_write()
|
||||
.expect("lock on schema should succeed first try");
|
||||
|
||||
let syms = conn.syms.borrow();
|
||||
|
||||
if let Err(LimboError::ExtensionError(e)) =
|
||||
parse_schema_rows(rows, &mut schema, &syms, None)
|
||||
schema.make_from_btree(None, conn.pager.clone(), &syms)
|
||||
{
|
||||
// this means that a vtab exists and we no longer have the module loaded. we print
|
||||
// a warning to the user to load the module
|
||||
|
||||
274
core/schema.rs
274
core/schema.rs
@@ -1,9 +1,15 @@
|
||||
use crate::result::LimboResult;
|
||||
use crate::storage::btree::BTreeCursor;
|
||||
use crate::translate::collate::CollationSeq;
|
||||
use crate::translate::plan::SelectPlan;
|
||||
use crate::types::CursorResult;
|
||||
use crate::util::{module_args_from_sql, module_name_from_sql, UnparsedFromSqlIndex};
|
||||
use crate::{util::normalize_ident, Result};
|
||||
use crate::{LimboError, VirtualTable};
|
||||
use crate::{LimboError, MvCursor, Pager, SymbolTable, VirtualTable};
|
||||
use core::fmt;
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use std::cell::RefCell;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
@@ -134,6 +140,148 @@ impl Schema {
|
||||
/// Returns the current value of the `indexes_enabled` field.
pub fn indexes_enabled(&self) -> bool {
|
||||
self.indexes_enabled
|
||||
}
|
||||
|
||||
/// Update [Schema] by scanning the first root page (sqlite_schema)
|
||||
pub fn make_from_btree(
|
||||
&mut self,
|
||||
mv_cursor: Option<Rc<RefCell<MvCursor>>>,
|
||||
pager: Rc<Pager>,
|
||||
syms: &SymbolTable,
|
||||
) -> Result<()> {
|
||||
let mut cursor = BTreeCursor::new_table(mv_cursor, pager.clone(), 1);
|
||||
|
||||
let mut from_sql_indexes = Vec::with_capacity(10);
|
||||
let mut automatic_indices: HashMap<String, Vec<(String, usize)>> =
|
||||
HashMap::with_capacity(10);
|
||||
|
||||
match pager.begin_read_tx()? {
|
||||
CursorResult::Ok(v) => {
|
||||
if matches!(v, LimboResult::Busy) {
|
||||
return Err(LimboError::Busy);
|
||||
}
|
||||
}
|
||||
CursorResult::IO => pager.io.run_once()?,
|
||||
}
|
||||
|
||||
match cursor.rewind()? {
|
||||
CursorResult::Ok(v) => v,
|
||||
CursorResult::IO => pager.io.run_once()?,
|
||||
};
|
||||
|
||||
loop {
|
||||
let Some(row) = (loop {
|
||||
match cursor.record()? {
|
||||
CursorResult::Ok(v) => break v,
|
||||
CursorResult::IO => pager.io.run_once()?,
|
||||
}
|
||||
}) else {
|
||||
break;
|
||||
};
|
||||
|
||||
let ty = row.get::<&str>(0)?;
|
||||
match ty {
|
||||
"table" => {
|
||||
let root_page = row.get::<i64>(3)?;
|
||||
let sql = row.get::<&str>(4)?;
|
||||
let create_virtual = "create virtual";
|
||||
if root_page == 0
|
||||
&& sql[0..create_virtual.len()].eq_ignore_ascii_case(create_virtual)
|
||||
{
|
||||
let name: &str = row.get::<&str>(1)?;
|
||||
// a virtual table is found in the sqlite_schema, but it's no
|
||||
// longer in the in-memory schema. We need to recreate it if
|
||||
// the module is loaded in the symbol table.
|
||||
let vtab = if let Some(vtab) = syms.vtabs.get(name) {
|
||||
vtab.clone()
|
||||
} else {
|
||||
let mod_name = module_name_from_sql(sql)?;
|
||||
crate::VirtualTable::table(
|
||||
Some(name),
|
||||
mod_name,
|
||||
module_args_from_sql(sql)?,
|
||||
syms,
|
||||
)?
|
||||
};
|
||||
self.add_virtual_table(vtab);
|
||||
continue;
|
||||
}
|
||||
|
||||
let table = BTreeTable::from_sql(sql, root_page as usize)?;
|
||||
self.add_btree_table(Rc::new(table));
|
||||
}
|
||||
"index" => {
|
||||
let root_page = row.get::<i64>(3)?;
|
||||
match row.get::<&str>(4) {
|
||||
Ok(sql) => {
|
||||
from_sql_indexes.push(UnparsedFromSqlIndex {
|
||||
table_name: row.get::<&str>(2)?.to_string(),
|
||||
root_page: root_page as usize,
|
||||
sql: sql.to_string(),
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
// Automatic index on primary key and/or unique constraint, e.g.
|
||||
// table|foo|foo|2|CREATE TABLE foo (a text PRIMARY KEY, b)
|
||||
// index|sqlite_autoindex_foo_1|foo|3|
|
||||
let index_name = row.get::<&str>(1)?.to_string();
|
||||
let table_name = row.get::<&str>(2)?.to_string();
|
||||
let root_page = row.get::<i64>(3)?;
|
||||
match automatic_indices.entry(table_name) {
|
||||
Entry::Vacant(e) => {
|
||||
e.insert(vec![(index_name, root_page as usize)]);
|
||||
}
|
||||
Entry::Occupied(mut e) => {
|
||||
e.get_mut().push((index_name, root_page as usize));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
|
||||
drop(row);
|
||||
|
||||
if matches!(cursor.next()?, CursorResult::IO) {
|
||||
pager.io.run_once()?;
|
||||
};
|
||||
}
|
||||
|
||||
pager.end_read_tx()?;
|
||||
|
||||
for unparsed_sql_from_index in from_sql_indexes {
|
||||
if !self.indexes_enabled() {
|
||||
self.table_set_has_index(&unparsed_sql_from_index.table_name);
|
||||
} else {
|
||||
let table = self
|
||||
.get_btree_table(&unparsed_sql_from_index.table_name)
|
||||
.unwrap();
|
||||
let index = Index::from_sql(
|
||||
&unparsed_sql_from_index.sql,
|
||||
unparsed_sql_from_index.root_page,
|
||||
table.as_ref(),
|
||||
)?;
|
||||
self.add_index(Arc::new(index));
|
||||
}
|
||||
}
|
||||
|
||||
for automatic_index in automatic_indices {
|
||||
if !self.indexes_enabled() {
|
||||
self.table_set_has_index(&automatic_index.0);
|
||||
} else {
|
||||
let table = self.get_btree_table(&automatic_index.0).unwrap();
|
||||
let ret_index = Index::automatic_from_primary_key_and_unique(
|
||||
table.as_ref(),
|
||||
automatic_index.1,
|
||||
)?;
|
||||
for index in ret_index {
|
||||
self.add_index(Arc::new(index));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -261,10 +409,11 @@ impl BTreeTable {
|
||||
sql.push_str(", ");
|
||||
}
|
||||
sql.push_str(column.name.as_ref().expect("column name is None"));
|
||||
if !matches!(column.ty, Type::Null) {
|
||||
|
||||
if !column.ty_str.is_empty() {
|
||||
sql.push(' ');
|
||||
sql.push_str(&column.ty_str);
|
||||
}
|
||||
sql.push_str(&column.ty.to_string());
|
||||
|
||||
if column.unique {
|
||||
sql.push_str(" UNIQUE");
|
||||
@@ -419,43 +568,47 @@ fn create_table(
|
||||
// A column defined as exactly INTEGER PRIMARY KEY is a rowid alias, meaning that the rowid
|
||||
// and the value of this column are the same.
|
||||
// https://www.sqlite.org/lang_createtable.html#rowids_and_the_integer_primary_key
|
||||
let mut typename_exactly_integer = false;
|
||||
let (ty, ty_str) = match col_def.col_type {
|
||||
Some(data_type) => {
|
||||
let s = data_type.name.as_str();
|
||||
let ty_str = if matches!(
|
||||
s.to_uppercase().as_str(),
|
||||
"TEXT" | "INT" | "INTEGER" | "BLOB" | "REAL"
|
||||
) {
|
||||
s.to_uppercase().to_string()
|
||||
} else {
|
||||
s.to_string()
|
||||
};
|
||||
let ty_str = col_def
|
||||
.col_type
|
||||
.as_ref()
|
||||
.map(|ast::Type { name, .. }| name.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut typename_exactly_integer = false;
|
||||
let ty = match col_def.col_type {
|
||||
Some(data_type) => 'ty: {
|
||||
// https://www.sqlite.org/datatype3.html
|
||||
let type_name = ty_str.to_uppercase();
|
||||
if type_name.contains("INT") {
|
||||
typename_exactly_integer = type_name == "INTEGER";
|
||||
(Type::Integer, ty_str)
|
||||
} else if type_name.contains("CHAR")
|
||||
|| type_name.contains("CLOB")
|
||||
|| type_name.contains("TEXT")
|
||||
{
|
||||
(Type::Text, ty_str)
|
||||
} else if type_name.contains("BLOB") {
|
||||
(Type::Blob, ty_str)
|
||||
} else if type_name.is_empty() {
|
||||
(Type::Blob, "".to_string())
|
||||
} else if type_name.contains("REAL")
|
||||
|| type_name.contains("FLOA")
|
||||
|| type_name.contains("DOUB")
|
||||
{
|
||||
(Type::Real, ty_str)
|
||||
} else {
|
||||
(Type::Numeric, ty_str)
|
||||
let mut type_name = data_type.name;
|
||||
type_name.make_ascii_uppercase();
|
||||
|
||||
if type_name.is_empty() {
|
||||
break 'ty Type::Blob;
|
||||
}
|
||||
|
||||
if type_name == "INTEGER" {
|
||||
typename_exactly_integer = true;
|
||||
break 'ty Type::Integer;
|
||||
}
|
||||
|
||||
if let Some(ty) = type_name.as_bytes().windows(3).find_map(|s| match s {
|
||||
b"INT" => Some(Type::Integer),
|
||||
_ => None,
|
||||
}) {
|
||||
break 'ty ty;
|
||||
}
|
||||
|
||||
if let Some(ty) = type_name.as_bytes().windows(4).find_map(|s| match s {
|
||||
b"CHAR" | b"CLOB" | b"TEXT" => Some(Type::Text),
|
||||
b"BLOB" => Some(Type::Blob),
|
||||
b"REAL" | b"FLOA" | b"DOUB" => Some(Type::Real),
|
||||
_ => None,
|
||||
}) {
|
||||
break 'ty ty;
|
||||
}
|
||||
|
||||
Type::Numeric
|
||||
}
|
||||
None => (Type::Null, "".to_string()),
|
||||
None => Type::Null,
|
||||
};
|
||||
|
||||
let mut default = None;
|
||||
@@ -464,22 +617,22 @@ fn create_table(
|
||||
let mut order = SortOrder::Asc;
|
||||
let mut unique = false;
|
||||
let mut collation = None;
|
||||
for c_def in &col_def.constraints {
|
||||
match &c_def.constraint {
|
||||
for c_def in col_def.constraints {
|
||||
match c_def.constraint {
|
||||
turso_sqlite3_parser::ast::ColumnConstraint::PrimaryKey {
|
||||
order: o,
|
||||
..
|
||||
} => {
|
||||
primary_key = true;
|
||||
if let Some(o) = o {
|
||||
order = *o;
|
||||
order = o;
|
||||
}
|
||||
}
|
||||
turso_sqlite3_parser::ast::ColumnConstraint::NotNull { .. } => {
|
||||
notnull = true;
|
||||
}
|
||||
turso_sqlite3_parser::ast::ColumnConstraint::Default(expr) => {
|
||||
default = Some(expr.clone())
|
||||
default = Some(expr)
|
||||
}
|
||||
// TODO: for now we don't check Resolve type of unique
|
||||
turso_sqlite3_parser::ast::ColumnConstraint::Unique(on_conflict) => {
|
||||
@@ -491,7 +644,6 @@ fn create_table(
|
||||
turso_sqlite3_parser::ast::ColumnConstraint::Collate { collation_name } => {
|
||||
collation = Some(CollationSeq::new(collation_name.0.as_str())?);
|
||||
}
|
||||
// Collate
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
@@ -1453,49 +1605,13 @@ mod tests {
|
||||
let sql = r#"CREATE TABLE t1 (a InTeGeR);"#;
|
||||
let table = BTreeTable::from_sql(sql, 0)?;
|
||||
let column = table.get_column("a").unwrap().1;
|
||||
assert_eq!(column.ty_str, "INTEGER");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_col_type_string_int() -> Result<()> {
|
||||
let sql = r#"CREATE TABLE t1 (a InT);"#;
|
||||
let table = BTreeTable::from_sql(sql, 0)?;
|
||||
let column = table.get_column("a").unwrap().1;
|
||||
assert_eq!(column.ty_str, "INT");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_col_type_string_blob() -> Result<()> {
|
||||
let sql = r#"CREATE TABLE t1 (a bLoB);"#;
|
||||
let table = BTreeTable::from_sql(sql, 0)?;
|
||||
let column = table.get_column("a").unwrap().1;
|
||||
assert_eq!(column.ty_str, "BLOB");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_col_type_string_empty() -> Result<()> {
|
||||
let sql = r#"CREATE TABLE t1 (a);"#;
|
||||
let table = BTreeTable::from_sql(sql, 0)?;
|
||||
let column = table.get_column("a").unwrap().1;
|
||||
assert_eq!(column.ty_str, "");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_col_type_string_some_nonsense() -> Result<()> {
|
||||
let sql = r#"CREATE TABLE t1 (a someNonsenseName);"#;
|
||||
let table = BTreeTable::from_sql(sql, 0)?;
|
||||
let column = table.get_column("a").unwrap().1;
|
||||
assert_eq!(column.ty_str, "someNonsenseName");
|
||||
assert_eq!(column.ty_str, "InTeGeR");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_sqlite_schema() {
|
||||
let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INTEGER, sql TEXT)"#;
|
||||
let expected = r#"CREATE TABLE sqlite_schema (type TEXT, name TEXT, tbl_name TEXT, rootpage INT, sql TEXT)"#;
|
||||
let actual = sqlite_schema_table().to_sql();
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
12
core/util.rs
12
core/util.rs
@@ -42,10 +42,10 @@ pub const PRIMARY_KEY_AUTOMATIC_INDEX_NAME_PREFIX: &str = "sqlite_autoindex_";
|
||||
/// Unparsed index that comes from a sql query, i.e not an automatic index
|
||||
///
|
||||
/// CREATE INDEX idx ON table_name(sql)
|
||||
struct UnparsedFromSqlIndex {
|
||||
table_name: String,
|
||||
root_page: usize,
|
||||
sql: String,
|
||||
pub struct UnparsedFromSqlIndex {
|
||||
pub table_name: String,
|
||||
pub root_page: usize,
|
||||
pub sql: String,
|
||||
}
|
||||
|
||||
pub fn parse_schema_rows(
|
||||
@@ -188,7 +188,7 @@ pub fn check_ident_equivalency(ident1: &str, ident2: &str) -> bool {
|
||||
strip_quotes(ident1).eq_ignore_ascii_case(strip_quotes(ident2))
|
||||
}
|
||||
|
||||
fn module_name_from_sql(sql: &str) -> Result<&str> {
|
||||
pub fn module_name_from_sql(sql: &str) -> Result<&str> {
|
||||
if let Some(start) = sql.find("USING") {
|
||||
let start = start + 6;
|
||||
// stop at the first space, semicolon, or parenthesis
|
||||
@@ -206,7 +206,7 @@ fn module_name_from_sql(sql: &str) -> Result<&str> {
|
||||
|
||||
// CREATE VIRTUAL TABLE table_name USING module_name(arg1, arg2, ...);
|
||||
// CREATE VIRTUAL TABLE table_name USING module_name;
|
||||
fn module_args_from_sql(sql: &str) -> Result<Vec<turso_ext::Value>> {
|
||||
pub fn module_args_from_sql(sql: &str) -> Result<Vec<turso_ext::Value>> {
|
||||
if !sql.contains('(') {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user