diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e79e1f0c9..f1d77fe2e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,6 +17,10 @@ If you are new to Rust, the following books are recommended reading: * Jim Blandy et al. [Programming Rust, 2nd Edition](https://www.oreilly.com/library/view/programming-rust-2nd/9781492052586/). 2021 * Steve Klabnik and Carol Nichols. [The Rust Programming Language](https://doc.rust-lang.org/book/#the-rust-programming-language). 2022 +Examples of contributing + +* [How to contribute a SQL function implementation](docs/internals/functions.md) + ## Finding things to work on The issue tracker has issues tagged with [good first issue](https://github.com/penberg/limbo/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22), diff --git a/bindings/python/build.rs b/bindings/python/build.rs index 8f01c1a67..0475124bb 100644 --- a/bindings/python/build.rs +++ b/bindings/python/build.rs @@ -1,4 +1,3 @@ fn main() { pyo3_build_config::use_pyo3_cfgs(); - println!("cargo::rustc-check-cfg=cfg(allocator, values(\"default\", \"mimalloc\"))"); } diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index 1b3514032..764fcfcf5 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -1,11 +1,11 @@ use anyhow::Result; use errors::*; -use limbo_core::IO; use pyo3::prelude::*; use pyo3::types::PyList; use pyo3::types::PyTuple; +use std::cell::RefCell; use std::rc::Rc; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; mod errors; @@ -78,7 +78,7 @@ pub struct Cursor { #[pyo3(get)] rowcount: i64, - smt: Option>>, + smt: Option>>, } // SAFETY: The limbo_core crate guarantees that `Cursor` is thread-safe. @@ -90,26 +90,33 @@ impl Cursor { #[pyo3(signature = (sql, parameters=None))] pub fn execute(&mut self, sql: &str, parameters: Option>) -> Result { let stmt_is_dml = stmt_is_dml(sql); + let stmt_is_ddl = stmt_is_ddl(sql); - let conn_lock = - self.conn.conn.lock().map_err(|_| { - PyErr::new::("Failed to acquire connection lock") - })?; - - let statement = conn_lock.prepare(sql).map_err(|e| { + let statement = self.conn.conn.prepare(sql).map_err(|e| { PyErr::new::(format!("Failed to prepare statement: {:?}", e)) })?; - self.smt = Some(Arc::new(Mutex::new(statement))); + let stmt = Rc::new(RefCell::new(statement)); - // TODO: use stmt_is_dml to set rowcount - if stmt_is_dml { - return Err(PyErr::new::( - "DML statements (INSERT/UPDATE/DELETE) are not fully supported in this version", - ) - .into()); + // For DDL and DML statements, + // we need to execute the statement immediately + if stmt_is_ddl || stmt_is_dml { + loop { + match stmt.borrow_mut().step().map_err(|e| { + PyErr::new::(format!("Step error: {:?}", e)) + })? { + limbo_core::StepResult::IO => { + self.conn.io.run_once().map_err(|e| { + PyErr::new::(format!("IO error: {:?}", e)) + })?; + } + _ => break, + } + } } + self.smt = Some(stmt); + Ok(Cursor { smt: self.smt.clone(), conn: self.conn.clone(), @@ -121,11 +128,8 @@ impl Cursor { pub fn fetchone(&mut self, py: Python) -> Result> { if let Some(smt) = &self.smt { - let mut smt_lock = smt.lock().map_err(|_| { - PyErr::new::("Failed to acquire statement lock") - })?; loop { - match smt_lock.step().map_err(|e| { + match smt.borrow_mut().step().map_err(|e| { PyErr::new::(format!("Step error: {:?}", e)) })? { limbo_core::StepResult::Row(row) => { @@ -157,14 +161,9 @@ impl Cursor { pub fn fetchall(&mut self, py: Python) -> Result> { let mut results = Vec::new(); - if let Some(smt) = &self.smt { - let mut smt_lock = smt.lock().map_err(|_| { - PyErr::new::("Failed to acquire statement lock") - })?; - loop { - match smt_lock.step().map_err(|e| { + match smt.borrow_mut().step().map_err(|e| { PyErr::new::(format!("Step error: {:?}", e)) })? { limbo_core::StepResult::Row(row) => { @@ -221,11 +220,17 @@ fn stmt_is_dml(sql: &str) -> bool { sql.starts_with("INSERT") || sql.starts_with("UPDATE") || sql.starts_with("DELETE") } +fn stmt_is_ddl(sql: &str) -> bool { + let sql = sql.trim(); + let sql = sql.to_uppercase(); + sql.starts_with("CREATE") || sql.starts_with("ALTER") || sql.starts_with("DROP") +} + #[pyclass] #[derive(Clone)] pub struct Connection { - conn: Arc>>, - io: Arc, + conn: Rc, + io: Arc, } // SAFETY: The limbo_core crate guarantees that `Connection` is thread-safe. @@ -263,16 +268,24 @@ impl Connection { #[allow(clippy::arc_with_non_send_sync)] #[pyfunction] pub fn connect(path: &str) -> Result { - let io = Arc::new(limbo_core::PlatformIO::new().map_err(|e| { - PyErr::new::(format!("IO initialization failed: {:?}", e)) - })?); - let db = limbo_core::Database::open_file(io.clone(), path) - .map_err(|e| PyErr::new::(format!("Failed to open database: {:?}", e)))?; - let conn: Rc = db.connect(); - Ok(Connection { - conn: Arc::new(Mutex::new(conn)), - io, - }) + match path { + ":memory:" => { + let io: Arc = Arc::new(limbo_core::MemoryIO::new()?); + let db = limbo_core::Database::open_file(io.clone(), path).map_err(|e| { + PyErr::new::(format!("Failed to open database: {:?}", e)) + })?; + let conn: Rc = db.connect(); + Ok(Connection { conn, io }) + } + path => { + let io: Arc = Arc::new(limbo_core::PlatformIO::new()?); + let db = limbo_core::Database::open_file(io.clone(), path).map_err(|e| { + PyErr::new::(format!("Failed to open database: {:?}", e)) + })?; + let conn: Rc = db.connect(); + Ok(Connection { conn, io }) + } + } } fn row_to_py(py: Python, row: &limbo_core::Row) -> PyObject { diff --git a/bindings/python/tests/test_database.py b/bindings/python/tests/test_database.py index 601c67135..63241f19a 100644 --- a/bindings/python/tests/test_database.py +++ b/bindings/python/tests/test_database.py @@ -27,6 +27,20 @@ def test_fetchall_select_user_ids(provider): assert user_ids == [(1,), (2,)] +@pytest.mark.parametrize("provider", ["sqlite3", "limbo"]) +def test_in_memory_fetchone_select_all_users(provider): + conn = connect(provider, ":memory:") + cursor = conn.cursor() + cursor.execute("CREATE TABLE users (id INT PRIMARY KEY, username TEXT)") + cursor.execute("INSERT INTO users VALUES (1, 'alice')") + + cursor.execute("SELECT * FROM users") + + alice = cursor.fetchone() + assert alice + assert alice == (1, "alice") + + @pytest.mark.parametrize("provider", ["sqlite3", "limbo"]) def test_fetchone_select_all_users(provider): conn = connect(provider, "tests/database.db") diff --git a/core/function.rs b/core/function.rs index 8628d1d37..25949983a 100644 --- a/core/function.rs +++ b/core/function.rs @@ -17,9 +17,9 @@ impl Display for JsonFunc { f, "{}", match self { - JsonFunc::Json => "json".to_string(), - JsonFunc::JsonArray => "json_array".to_string(), - JsonFunc::JsonExtract => "json_extract".to_string(), + Self::Json => "json".to_string(), + Self::JsonArray => "json_array".to_string(), + Self::JsonExtract => "json_extract".to_string(), Self::JsonArrayLength => "json_array_length".to_string(), } ) diff --git a/core/io/linux.rs b/core/io/linux.rs index e765cc8ac..70de8ef79 100644 --- a/core/io/linux.rs +++ b/core/io/linux.rs @@ -11,6 +11,7 @@ use std::rc::Rc; use thiserror::Error; const MAX_IOVECS: usize = 128; +const SQPOLL_IDLE: u32 = 1000; #[derive(Debug, Error)] enum LinuxIOError { @@ -49,7 +50,13 @@ struct InnerLinuxIO { impl LinuxIO { pub fn new() -> Result { - let ring = io_uring::IoUring::new(MAX_IOVECS as u32)?; + let ring = match io_uring::IoUring::builder() + .setup_sqpoll(SQPOLL_IDLE) + .build(MAX_IOVECS as u32) + { + Ok(ring) => ring, + Err(_) => io_uring::IoUring::new(MAX_IOVECS as u32)?, + }; let inner = InnerLinuxIO { ring: WrappedIOUring { ring, diff --git a/core/json/de.rs b/core/json/de.rs index 8d12f2957..efc78994e 100644 --- a/core/json/de.rs +++ b/core/json/de.rs @@ -41,7 +41,7 @@ impl<'de> Deserializer<'de> { } } -impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { +impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { type Error = Error; fn deserialize_any(self, visitor: V) -> Result diff --git a/core/json/mod.rs b/core/json/mod.rs index 848605f3f..0878378a3 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -66,7 +66,7 @@ fn get_json_value(json_value: &OwnedValue) -> crate::Result { } }, OwnedValue::Blob(b) => { - if let Ok(json) = jsonb::from_slice(b) { + if let Ok(_json) = jsonb::from_slice(b) { todo!("jsonb to json conversion"); } else { crate::bail_parse_error!("malformed JSON"); @@ -137,7 +137,7 @@ pub fn json_array_length( }; match arr_val { - Val::Array(val) => (Ok(OwnedValue::Integer(val.len() as i64))), + Val::Array(val) => Ok(OwnedValue::Integer(val.len() as i64)), Val::Null => Ok(OwnedValue::Null), _ => Ok(OwnedValue::Integer(0)), } diff --git a/core/json/ser.rs b/core/json/ser.rs index 1afb8b497..3d5646584 100644 --- a/core/json/ser.rs +++ b/core/json/ser.rs @@ -30,7 +30,7 @@ impl Serializer { } } -impl<'a> ser::Serializer for &'a mut Serializer { +impl ser::Serializer for &mut Serializer { type Ok = (); type Error = Error; @@ -237,7 +237,7 @@ impl<'a> ser::Serializer for &'a mut Serializer { } } -impl<'a> ser::SerializeSeq for &'a mut Serializer { +impl ser::SerializeSeq for &mut Serializer { type Ok = (); type Error = Error; @@ -257,7 +257,7 @@ impl<'a> ser::SerializeSeq for &'a mut Serializer { } } -impl<'a> ser::SerializeTuple for &'a mut Serializer { +impl ser::SerializeTuple for &mut Serializer { type Ok = (); type Error = Error; @@ -273,7 +273,7 @@ impl<'a> ser::SerializeTuple for &'a mut Serializer { } } -impl<'a> ser::SerializeTupleStruct for &'a mut Serializer { +impl ser::SerializeTupleStruct for &mut Serializer { type Ok = (); type Error = Error; @@ -289,7 +289,7 @@ impl<'a> ser::SerializeTupleStruct for &'a mut Serializer { } } -impl<'a> ser::SerializeTupleVariant for &'a mut Serializer { +impl ser::SerializeTupleVariant for &mut Serializer { type Ok = (); type Error = Error; @@ -306,7 +306,7 @@ impl<'a> ser::SerializeTupleVariant for &'a mut Serializer { } } -impl<'a> ser::SerializeMap for &'a mut Serializer { +impl ser::SerializeMap for &mut Serializer { type Ok = (); type Error = Error; @@ -334,7 +334,7 @@ impl<'a> ser::SerializeMap for &'a mut Serializer { } } -impl<'a> ser::SerializeStruct for &'a mut Serializer { +impl ser::SerializeStruct for &mut Serializer { type Ok = (); type Error = Error; @@ -351,7 +351,7 @@ impl<'a> ser::SerializeStruct for &'a mut Serializer { } } -impl<'a> ser::SerializeStructVariant for &'a mut Serializer { +impl ser::SerializeStructVariant for &mut Serializer { type Ok = (); type Error = Error; diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 77f459968..ed9070514 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -1378,10 +1378,10 @@ impl BTreeCursor { PageType::IndexLeaf => todo!(), }; cbrk -= size; - if cbrk < first_cell as u64 || pc + size > usable_space { + if cbrk < first_cell || pc + size > usable_space { todo!("corrupt"); } - assert!(cbrk + size <= usable_space && cbrk >= first_cell as u64); + assert!(cbrk + size <= usable_space && cbrk >= first_cell); // set new pointer write_buf[cell_idx..cell_idx + 2].copy_from_slice(&(cbrk as u16).to_be_bytes()); // copy payload @@ -1394,7 +1394,7 @@ impl BTreeCursor { // if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){ // return SQLITE_CORRUPT_PAGE(pPage); // } - assert!(cbrk >= first_cell as u64); + assert!(cbrk >= first_cell); let write_buf = page.as_ptr(); // set new first byte of cell content @@ -1437,7 +1437,7 @@ impl BTreeCursor { // #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions) let mut free_space_bytes = - page.unallocated_region_size() as usize + page.num_frag_free_bytes() as usize; + page.unallocated_region_size() + page.num_frag_free_bytes() as usize; // #3 is computed by iterating over the freeblocks linked list let mut cur_freeblock_ptr = page.first_freeblock() as usize; diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index c9b66d98e..9d1c124da 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -241,12 +241,7 @@ fn emit_program_for_select( inner_loop_emit(&mut program, &mut plan, &mut metadata)?; // Clean up and close the main execution loop - close_loop( - &mut program, - &plan.source, - &mut metadata, - &plan.referenced_tables, - )?; + close_loop(&mut program, &plan.source, &mut metadata)?; if let Some(skip_loops_label) = skip_loops_label { program.resolve_label(skip_loops_label, program.offset()); @@ -338,12 +333,7 @@ fn emit_program_for_delete( emit_delete_insns(&mut program, &plan.source, &plan.limit, &metadata)?; // Clean up and close the main execution loop - close_loop( - &mut program, - &plan.source, - &mut metadata, - &plan.referenced_tables, - )?; + close_loop(&mut program, &plan.source, &mut metadata)?; if let Some(skip_loops_label) = skip_loops_label { program.resolve_label(skip_loops_label, program.offset()); @@ -663,7 +653,7 @@ fn open_loop( }); } - return Ok(()); + Ok(()) } SourceOperator::Scan { id, @@ -722,7 +712,7 @@ fn open_loop( } } - return Ok(()); + Ok(()) } SourceOperator::Search { id, @@ -905,11 +895,9 @@ fn open_loop( } } - return Ok(()); - } - SourceOperator::Nothing => { - return Ok(()); + Ok(()) } + SourceOperator::Nothing => Ok(()), } } @@ -978,14 +966,14 @@ fn inner_loop_emit( ); } // if we have neither, we emit a ResultRow. In that case, if we have a Limit, we handle that with DecrJumpZero. - return inner_loop_source_emit( + inner_loop_source_emit( program, &plan.result_columns, &plan.aggregates, metadata, InnerLoopEmitTarget::ResultRow { limit: plan.limit }, &plan.referenced_tables, - ); + ) } /// This is a helper function for inner_loop_emit, @@ -1111,7 +1099,6 @@ fn close_loop( program: &mut ProgramBuilder, source: &SourceOperator, metadata: &mut Metadata, - referenced_tables: &[BTreeTableReference], ) -> Result<()> { match source { SourceOperator::Join { @@ -1121,7 +1108,7 @@ fn close_loop( outer, .. } => { - close_loop(program, right, metadata, referenced_tables)?; + close_loop(program, right, metadata)?; if *outer { let lj_meta = metadata.left_joins.get(id).unwrap(); @@ -1168,7 +1155,7 @@ fn close_loop( assert!(program.offset() == jump_offset); } - close_loop(program, left, metadata, referenced_tables)?; + close_loop(program, left, metadata)?; Ok(()) } diff --git a/core/translate/expr.rs b/core/translate/expr.rs index bbcd23b74..fcc211a91 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -2281,14 +2281,10 @@ pub fn translate_aggregation( let delimiter_reg = program.alloc_register(); let expr = &agg.args[0]; - let delimiter_expr: ast::Expr; - - match &agg.args[1] { - ast::Expr::Column { .. } => { - delimiter_expr = agg.args[1].clone(); - } + let delimiter_expr = match &agg.args[1] { + ast::Expr::Column { .. } => agg.args[1].clone(), ast::Expr::Literal(ast::Literal::String(s)) => { - delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + ast::Expr::Literal(ast::Literal::String(s.to_string())) } _ => crate::bail_parse_error!("Incorrect delimiter parameter"), }; @@ -2464,14 +2460,10 @@ pub fn translate_aggregation_groupby( let expr_reg = program.alloc_register(); let delimiter_reg = program.alloc_register(); - let delimiter_expr: ast::Expr; - - match &agg.args[1] { - ast::Expr::Column { .. } => { - delimiter_expr = agg.args[1].clone(); - } + let delimiter_expr = match &agg.args[1] { + ast::Expr::Column { .. } => agg.args[1].clone(), ast::Expr::Literal(ast::Literal::String(s)) => { - delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string())); + ast::Expr::Literal(ast::Literal::String(s.to_string())) } _ => crate::bail_parse_error!("Incorrect delimiter parameter"), }; diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 31b263195..50d0277f7 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -349,7 +349,7 @@ pub fn translate_insert( // Create new rowid if a) not provided by user or b) provided by user but is NULL program.emit_insn(Insn::NewRowid { cursor: cursor_id, - rowid_reg: rowid_reg, + rowid_reg, prev_largest_reg: 0, }); @@ -366,7 +366,7 @@ pub fn translate_insert( program.emit_insn_with_label_dependency( Insn::NotExists { cursor: cursor_id, - rowid_reg: rowid_reg, + rowid_reg, target_pc: make_record_label, }, make_record_label, diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 1463c0402..217241f2a 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -666,7 +666,7 @@ impl Optimizable for ast::Expr { if id.0.eq_ignore_ascii_case("false") { return Ok(Some(ConstantPredicate::AlwaysFalse)); } - return Ok(None); + Ok(None) } Self::Literal(lit) => match lit { ast::Literal::Null => Ok(Some(ConstantPredicate::AlwaysFalse)), diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 154f7e1c0..24b5a5d73 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -476,7 +476,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ Blob(&'a Vec), } -impl<'a> Display for Value<'a> { +impl Display for Value<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Null => write!(f, "NULL"), @@ -647,7 +647,8 @@ mod tests { #[test] fn test_serialize_float() { - let record = OwnedRecord::new(vec![OwnedValue::Float(3.14159)]); + #[warn(clippy::approx_constant)] + let record = OwnedRecord::new(vec![OwnedValue::Float(3.15555)]); let mut buf = Vec::new(); record.serialize(&mut buf); @@ -660,7 +661,7 @@ mod tests { // Check that the bytes after the header can be interpreted as the float let float_bytes = &buf[header_length..header_length + size_of::()]; let float = f64::from_be_bytes(float_bytes.try_into().unwrap()); - assert_eq!(float, 3.14159); + assert_eq!(float, 3.15555); // Check that buffer length is correct assert_eq!(buf.len(), header_length + size_of::()); } @@ -709,7 +710,7 @@ mod tests { let record = OwnedRecord::new(vec![ OwnedValue::Null, OwnedValue::Integer(42), - OwnedValue::Float(3.14), + OwnedValue::Float(3.15), OwnedValue::Text(LimboText::new(text.clone())), ]); let mut buf = Vec::new(); @@ -741,7 +742,7 @@ mod tests { let val_text = String::from_utf8(text_bytes.to_vec()).unwrap(); assert_eq!(val_int8, 42); - assert_eq!(val_float, 3.14); + assert_eq!(val_float, 3.15); assert_eq!(val_text, "test"); // Check that buffer length is correct diff --git a/core/vdbe/likeop.rs b/core/vdbe/likeop.rs index f4ef62f8d..ad69ba97c 100644 --- a/core/vdbe/likeop.rs +++ b/core/vdbe/likeop.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use regex::{Regex, RegexBuilder}; use crate::{types::OwnedValue, LimboError}; @@ -8,11 +10,9 @@ pub fn construct_like_escape_arg(escape_value: &OwnedValue) -> Result Ok(escape), - _ => { - return Result::Err(LimboError::Constraint( - "ESCAPE expression must be a single character".to_string(), - )) - } + _ => Result::Err(LimboError::Constraint( + "ESCAPE expression must be a single character".to_string(), + )), } } _ => { @@ -63,6 +63,124 @@ fn construct_like_regex_with_escape(pattern: &str, escape: char) -> Regex { .unwrap() } +// Implements GLOB pattern matching. Caches the constructed regex if a cache is provided +pub fn exec_glob( + regex_cache: Option<&mut HashMap>, + pattern: &str, + text: &str, +) -> bool { + if let Some(cache) = regex_cache { + match cache.get(pattern) { + Some(re) => re.is_match(text), + None => match construct_glob_regex(pattern) { + Ok(re) => { + let res = re.is_match(text); + cache.insert(pattern.to_string(), re); + res + } + Err(_) => false, + }, + } + } else { + construct_glob_regex(pattern) + .map(|re| re.is_match(text)) + .unwrap_or(false) + } +} + +fn push_char_to_regex_pattern(c: char, regex_pattern: &mut String) { + if regex_syntax::is_meta_character(c) { + regex_pattern.push('\\'); + } + regex_pattern.push(c); +} + +fn construct_glob_regex(pattern: &str) -> Result { + let mut regex_pattern = String::with_capacity(pattern.len() * 2); + + regex_pattern.push('^'); + + let mut chars = pattern.chars(); + let mut bracket_closed = true; + + while let Some(ch) = chars.next() { + match ch { + '[' => { + bracket_closed = false; + regex_pattern.push('['); + if let Some(next_ch) = chars.next() { + match next_ch { + ']' => { + // The string enclosed by the brackets cannot be empty; + // therefore ']' can be allowed between the brackets, + // provided that it is the first character. + // so this means + // - `[]]` will be translated to `[\]]` + // - `[[]` will be translated to `[\[]` + regex_pattern.push_str("\\]"); + } + '^' => { + // For the most cases we can pass `^` directly to regex + // but in certain cases like [^][a] , `[^]` will make regex crate + // throw unenclosed character class. So this means + // - `[^][a]` will be translated to `[^\]a]` + regex_pattern.push('^'); + if let Some(next_ch_2) = chars.next() { + match next_ch_2 { + ']' => { + regex_pattern.push('\\'); + regex_pattern.push(']'); + } + c => { + push_char_to_regex_pattern(c, &mut regex_pattern); + } + } + } + } + c => { + push_char_to_regex_pattern(c, &mut regex_pattern); + } + } + }; + + while let Some(next_ch) = chars.next() { + match next_ch { + ']' => { + bracket_closed = true; + regex_pattern.push(']'); + break; + } + '-' => { + regex_pattern.push('-'); + } + c => { + push_char_to_regex_pattern(c, &mut regex_pattern); + } + } + } + } + '?' => { + regex_pattern.push('.'); + } + '*' => { + regex_pattern.push_str(".*"); + } + c => { + push_char_to_regex_pattern(c, &mut regex_pattern); + } + } + } + regex_pattern.push('$'); + + if bracket_closed { + Ok(Regex::new(®ex_pattern).unwrap()) + } else { + Result::Err(LimboError::Constraint( + "blob pattern is not closed".to_string(), + )) + } +} + #[cfg(test)] mod test { use super::*; @@ -84,4 +202,16 @@ mod test { assert!(!exec_like_with_escape("abcXX", "abc", 'X')); assert!(!exec_like_with_escape("abcXX", "abcXX", 'X')); } + + #[test] + fn test_glob_no_cache() { + assert!(exec_glob(None, r#"?*/abc/?*"#, r#"x//a/ab/abc/y"#)); + assert!(exec_glob(None, r#"a[1^]"#, r#"a1"#)); + assert!(exec_glob(None, r#"a[1^]*"#, r#"a^"#)); + assert!(!exec_glob(None, r#"a[a*"#, r#"a["#)); + assert!(!exec_glob(None, r#"a[a"#, r#"a[a"#)); + assert!(exec_glob(None, r#"a[[]"#, r#"a["#)); + assert!(exec_glob(None, r#"abc[^][*?]efg"#, r#"abcdefg"#)); + assert!(!exec_glob(None, r#"abc[^][*?]efg"#, r#"abc]efg"#)); + } } diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 9589b3faa..1e5b30790 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -39,12 +39,13 @@ use crate::types::{ use crate::util::parse_schema_rows; use crate::vdbe::insn::Insn; #[cfg(feature = "json")] -use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length}; +use crate::{ + function::JsonFunc, json::get_json, json::json_array, json::json_array_length, + json::json_extract, +}; use crate::{Connection, Result, Rows, TransactionState, DATABASE_VERSION}; use datetime::{exec_date, exec_time, exec_unixepoch}; -use likeop::{construct_like_escape_arg, exec_like_with_escape}; - -use crate::json::json_extract; +use likeop::{construct_like_escape_arg, exec_glob, exec_like_with_escape}; use rand::distributions::{Distribution, Uniform}; use rand::{thread_rng, Rng}; use regex::{Regex, RegexBuilder}; @@ -2701,7 +2702,7 @@ pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue { let word: String = s .value .chars() - .filter(|c| !c.is_digit(10)) + .filter(|c| !c.is_ascii_digit()) .collect::() .replace(" ", ""); if word.is_empty() { @@ -2743,7 +2744,7 @@ pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue { // Remove adjacent same digits let tmp = tmp.chars().fold(String::new(), |mut acc, ch| { - if acc.chars().last() != Some(ch) { + if !acc.ends_with(ch) { acc.push(ch); } acc @@ -2759,7 +2760,7 @@ pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue { // If the first symbol is a digit, replace it with the saved first letter if let Some(first_digit) = result.chars().next() { - if first_digit.is_digit(10) { + if first_digit.is_ascii_digit() { result.replace_range(0..1, &first_letter.to_string()); } } @@ -2898,31 +2899,6 @@ fn exec_like(regex_cache: Option<&mut HashMap>, pattern: &str, te } } -fn construct_glob_regex(pattern: &str) -> Regex { - let mut regex_pattern = String::from("^"); - regex_pattern.push_str(&pattern.replace('*', ".*").replace("?", ".")); - regex_pattern.push('$'); - Regex::new(®ex_pattern).unwrap() -} - -// Implements GLOB pattern matching. Caches the constructed regex if a cache is provided -fn exec_glob(regex_cache: Option<&mut HashMap>, pattern: &str, text: &str) -> bool { - if let Some(cache) = regex_cache { - match cache.get(pattern) { - Some(re) => re.is_match(text), - None => { - let re = construct_glob_regex(pattern); - let res = re.is_match(text); - cache.insert(pattern.to_string(), re); - res - } - } - } else { - let re = construct_glob_regex(pattern); - re.is_match(text) - } -} - fn exec_min(regs: Vec<&OwnedValue>) -> OwnedValue { regs.iter() .min() @@ -4114,7 +4090,7 @@ mod tests { expected_len: 2, }, TestCase { - input: OwnedValue::Float(-3.14), + input: OwnedValue::Float(-3.15), expected_len: 1, }, TestCase { diff --git a/docs/internals/functions.md b/docs/internals/functions.md new file mode 100644 index 000000000..ff71b6864 --- /dev/null +++ b/docs/internals/functions.md @@ -0,0 +1,225 @@ +# How to contribute a SQL function implementation? + +Steps +1. Pick a `SQL functions` in [COMPAT.md](../../COMPAT.md) file with a No (not implemented yet) status. +2. Create an issue for that function. +3. Implement the function in a feature branch. +4. Push it as a Merge Request, get it review. + +## An example with function `date(..)` + +> Note that the files, code location, steps might be not exactly the same because of refactor but the idea of the changes needed in each layer stays. + +[Issue #158](https://github.com/tursodatabase/limbo/issues/158) was created for it. +Refer to commit [4ff7058](https://github.com/tursodatabase/limbo/commit/4ff705868a054643f6113cbe009655c32bc5f235). + +``` +sql function: string +--Parser--> +enum Func +--translate--> +Instruction +--VDBE--> +Result +``` + +TODO for implementing the function: +- analysis + - read and try out how the function works in SQLite. + - compare `explain` output of SQLite and Limbo. +- add/ update the function definition in `functions.rs`. +- add/ update how to function is translated from `definition` to `instruction` in virtual machine layer VDBE. +- add/ update the function Rust execution code and tests in vdbe layer. +- add/ update how the bytecode `Program` executes when steps into the function. +- add/ update TCL tests for this function in limbo/testing. +- update doc for function compatibility. + +### Analysis + +How `date` works in SQLite? +```bash +> sqlite3 + +sqlite> explain select date('now'); +addr opcode p1 p2 p3 p4 p5 comment +---- ------------- ---- ---- ---- ------------- -- ------------- +0 Init 0 6 0 0 Start at 6 +1 Once 0 3 0 0 +2 Function 0 0 2 date(-1) 0 r[2]=func() +3 Copy 2 1 0 0 r[1]=r[2] +4 ResultRow 1 1 0 0 output=r[1] +5 Halt 0 0 0 0 +6 Goto 0 1 0 0 +``` + +Comparing that with `Limbo`: +```bash +# created a sqlite database file database.db +# or cargo run to use the memory mode if it is already available. +> cargo run database.db + +Enter ".help" for usage hints. +limbo> explain select date('now'); +Parse error: unknown function date +``` + +We can see that the function is not implemented yet so the Parser did not understand it and throw an error `Parse error: unknown function date`. +- we only need to pay attention to opcode `Function` at addr 2. The rest is already set up in limbo. +- we have up to 5 registers p1 to p5 for each opcode. + +### Function definition + +For limbo to understand the meaning of `date`, we need to define it as a Function somewhere. +That place can be found currently in `core/functions.rs`. We need to edit 3 places +1. add to ScalarFunc as `date` is a scalar function. +```diff +// file core/functions.rs +pub enum ScalarFunc { + // other funcs... + Soundex, ++ Date, + Time, + // other funcs... +} +``` +2. add to Display to show the function as string in our program. +```diff +// file core/functions.rs +impl Display for ScalarFunc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let str = match self { + // ... + ScalarFunc::Soundex => "soundex".to_string(), ++ ScalarFunc::Date => "date".to_string(), + ScalarFunc::Time => "time".to_string(), + // ... +} +``` +3. add to `fn resolve_function(..)` of `impl Func` to enable parsing from str to this function. +```diff +// file core/functions.rs +impl Func { + pub fn resolve_function(name: &str, arg_count: usize) -> Result { + match name { + // ... ++ "date" => Ok(Func::Scalar(ScalarFunc::Date)), + // ... +} +``` + +### Function translation + +How to translate the function into bytecode `Instruction`? +- `date` function can have zero to many arguments. +- in case there are arguments, we loop through the args and allocate a register `let target_reg = program.alloc_register();` +for each argument expression. +- then we emit the bytecode instruction for Function `program.emit_insn(Insn::Function {...})` + +https://github.com/tursodatabase/limbo/blob/69e3dd28f77e59927da4313e517b2b428ede480d/core/translate/expr.rs#L1235C1-L1256C26 + + +```diff +// file core/translate/expr.rs +pub fn translate_expr(...) -> Result { + // ... + match expr { + // .. + ast::Expr::FunctionCall { + // ... + match &func_ctx.func { + // ... + Func::Scalar(srf) => { + // ... ++ ScalarFunc::Date => { ++ if let Some(args) = args { ++ for arg in args.iter() { ++ // register containing result of each argument expression ++ let target_reg = program.alloc_register(); ++ _ = translate_expr( ++ program, ++ referenced_tables, ++ arg, ++ target_reg, ++ precomputed_exprs_to_registers, ++ )?; ++ } ++ } ++ program.emit_insn(Insn::Function { ++ constant_mask: 0, ++ start_reg: target_register + 1, ++ dest: target_register, ++ func: func_ctx, ++ }); ++ Ok(target_register) ++ } +// ... +``` + +### Function execution + +The function execution code is implemented in `vdbe/datetime.rs` file [here](https://github.com/tursodatabase/limbo/commit/9cc965186fecf4ba4dd81c783a841c71575123bf#diff-839435241d4ffb648ad2d162bc6ba6a94f052309865251dc2aff36eaa14fa3c5R11-R30) as we already implemented the datetime features in this file. +Note that for other functions it might be implemented in other location in vdbe module. + +```diff +// file vdbe/datetime.rs +// ... ++ pub fn exec_date(values: &[OwnedValue]) -> OwnedValue { ++ // ... implementation ++ } + +// ... +``` + +### Program bytecode execution + +Next step is to implement how the virtual machine (VDBE layer) executes the bytecode `Program` when the program step into the function instruction `Insn::Function` date `ScalarFunc::Date`. + +Per [SQLite spec](https://www.sqlite.org/lang_datefunc.html#time_values) if there is no `time value` (no start register) , we want to execute the function with default param `'now'`. +> In all functions other than timediff(), the time-value (and all modifiers) may be omitted, in which case a time value of 'now' is assumed. + +```diff +// file vdbe/mod.rs +impl Program { + pub fn step<'a>(...) { + loop { + // ... + match isin { + // ... + Insn::Function { + // ... ++ ScalarFunc::Date => { ++ let result = ++ exec_date(&state.registers[*start_reg..*start_reg + arg_count]); ++ state.registers[*dest] = result; ++ } + // ... +``` + +### Adding tests + +There are 2 kind of tests we need to add +1. tests for Rust code +2. TCL tests for executing the sql function + +One test for the Rust code is shown as example below +https://github.com/tursodatabase/limbo/blob/69e3dd28f77e59927da4313e517b2b428ede480d/core/vdbe/datetime.rs#L620C1-L661C1 + +TCL tests for `date` functions can be referenced from SQLite source code which is already very comprehensive. +- https://github.com/sqlite/sqlite/blob/f2b21a5f57e1a1db1a286c42af40563077635c3d/test/date3.test#L36 +- https://github.com/sqlite/sqlite/blob/f2b21a5f57e1a1db1a286c42af40563077635c3d/test/date.test#L611C1-L652C73 + +### Updating doc + +Update the [COMPAT.md](../../COMPAT.md) file to mark this function as implemented. Change Status to +- `Yes` if it is fully supported, +- `Partial` if supported but not fully yet compared to SQLite. + +An example: +```diff +// file COMPAT.md +| Function | Status | Comment | +|------------------------------|---------|------------------------------| +- | date() | No | | ++ | date() | Yes | partially supports modifiers | +... +``` \ No newline at end of file diff --git a/simulator/generation/mod.rs b/simulator/generation/mod.rs index 07a93492b..8158b2d17 100644 --- a/simulator/generation/mod.rs +++ b/simulator/generation/mod.rs @@ -1,5 +1,7 @@ +use std::{iter::Sum, ops::SubAssign}; + use anarchist_readable_name_generator_lib::readable_name_custom; -use rand::Rng; +use rand::{distributions::uniform::SampleUniform, Rng}; pub mod plan; pub mod query; @@ -13,12 +15,17 @@ pub trait ArbitraryFrom { fn arbitrary_from(rng: &mut R, t: &T) -> Self; } -pub(crate) fn frequency<'a, T, R: rand::Rng>( - choices: Vec<(usize, Box T + 'a>)>, +pub(crate) fn frequency< + 'a, + T, + R: rand::Rng, + N: Sum + PartialOrd + Copy + Default + SampleUniform + SubAssign, +>( + choices: Vec<(N, Box T + 'a>)>, rng: &mut R, ) -> T { - let total = choices.iter().map(|(weight, _)| weight).sum::(); - let mut choice = rng.gen_range(0..total); + let total = choices.iter().map(|(weight, _)| *weight).sum::(); + let mut choice = rng.gen_range(N::default()..total); for (weight, f) in choices { if choice < weight { @@ -38,7 +45,7 @@ pub(crate) fn one_of<'a, T, R: rand::Rng>( choices[index](rng) } -pub(crate) fn pick<'a, T, R: rand::Rng>(choices: &'a Vec, rng: &mut R) -> &'a T { +pub(crate) fn pick<'a, T, R: rand::Rng>(choices: &'a [T], rng: &mut R) -> &'a T { let index = rng.gen_range(0..choices.len()); &choices[index] } diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 0ff16af52..dbf6dd7f6 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -16,7 +16,7 @@ use crate::generation::{frequency, Arbitrary, ArbitraryFrom}; use super::{pick, pick_index}; -pub(crate) type ResultSet = Vec>; +pub(crate) type ResultSet = Result>>; pub(crate) struct InteractionPlan { pub(crate) plan: Vec, @@ -45,14 +45,15 @@ pub(crate) struct InteractionStats { pub(crate) read_count: usize, pub(crate) write_count: usize, pub(crate) delete_count: usize, + pub(crate) create_count: usize, } impl Display for InteractionStats { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "Read: {}, Write: {}, Delete: {}", - self.read_count, self.write_count, self.delete_count + "Read: {}, Write: {}, Delete: {}, Create: {}", + self.read_count, self.write_count, self.delete_count, self.create_count ) } } @@ -100,7 +101,9 @@ impl Interactions { match interaction { Interaction::Query(query) => match query { Query::Create(create) => { - env.tables.push(create.table.clone()); + if !env.tables.iter().any(|t| t.name == create.table.name) { + env.tables.push(create.table.clone()); + } } Query::Insert(insert) => { let table = env @@ -137,6 +140,7 @@ impl InteractionPlan { let mut read = 0; let mut write = 0; let mut delete = 0; + let mut create = 0; for interaction in &self.plan { match interaction { @@ -144,7 +148,7 @@ impl InteractionPlan { Query::Select(_) => read += 1, Query::Insert(_) => write += 1, Query::Delete(_) => delete += 1, - Query::Create(_) => {} + Query::Create(_) => create += 1, }, Interaction::Assertion(_) => {} Interaction::Fault(_) => {} @@ -155,6 +159,7 @@ impl InteractionPlan { read_count: read, write_count: write, delete_count: delete, + create_count: create, } } } @@ -172,7 +177,7 @@ impl ArbitraryFrom for InteractionPlan { rng: ChaCha8Rng::seed_from_u64(rng.next_u64()), }; - let num_interactions = rng.gen_range(0..env.opts.max_interactions); + let num_interactions = env.opts.max_interactions; // First create at least one table let create_query = Create::arbitrary(rng); @@ -197,7 +202,7 @@ impl ArbitraryFrom for InteractionPlan { } impl Interaction { - pub(crate) fn execute_query(&self, conn: &mut Rc) -> Result { + pub(crate) fn execute_query(&self, conn: &mut Rc) -> ResultSet { match self { Self::Query(query) => { let query_str = query.to_string(); @@ -342,13 +347,40 @@ fn property_insert_select(rng: &mut R, env: &SimulatorEnv) -> Inte ), func: Box::new(move |stack: &Vec| { let rows = stack.last().unwrap(); - rows.iter().any(|r| r == &row) + match rows { + Ok(rows) => rows.iter().any(|r| r == &row), + Err(_) => false, + } }), }); Interactions(vec![insert_query, select_query, assertion]) } +fn property_double_create_failure(rng: &mut R, _env: &SimulatorEnv) -> Interactions { + let create_query = Create::arbitrary(rng); + let table_name = create_query.table.name.clone(); + let cq1 = Interaction::Query(Query::Create(create_query.clone())); + let cq2 = Interaction::Query(Query::Create(create_query.clone())); + + let assertion = Interaction::Assertion(Assertion { + message: + "creating two tables with the name should result in a failure for the second query" + .to_string(), + func: Box::new(move |stack: &Vec| { + let last = stack.last().unwrap(); + match last { + Ok(_) => false, + Err(e) => e + .to_string() + .contains(&format!("Table {table_name} already exists")), + } + }), + }); + + Interactions(vec![cq1, cq2, assertion]) +} + fn create_table(rng: &mut R, _env: &SimulatorEnv) -> Interactions { let create_query = Interaction::Query(Query::Create(Create::arbitrary(rng))); Interactions(vec![create_query]) @@ -375,17 +407,21 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { rng: &mut R, (env, stats): &(&SimulatorEnv, InteractionStats), ) -> Self { - let remaining_read = - ((((env.opts.max_interactions * env.opts.read_percent) as f64) / 100.0) as usize) - .saturating_sub(stats.read_count); - let remaining_write = ((((env.opts.max_interactions * env.opts.write_percent) as f64) - / 100.0) as usize) - .saturating_sub(stats.write_count); + let remaining_read = ((env.opts.max_interactions as f64 * env.opts.read_percent / 100.0) + - (stats.read_count as f64)) + .max(0.0); + let remaining_write = ((env.opts.max_interactions as f64 * env.opts.write_percent / 100.0) + - (stats.write_count as f64)) + .max(0.0); + let remaining_create = ((env.opts.max_interactions as f64 * env.opts.create_percent + / 100.0) + - (stats.create_count as f64)) + .max(0.0); frequency( vec![ ( - usize::min(remaining_read, remaining_write), + f64::min(remaining_read, remaining_write), Box::new(|rng: &mut R| property_insert_select(rng, env)), ), ( @@ -397,10 +433,14 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions { Box::new(|rng: &mut R| random_write(rng, env)), ), ( - remaining_write / 10, + remaining_create, Box::new(|rng: &mut R| create_table(rng, env)), ), - (1, Box::new(|rng: &mut R| random_fault(rng, env))), + (1.0, Box::new(|rng: &mut R| random_fault(rng, env))), + ( + remaining_create / 2.0, + Box::new(|rng: &mut R| property_double_create_failure(rng, env)), + ), ], rng, ) diff --git a/simulator/generation/table.rs b/simulator/generation/table.rs index 332aeb1f3..179c53436 100644 --- a/simulator/generation/table.rs +++ b/simulator/generation/table.rs @@ -41,11 +41,7 @@ impl Arbitrary for Column { impl Arbitrary for ColumnType { fn arbitrary(rng: &mut R) -> Self { - pick( - &vec![Self::Integer, Self::Float, Self::Text, Self::Blob], - rng, - ) - .to_owned() + pick(&[Self::Integer, Self::Float, Self::Text, Self::Blob], rng).to_owned() } } diff --git a/simulator/main.rs b/simulator/main.rs index ce4fe64c8..52c33d5ec 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -121,6 +121,15 @@ fn main() { // Move the old database and plan file back std::fs::rename(&old_db_path, &db_path).unwrap(); std::fs::rename(&old_plan_path, &plan_path).unwrap(); + } else if let Ok(result) = result { + match result { + Ok(_) => { + log::info!("simulation completed successfully"); + } + Err(e) => { + log::error!("simulation failed: {:?}", e); + } + } } // Print the seed, the locations of the database and the plan file at the end again for easily accessing them. println!("database path: {:?}", db_path); @@ -136,30 +145,50 @@ fn run_simulation( ) -> Result<()> { let mut rng = ChaCha8Rng::seed_from_u64(seed); - let (read_percent, write_percent, delete_percent) = { - let mut remaining = 100; - let read_percent = rng.gen_range(0..=remaining); + let (create_percent, read_percent, write_percent, delete_percent) = { + let mut remaining = 100.0; + let read_percent = rng.gen_range(0.0..=remaining); remaining -= read_percent; - let write_percent = rng.gen_range(0..=remaining); + let write_percent = rng.gen_range(0.0..=remaining); remaining -= write_percent; let delete_percent = remaining; - (read_percent, write_percent, delete_percent) + + let create_percent = write_percent / 10.0; + let write_percent = write_percent - create_percent; + + (create_percent, read_percent, write_percent, delete_percent) }; + if cli_opts.minimum_size < 1 { + return Err(limbo_core::LimboError::InternalError( + "minimum size must be at least 1".to_string(), + )); + } + if cli_opts.maximum_size < 1 { - panic!("maximum size must be at least 1"); + return Err(limbo_core::LimboError::InternalError( + "maximum size must be at least 1".to_string(), + )); + } + + if cli_opts.maximum_size < cli_opts.minimum_size { + return Err(limbo_core::LimboError::InternalError( + "maximum size must be greater than or equal to minimum size".to_string(), + )); } let opts = SimulatorOpts { - ticks: rng.gen_range(1..=cli_opts.maximum_size), + ticks: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), max_connections: 1, // TODO: for now let's use one connection as we didn't implement // correct transactions procesing max_tables: rng.gen_range(0..128), + create_percent, read_percent, write_percent, delete_percent, page_size: 4096, // TODO: randomize this too - max_interactions: rng.gen_range(1..=cli_opts.maximum_size), + max_interactions: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size), + max_time_simulation: cli_opts.maximum_time, }; let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap()); @@ -207,12 +236,19 @@ fn run_simulation( } fn execute_plans(env: &mut SimulatorEnv, plans: &mut [InteractionPlan]) -> Result<()> { + let now = std::time::Instant::now(); // todo: add history here by recording which interaction was executed at which tick for _tick in 0..env.opts.ticks { // Pick the connection to interact with let connection_index = pick_index(env.connections.len(), &mut env.rng); // Execute the interaction for the selected connection execute_plan(env, connection_index, plans)?; + // Check if the maximum time for the simulation has been reached + if now.elapsed().as_secs() >= env.opts.max_time_simulation as u64 { + return Err(limbo_core::LimboError::InternalError( + "maximum time for simulation reached".into(), + )); + } } Ok(()) @@ -266,7 +302,7 @@ fn execute_interaction( }; log::debug!("{}", interaction); - let results = interaction.execute_query(conn)?; + let results = interaction.execute_query(conn); log::debug!("{:?}", results); stack.push(results); } diff --git a/simulator/model/query.rs b/simulator/model/query.rs index 080d7c577..66297b2ad 100644 --- a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -61,7 +61,7 @@ pub(crate) enum Query { Delete(Delete), } -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct Create { pub(crate) table: Table, } diff --git a/simulator/runner/cli.rs b/simulator/runner/cli.rs index f977937bb..8ad42c8b3 100644 --- a/simulator/runner/cli.rs +++ b/simulator/runner/cli.rs @@ -15,10 +15,24 @@ pub struct SimulatorCLI { )] pub doublecheck: bool, #[clap( - short, + short = 'n', long, help = "change the maximum size of the randomly generated sequence of interactions", default_value_t = 1024 )] pub maximum_size: usize, + #[clap( + short = 'k', + long, + help = "change the minimum size of the randomly generated sequence of interactions", + default_value_t = 1 + )] + pub minimum_size: usize, + #[clap( + short = 't', + long, + help = "change the maximum time of the simulation(in seconds)", + default_value_t = 60 * 60 // default to 1 hour + )] + pub maximum_time: usize, } diff --git a/simulator/runner/env.rs b/simulator/runner/env.rs index 0624b94b4..7edad025f 100644 --- a/simulator/runner/env.rs +++ b/simulator/runner/env.rs @@ -30,9 +30,11 @@ pub(crate) struct SimulatorOpts { pub(crate) max_tables: usize, // this next options are the distribution of workload where read_percent + write_percent + // delete_percent == 100% - pub(crate) read_percent: usize, - pub(crate) write_percent: usize, - pub(crate) delete_percent: usize, + pub(crate) create_percent: f64, + pub(crate) read_percent: f64, + pub(crate) write_percent: f64, + pub(crate) delete_percent: f64, pub(crate) max_interactions: usize, pub(crate) page_size: usize, + pub(crate) max_time_simulation: usize, } diff --git a/testing/glob.test b/testing/glob.test index 249ea8151..730fd20d6 100644 --- a/testing/glob.test +++ b/testing/glob.test @@ -68,3 +68,75 @@ Robert|Roberts} do_execsql_test where-glob-impossible { select * from products where 'foobar' glob 'fooba'; } {} + +foreach {testnum pattern text ans} { + 1 abcdefg abcdefg 1 + 2 abcdefG abcdefg 0 + 3 abcdef abcdefg 0 + 4 abcdefgh abcdefg 0 + 5 abcdef? abcdefg 1 + 6 abcdef? abcdef 0 + 7 abcdef? abcdefgh 0 + 8 abcdefg abcdef? 0 + 9 abcdef? abcdef? 1 + 10 abc/def abc/def 1 + 11 abc//def abc/def 0 + 12 */abc/* x/abc/y 1 + 13 */abc/* /abc/ 1 + 16 */abc/* x///a/ab/abc 0 + 17 */abc/* x//a/ab/abc/ 1 + 16 */abc/* x///a/ab/abc 0 + 17 */abc/* x//a/ab/abc/ 1 + 18 **/abc/** x//a/ab/abc/ 1 + 19 *?/abc/*? x//a/ab/abc/y 1 + 20 ?*/abc/?* x//a/ab/abc/y 1 + 21 {abc[cde]efg} abcbefg 0 + 22 {abc[cde]efg} abccefg 1 + 23 {abc[cde]efg} abcdefg 1 + 24 {abc[cde]efg} abceefg 1 + 25 {abc[cde]efg} abcfefg 0 + 26 {abc[^cde]efg} abcbefg 1 + 27 {abc[^cde]efg} abccefg 0 + 28 {abc[^cde]efg} abcdefg 0 + 29 {abc[^cde]efg} abceefg 0 + 30 {abc[^cde]efg} abcfefg 1 + 31 {abc[c-e]efg} abcbefg 0 + 32 {abc[c-e]efg} abccefg 1 + 33 {abc[c-e]efg} abcdefg 1 + 34 {abc[c-e]efg} abceefg 1 + 35 {abc[c-e]efg} abcfefg 0 + 36 {abc[^c-e]efg} abcbefg 1 + 37 {abc[^c-e]efg} abccefg 0 + 38 {abc[^c-e]efg} abcdefg 0 + 39 {abc[^c-e]efg} abceefg 0 + 40 {abc[^c-e]efg} abcfefg 1 + 41 {abc[c-e]efg} abc-efg 0 + 42 {abc[-ce]efg} abc-efg 1 + 43 {abc[ce-]efg} abc-efg 1 + 44 {abc[][*?]efg} {abc]efg} 1 + 45 {abc[][*?]efg} {abc*efg} 1 + 46 {abc[][*?]efg} {abc?efg} 1 + 47 {abc[][*?]efg} {abc[efg} 1 + 48 {abc[^][*?]efg} {abc]efg} 0 + 49 {abc[^][*?]efg} {abc*efg} 0 + 50 {abc[^][*?]efg} {abc?efg} 0 + 51 {abc[^][*?]efg} {abc[efg} 0 + 52 {abc[^][*?]efg} {abcdefg} 1 + 53 {*[xyz]efg} {abcxefg} 1 + 54 {*[xyz]efg} {abcwefg} 0 + 55 {[-c]} {c} 1 + 56 {[-c]} {-} 1 + 57 {[-c]} {x} 0 +} { + do_execsql_test glob-$testnum.1 "SELECT glob ( '$pattern' , '$text' )" $::ans +} + + +foreach {testnum pattern text ans} { + 1 {abc[} {abc[} 0 + 2 {abc[} {abc} 0 + 3 {a]b} {a]b} 1 + 4 {a]b} {a[b} 0 +} { + do_execsql_test glob-unenclosed-$testnum.1 "SELECT glob ( '$pattern' , '$text' )" $::ans +} diff --git a/testing/json.test b/testing/json.test index 237d8f8bd..a0659b196 100755 --- a/testing/json.test +++ b/testing/json.test @@ -113,10 +113,10 @@ do_execsql_test json_extract_number { SELECT json_extract(1, '$') } {{1}} -# \x61 is the ASCII code for 'a', json_extract needs an exact match though +# \x61 is the ASCII code for 'a' do_execsql_test json_extract_with_escaping { SELECT json_extract('{"\x61": 1}', '$.a') -} {{}} +} {{1}} # TODO: fix me #do_execsql_test json_extract_with_escaping_2 {