diff --git a/COMPAT.md b/COMPAT.md index a7baaca83..71a00290b 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -234,8 +234,8 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | jsonb(json) | | | | json_array(value1,value2,...) | Yes | | | jsonb_array(value1,value2,...) | | | -| json_array_length(json) | | | -| json_array_length(json,path) | | | +| json_array_length(json) | Yes | | +| json_array_length(json,path) | Yes | | | json_error_position(json) | | | | json_extract(json,path,...) | | | | jsonb_extract(json,path,...) | | | diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index c31520a82..1b3514032 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -128,22 +128,22 @@ impl Cursor { match smt_lock.step().map_err(|e| { PyErr::new::(format!("Step error: {:?}", e)) })? { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { let py_row = row_to_py(py, &row); return Ok(Some(py_row)); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { self.conn.io.run_once().map_err(|e| { PyErr::new::(format!("IO error: {:?}", e)) })?; } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { return Ok(None); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { return Ok(None); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { return Err( PyErr::new::("Busy error".to_string()).into() ); @@ -167,22 +167,22 @@ impl Cursor { match smt_lock.step().map_err(|e| { PyErr::new::(format!("Step error: {:?}", e)) })? { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { let py_row = row_to_py(py, &row); results.push(py_row); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { self.conn.io.run_once().map_err(|e| { PyErr::new::(format!("IO error: {:?}", e)) })?; } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { return Ok(results); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { return Ok(results); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { return Err( PyErr::new::("Busy error".to_string()).into() ); diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index a2ae5b266..a06321f16 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -75,7 +75,7 @@ impl Statement { pub fn get(&self) -> JsValue { match self.inner.borrow_mut().step() { - Ok(limbo_core::RowResult::Row(row)) => { + Ok(limbo_core::StepResult::Row(row)) => { let row_array = js_sys::Array::new(); for value in row.values { let value = to_js_value(value); @@ -83,10 +83,10 @@ impl Statement { } JsValue::from(row_array) } - Ok(limbo_core::RowResult::IO) - | Ok(limbo_core::RowResult::Done) - | Ok(limbo_core::RowResult::Interrupt) - | Ok(limbo_core::RowResult::Busy) => JsValue::UNDEFINED, + Ok(limbo_core::StepResult::IO) + | Ok(limbo_core::StepResult::Done) + | Ok(limbo_core::StepResult::Interrupt) + | Ok(limbo_core::StepResult::Busy) => JsValue::UNDEFINED, Err(e) => panic!("Error: {:?}", e), } } @@ -95,7 +95,7 @@ impl Statement { let array = js_sys::Array::new(); loop { match self.inner.borrow_mut().step() { - Ok(limbo_core::RowResult::Row(row)) => { + Ok(limbo_core::StepResult::Row(row)) => { let row_array = js_sys::Array::new(); for value in row.values { let value = to_js_value(value); @@ -103,10 +103,10 @@ impl Statement { } array.push(&row_array); } - Ok(limbo_core::RowResult::IO) => {} - Ok(limbo_core::RowResult::Interrupt) => break, - Ok(limbo_core::RowResult::Done) => break, - Ok(limbo_core::RowResult::Busy) => break, + Ok(limbo_core::StepResult::IO) => {} + Ok(limbo_core::StepResult::Interrupt) => break, + Ok(limbo_core::StepResult::Done) => break, + Ok(limbo_core::StepResult::Busy) => break, Err(e) => panic!("Error: {:?}", e), } } diff --git a/cli/app.rs b/cli/app.rs index cbce1ca5c..7e6155543 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -1,6 +1,6 @@ use crate::opcodes_dictionary::OPCODE_DESCRIPTIONS; use cli_table::{Cell, Table}; -use limbo_core::{Database, LimboError, RowResult, Value}; +use limbo_core::{Database, LimboError, StepResult, Value}; use clap::{Parser, ValueEnum}; use std::{ @@ -498,7 +498,7 @@ impl Limbo { } match rows.next_row() { - Ok(RowResult::Row(row)) => { + Ok(StepResult::Row(row)) => { for (i, value) in row.values.iter().enumerate() { if i > 0 { let _ = self.writer.write(b"|"); @@ -518,15 +518,15 @@ impl Limbo { } let _ = self.writeln(""); } - Ok(RowResult::IO) => { + Ok(StepResult::IO) => { self.io.run_once()?; } - Ok(RowResult::Interrupt) => break, - Ok(RowResult::Done) => { + Ok(StepResult::Interrupt) => break, + Ok(StepResult::Done) => { break; } - Ok(RowResult::Busy) => { - self.writeln("database is busy"); + Ok(StepResult::Busy) => { + let _ = self.writeln("database is busy"); break; } Err(err) => { @@ -543,7 +543,7 @@ impl Limbo { let mut table_rows: Vec> = vec![]; loop { match rows.next_row() { - Ok(RowResult::Row(row)) => { + Ok(StepResult::Row(row)) => { table_rows.push( row.values .iter() @@ -559,13 +559,13 @@ impl Limbo { .collect(), ); } - Ok(RowResult::IO) => { + Ok(StepResult::IO) => { self.io.run_once()?; } - Ok(RowResult::Interrupt) => break, - Ok(RowResult::Done) => break, - Ok(RowResult::Busy) => { - self.writeln("database is busy"); + Ok(StepResult::Interrupt) => break, + Ok(StepResult::Done) => break, + Ok(StepResult::Busy) => { + let _ = self.writeln("database is busy"); break; } Err(err) => { @@ -607,19 +607,19 @@ impl Limbo { let mut found = false; loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Some(Value::Text(schema)) = row.values.first() { let _ = self.write_fmt(format_args!("{};", schema)); found = true; } } - RowResult::IO => { + StepResult::IO => { self.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { - self.writeln("database is busy"); + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { + let _ = self.writeln("database is busy"); break; } } @@ -664,19 +664,19 @@ impl Limbo { let mut tables = String::new(); loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Some(Value::Text(table)) = row.values.first() { tables.push_str(table); tables.push(' '); } } - RowResult::IO => { + StepResult::IO => { self.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { - self.writeln("database is busy"); + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { + let _ = self.writeln("database is busy"); break; } } diff --git a/core/Cargo.toml b/core/Cargo.toml index 4d731eb2b..1f2285f5f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -44,7 +44,8 @@ sieve-cache = "0.1.4" sqlite3-parser = { path = "../vendored/sqlite3-parser" } thiserror = "1.0.61" getrandom = { version = "0.2.15", features = ["js"] } -regex = "1.10.5" +regex = "1.11.1" +regex-syntax = { version = "0.8.5", default-features = false, features = ["unicode"] } chrono = "0.4.38" julian_day_converter = "0.3.2" jsonb = { version = "0.4.4", optional = true } diff --git a/core/benches/benchmark.rs b/core/benches/benchmark.rs index 0fe17d991..0dff08b5b 100644 --- a/core/benches/benchmark.rs +++ b/core/benches/benchmark.rs @@ -40,19 +40,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!(); } } @@ -68,19 +68,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!() } } @@ -97,19 +97,19 @@ fn limbo_bench(criterion: &mut Criterion) { b.iter(|| { let mut rows = stmt.query().unwrap(); match rows.next_row().unwrap() { - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::Row(row) => { assert_eq!(row.get::(0).unwrap(), 1); } - limbo_core::RowResult::IO => { + limbo_core::StepResult::IO => { io.run_once().unwrap(); } - limbo_core::RowResult::Interrupt => { + limbo_core::StepResult::Interrupt => { unreachable!(); } - limbo_core::RowResult::Done => { + limbo_core::StepResult::Done => { unreachable!(); } - limbo_core::RowResult::Busy => { + limbo_core::StepResult::Busy => { unreachable!() } } diff --git a/core/function.rs b/core/function.rs index 8681a4fdf..0b19a5474 100644 --- a/core/function.rs +++ b/core/function.rs @@ -6,6 +6,7 @@ use std::fmt::Display; pub enum JsonFunc { Json, JsonArray, + JsonArrayLength, } #[cfg(feature = "json")] @@ -17,6 +18,7 @@ impl Display for JsonFunc { match self { JsonFunc::Json => "json".to_string(), JsonFunc::JsonArray => "json_array".to_string(), + JsonFunc::JsonArrayLength => "json_array_length".to_string(), } ) } @@ -334,6 +336,8 @@ impl Func { "json" => Ok(Func::Json(JsonFunc::Json)), #[cfg(feature = "json")] "json_array" => Ok(Func::Json(JsonFunc::JsonArray)), + #[cfg(feature = "json")] + "json_array_length" => Ok(Func::Json(JsonFunc::JsonArrayLength)), "unixepoch" => Ok(Func::Scalar(ScalarFunc::UnixEpoch)), "hex" => Ok(Func::Scalar(ScalarFunc::Hex)), "unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)), diff --git a/core/json/mod.rs b/core/json/mod.rs index b1394b2bd..046f66237 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -1,5 +1,6 @@ mod de; mod error; +mod path; mod ser; use std::rc::Rc; @@ -8,9 +9,10 @@ pub use crate::json::de::from_str; pub use crate::json::ser::to_string; use crate::types::{LimboText, OwnedValue, TextSubtype}; use indexmap::IndexMap; +use path::get_json_val_by_path; use serde::{Deserialize, Serialize}; -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, PartialEq, Debug)] #[serde(untagged)] pub enum Val { Null, @@ -88,6 +90,49 @@ pub fn json_array(values: Vec<&OwnedValue>) -> crate::Result { Ok(OwnedValue::Text(LimboText::json(Rc::new(s)))) } +pub fn json_array_length( + json_value: &OwnedValue, + json_path: Option<&OwnedValue>, +) -> crate::Result { + let path = match json_path { + Some(OwnedValue::Text(t)) => Some(t.value.to_string()), + Some(OwnedValue::Integer(i)) => Some(i.to_string()), + Some(OwnedValue::Float(f)) => Some(f.to_string()), + _ => None::, + }; + + let top_val = match json_value { + OwnedValue::Text(ref t) => crate::json::from_str::(&t.value), + OwnedValue::Blob(b) => match jsonb::from_slice(b) { + Ok(j) => { + let json = j.to_string(); + crate::json::from_str(&json) + } + Err(_) => crate::bail_parse_error!("malformed JSON"), + }, + _ => return Ok(OwnedValue::Integer(0)), + }; + + let Ok(top_val) = top_val else { + crate::bail_parse_error!("malformed JSON") + }; + + let arr_val = if let Some(path) = path { + match get_json_val_by_path(&top_val, &path) { + Ok(Some(val)) => val, + Ok(None) => return Ok(OwnedValue::Null), + Err(e) => return Err(e), + } + } else { + &top_val + }; + + if let Val::Array(val) = &arr_val { + return Ok(OwnedValue::Integer(val.len() as i64)); + } + Ok(OwnedValue::Integer(0)) +} + #[cfg(test)] mod tests { use super::*; @@ -266,4 +311,121 @@ mod tests { Err(e) => assert!(e.to_string().contains("JSON cannot hold BLOB values")), } } + + #[test] + fn test_json_array_length() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_empty() { + let input = OwnedValue::build_text(Rc::new("[]".to_string())); + let result = json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_root() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_not_array() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = json_array_length(&input, None).unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_prop() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$.one".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index() { + let input = OwnedValue::build_text(Rc::new("[[1,2,3,4]]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$[0]".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 4); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index_not_array() { + let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$[2]".to_string()))), + ) + .unwrap(); + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 0); + } else { + panic!("Expected OwnedValue::Integer"); + } + } + + #[test] + fn test_json_array_length_via_index_bad_prop() { + let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string())); + let result = json_array_length( + &input, + Some(&OwnedValue::build_text(Rc::new("$.two".to_string()))), + ) + .unwrap(); + assert_eq!(OwnedValue::Null, result); + } + + #[test] + fn test_json_array_length_simple_json_subtype() { + let input = OwnedValue::build_text(Rc::new("[1,2,3]".to_string())); + let wrapped = get_json(&input).unwrap(); + let result = json_array_length(&wrapped, None).unwrap(); + + if let OwnedValue::Integer(res) = result { + assert_eq!(res, 3); + } else { + panic!("Expected OwnedValue::Integer"); + } + } } diff --git a/core/json/path.rs b/core/json/path.rs new file mode 100644 index 000000000..e475f6647 --- /dev/null +++ b/core/json/path.rs @@ -0,0 +1,181 @@ +use super::Val; + +pub fn get_json_val_by_path<'v>(val: &'v Val, path: &str) -> crate::Result> { + match path.strip_prefix('$') { + Some(tail) => json_val_by_path(val, tail), + None => crate::bail_parse_error!("malformed path"), + } +} + +fn json_val_by_path<'v>(val: &'v Val, path: &str) -> crate::Result> { + if path.is_empty() { + return Ok(Some(val)); + } + + match val { + Val::Array(inner) => { + if inner.is_empty() { + return Ok(None); + } + let Some(tail) = path.strip_prefix('[') else { + return Ok(None); + }; + let (from_end, tail) = if let Some(updated_tail) = tail.strip_prefix("#-") { + (true, updated_tail) + } else { + (false, tail) + }; + + let Some((idx_str, tail)) = tail.split_once("]") else { + crate::bail_parse_error!("malformed path"); + }; + + if idx_str.is_empty() { + return Ok(None); + } + let Ok(idx) = idx_str.parse::() else { + crate::bail_parse_error!("malformed path"); + }; + let result = if from_end { + inner.get(inner.len() - 1 - idx) + } else { + inner.get(idx) + }; + + if let Some(result) = result { + return json_val_by_path(result, tail); + } + Ok(None) + } + Val::Object(inner) => { + let Some(tail) = path.strip_prefix('.') else { + return Ok(None); + }; + + let (property, tail) = if let Some(tail) = tail.strip_prefix('"') { + if let Some((property, tail)) = tail.split_once('"') { + (property, tail) + } else { + crate::bail_parse_error!("malformed path"); + } + } else if let Some(idx) = tail.find('.') { + (&tail[..idx], &tail[idx..]) + } else { + (tail, "") + }; + + if let Some(result) = inner.get(property) { + return json_val_by_path(result, tail); + } + Ok(None) + } + _ => Ok(None), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_path_root() { + assert_eq!( + get_json_val_by_path(&Val::Bool(true), "$",).unwrap(), + Some(&Val::Bool(true)) + ); + } + + #[test] + fn test_path_index() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Integer(33), Val::Integer(55), Val::Integer(66)]), + "$[2]", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_negative_index() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Integer(33), Val::Integer(55), Val::Integer(66)]), + "$[#-2]", + ) + .unwrap(), + Some(&Val::Integer(33)) + ); + } + + #[test] + fn test_path_index_deep() { + assert_eq!( + get_json_val_by_path( + &Val::Array(vec![Val::Array(vec![ + Val::Integer(33), + Val::Integer(55), + Val::Integer(66) + ])]), + "$[0][1]", + ) + .unwrap(), + Some(&Val::Integer(55)) + ); + } + + #[test] + fn test_path_prop_simple() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [ + ("foo".into(), Val::Integer(55)), + ("bar".into(), Val::Integer(66)) + ] + .into() + ), + "$.bar", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_prop_nested() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [( + "foo".into(), + Val::Object([("bar".into(), Val::Integer(66))].into()) + )] + .into() + ), + "$.foo.bar", + ) + .unwrap(), + Some(&Val::Integer(66)) + ); + } + + #[test] + fn test_path_prop_quoted() { + assert_eq!( + get_json_val_by_path( + &Val::Object( + [ + ("foo.baz".into(), Val::Integer(55)), + ("bar".into(), Val::Integer(66)) + ] + .into() + ), + r#"$."foo.baz""#, + ) + .unwrap(), + Some(&Val::Integer(55)) + ); + } +} diff --git a/core/lib.rs b/core/lib.rs index 255c47217..85fff4a59 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -36,12 +36,12 @@ pub use storage::wal::WalFile; pub use storage::wal::WalFileShared; use util::parse_schema_rows; -use translate::optimizer::optimize_plan; use translate::planner::prepare_select_plan; pub use error::LimboError; pub type Result = std::result::Result; +use crate::translate::optimizer::optimize_plan; pub use io::OpenFlags; #[cfg(feature = "fs")] pub use io::PlatformIO; @@ -374,14 +374,14 @@ impl Statement { self.state.interrupt(); } - pub fn step(&mut self) -> Result> { + pub fn step(&mut self) -> Result> { let result = self.program.step(&mut self.state, self.pager.clone())?; match result { - vdbe::StepResult::Row(row) => Ok(RowResult::Row(Row { values: row.values })), - vdbe::StepResult::IO => Ok(RowResult::IO), - vdbe::StepResult::Done => Ok(RowResult::Done), - vdbe::StepResult::Interrupt => Ok(RowResult::Interrupt), - vdbe::StepResult::Busy => Ok(RowResult::Busy), + vdbe::StepResult::Row(row) => Ok(StepResult::Row(Row { values: row.values })), + vdbe::StepResult::IO => Ok(StepResult::IO), + vdbe::StepResult::Done => Ok(StepResult::Done), + vdbe::StepResult::Interrupt => Ok(StepResult::Interrupt), + vdbe::StepResult::Busy => Ok(StepResult::Busy), } } @@ -393,7 +393,7 @@ impl Statement { pub fn reset(&self) {} } -pub enum RowResult<'a> { +pub enum StepResult<'a> { Row(Row<'a>), IO, Done, @@ -421,7 +421,7 @@ impl Rows { Self { stmt } } - pub fn next_row(&mut self) -> Result> { + pub fn next_row(&mut self) -> Result> { self.stmt.step() } } diff --git a/core/pseudo.rs b/core/pseudo.rs index a87647d2b..45f47856e 100644 --- a/core/pseudo.rs +++ b/core/pseudo.rs @@ -79,6 +79,10 @@ impl Cursor for PseudoCursor { Ok(CursorResult::Ok(())) } + fn delete(&mut self) -> Result> { + unimplemented!() + } + fn get_null_flag(&self) -> bool { false } diff --git a/core/storage/btree.rs b/core/storage/btree.rs index bdd27932b..77f459968 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -742,7 +742,8 @@ impl BTreeCursor { /// i.e. whether we need to balance the btree after the insert. fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) { let free = self.compute_free_space(page, RefCell::borrow(&self.database_header)); - let enough_space = payload.len() + 2 <= free as usize; + const CELL_POINTER_SIZE_BYTES: usize = 2; + let enough_space = payload.len() + CELL_POINTER_SIZE_BYTES <= free as usize; if !enough_space { // add to overflow cell page.overflow_cells.push(OverflowCell { @@ -753,27 +754,30 @@ impl BTreeCursor { } // TODO: insert into cell payload in internal page - let pc = self.allocate_cell_space(page, payload.len() as u16); + let new_cell_data_pointer = self.allocate_cell_space(page, payload.len() as u16); let buf = page.as_ptr(); // copy data - buf[pc as usize..pc as usize + payload.len()].copy_from_slice(payload); + buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()] + .copy_from_slice(payload); // memmove(pIns+2, pIns, 2*(pPage->nCell - i)); - let (pointer_area_pc_by_idx, _) = page.cell_get_raw_pointer_region(); - let pointer_area_pc_by_idx = pointer_area_pc_by_idx + (2 * cell_idx); + let (cell_pointer_array_start, _) = page.cell_pointer_array_offset_and_size(); + let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_POINTER_SIZE_BYTES * cell_idx); - // move previous pointers forward and insert new pointer there - let n_cells_forward = 2 * (page.cell_count() - cell_idx); - if n_cells_forward > 0 { + // move existing pointers forward by CELL_POINTER_SIZE_BYTES... + let n_cells_forward = page.cell_count() - cell_idx; + let n_bytes_forward = CELL_POINTER_SIZE_BYTES * n_cells_forward; + if n_bytes_forward > 0 { buf.copy_within( - pointer_area_pc_by_idx..pointer_area_pc_by_idx + n_cells_forward, - pointer_area_pc_by_idx + 2, + cell_pointer_cur_idx..cell_pointer_cur_idx + n_bytes_forward, + cell_pointer_cur_idx + CELL_POINTER_SIZE_BYTES, ); } - page.write_u16(pointer_area_pc_by_idx - page.offset, pc); + // ...and insert new cell pointer at the current index + page.write_u16(cell_pointer_cur_idx - page.offset, new_cell_data_pointer); - // update first byte of content area - page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, pc); + // update first byte of content area (cell data always appended to the left, so cell content area pointer moves to point to the new cell data) + page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, new_cell_data_pointer); // update cell count let new_n_cells = (page.cell_count() + 1) as u16; @@ -1228,7 +1232,7 @@ impl BTreeCursor { if is_page_1 { // Remove header from child and set offset to 0 let contents = child.get().contents.as_mut().unwrap(); - let (cell_pointer_offset, _) = contents.cell_get_raw_pointer_region(); + let (cell_pointer_offset, _) = contents.cell_pointer_array_offset_and_size(); // change cell pointers for cell_idx in 0..contents.cell_count() { let cell_pointer_offset = cell_pointer_offset + (2 * cell_idx) - offset; @@ -1284,7 +1288,7 @@ impl BTreeCursor { fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 { let amount = amount as usize; - let (cell_offset, _) = page_ref.cell_get_raw_pointer_region(); + let (cell_offset, _) = page_ref.cell_pointer_array_offset_and_size(); let gap = cell_offset + 2 * page_ref.cell_count(); let mut top = page_ref.cell_content_area() as usize; @@ -1326,10 +1330,7 @@ impl BTreeCursor { // TODO: implement fast algorithm let last_cell = usable_space - 4; - let first_cell = { - let (start, end) = cloned_page.cell_get_raw_pointer_region(); - start + end - }; + let first_cell = cloned_page.unallocated_region_start() as u64; if cloned_page.cell_count() > 0 { let page_type = page.page_type(); @@ -1411,10 +1412,12 @@ impl BTreeCursor { #[allow(unused_assignments)] fn compute_free_space(&self, page: &PageContent, db_header: Ref) -> u16 { // TODO(pere): maybe free space is not calculated correctly with offset - let buf = page.as_ptr(); + // Usable space, not the same as free space, simply means: + // space that is not reserved for extensions by sqlite. Usually reserved_space is 0. let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize; - let mut first_byte_in_cell_content = page.cell_content_area(); + + let mut cell_content_area_start = page.cell_content_area(); // A zero value for the cell content area pointer is interpreted as 65536. // See https://www.sqlite.org/fileformat.html // The max page size for a sqlite database is 64kiB i.e. 65536 bytes. @@ -1424,26 +1427,23 @@ impl BTreeCursor { // 1. the page size is 64kiB // 2. there are no cells on the page // 3. there is no reserved space at the end of the page - if first_byte_in_cell_content == 0 { - first_byte_in_cell_content = u16::MAX; + if cell_content_area_start == 0 { + cell_content_area_start = u16::MAX; } - let fragmented_free_bytes = page.num_frag_free_bytes(); - let free_block_pointer = page.first_freeblock(); - let ncell = page.cell_count(); - - // 8 + 4 == header end - let child_pointer_size = if page.is_leaf() { 0 } else { 4 }; - let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16; - // The amount of free space is the sum of: - // 1. 0..first_byte_in_cell_content (everything to the left of the cell content area pointer is unused free space) - // 2. fragmented_free_bytes. - let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize; + // #1. the size of the unallocated region + // #2. fragments (isolated 1-3 byte chunks of free space within the cell content area) + // #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions) - let mut pc = free_block_pointer as usize; - if pc > 0 { - if pc < first_byte_in_cell_content as usize { + let mut free_space_bytes = + page.unallocated_region_size() as usize + page.num_frag_free_bytes() as usize; + + // #3 is computed by iterating over the freeblocks linked list + let mut cur_freeblock_ptr = page.first_freeblock() as usize; + let page_buf = page.as_ptr(); + if cur_freeblock_ptr > 0 { + if cur_freeblock_ptr < cell_content_area_start as usize { // Freeblocks exist in the cell content area e.g. after deletions // They should never exist in the unused area of the page. todo!("corrupted page"); @@ -1453,30 +1453,47 @@ impl BTreeCursor { let mut size = 0; loop { // TODO: check corruption icellast - next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize; - size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize; - nfree += size; - if next <= pc + size + 3 { + next = u16::from_be_bytes( + page_buf[cur_freeblock_ptr..cur_freeblock_ptr + 2] + .try_into() + .unwrap(), + ) as usize; // first 2 bytes in freeblock = next freeblock pointer + size = u16::from_be_bytes( + page_buf[cur_freeblock_ptr + 2..cur_freeblock_ptr + 4] + .try_into() + .unwrap(), + ) as usize; // next 2 bytes in freeblock = size of current freeblock + free_space_bytes += size; + // Freeblocks are in order from left to right on the page, + // so next pointer should > current pointer + its size, or 0 if no next block exists. + if next <= cur_freeblock_ptr + size + 3 { break; } - pc = next; + cur_freeblock_ptr = next; } - if next > 0 { - todo!("corrupted page ascending order"); - } + // Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list + assert!( + next == 0, + "corrupted page: freeblocks list not in ascending order" + ); - if pc + size > usable_space { - todo!("corrupted page last freeblock extends last page end"); - } + assert!( + cur_freeblock_ptr + size <= usable_space, + "corrupted page: last freeblock extends last page end" + ); } + assert!( + free_space_bytes <= usable_space, + "corrupted page: free space is greater than usable space" + ); + // if( nFree>usableSize || nFree Result> { + println!("rowid: {:?}", self.rowid.borrow()); + Ok(CursorResult::Ok(())) + } + fn set_null_flag(&mut self, flag: bool) { self.null_flag = flag; } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 15d5b2c6c..0403bee87 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -485,10 +485,29 @@ impl PageContent { self.read_u16(1) } + /// The number of cells on the page. pub fn cell_count(&self) -> usize { self.read_u16(3) as usize } + /// The size of the cell pointer array in bytes. + /// 2 bytes per cell pointer + pub fn cell_pointer_array_size(&self) -> usize { + const CELL_POINTER_SIZE_BYTES: usize = 2; + self.cell_count() * CELL_POINTER_SIZE_BYTES + } + + /// The start of the unallocated region. + /// Effectively: the offset after the page header + the cell pointer array. + pub fn unallocated_region_start(&self) -> usize { + let (cell_ptr_array_start, cell_ptr_array_size) = self.cell_pointer_array_offset_and_size(); + cell_ptr_array_start + cell_ptr_array_size + } + + pub fn unallocated_region_size(&self) -> usize { + self.cell_content_area() as usize - self.unallocated_region_start() + } + /// The start of the cell content area. /// SQLite strives to place cells as far toward the end of the b-tree page as it can, /// in order to leave space for future growth of the cell pointer array. @@ -497,6 +516,17 @@ impl PageContent { self.read_u16(5) } + /// The size of the page header in bytes. + /// 8 bytes for leaf pages, 12 bytes for interior pages (due to storing rightmost child pointer) + pub fn header_size(&self) -> usize { + match self.page_type() { + PageType::IndexInterior => 12, + PageType::TableInterior => 12, + PageType::IndexLeaf => 8, + PageType::TableLeaf => 8, + } + } + /// The total number of bytes in all fragments is stored in the fifth field of the b-tree page header. /// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area. pub fn num_frag_free_bytes(&self) -> u8 { @@ -526,12 +556,7 @@ impl PageContent { let ncells = self.cell_count(); // the page header is 12 bytes for interior pages, 8 bytes for leaf pages // this is because the 4 last bytes in the interior page's header are used for the rightmost pointer. - let cell_pointer_array_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; + let cell_pointer_array_start = self.header_size(); assert!(idx < ncells, "cell_get: idx out of bounds"); let cell_pointer = cell_pointer_array_start + (idx * 2); let cell_pointer = self.read_u16(cell_pointer) as usize; @@ -552,14 +577,9 @@ impl PageContent { /// The cell pointers are arranged in key order with: /// - left-most cell (the cell with the smallest key) first and /// - the right-most cell (the cell with the largest key) last. - pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) { - let cell_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; - (self.offset + cell_start, self.cell_count() * 2) + pub fn cell_pointer_array_offset_and_size(&self) -> (usize, usize) { + let header_size = self.header_size(); + (self.offset + header_size, self.cell_pointer_array_size()) } /* Get region of a cell's payload */ @@ -572,12 +592,7 @@ impl PageContent { ) -> (usize, usize) { let buf = self.as_ptr(); let ncells = self.cell_count(); - let cell_pointer_array_start = match self.page_type() { - PageType::IndexInterior => 12, - PageType::TableInterior => 12, - PageType::IndexLeaf => 8, - PageType::TableLeaf => 8, - }; + let cell_pointer_array_start = self.header_size(); assert!(idx < ncells, "cell_get: idx out of bounds"); let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each let cell_pointer = self.read_u16(cell_pointer) as usize; diff --git a/core/storage/wal.rs b/core/storage/wal.rs index 3cdad9263..8648efe1c 100644 --- a/core/storage/wal.rs +++ b/core/storage/wal.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::RwLock; use std::{cell::RefCell, rc::Rc, sync::Arc}; @@ -16,7 +16,6 @@ use crate::{Completion, Page}; use self::sqlite3_ondisk::{checksum_wal, PageContent, WAL_MAGIC_BE, WAL_MAGIC_LE}; use super::buffer_pool::BufferPool; -use super::page_cache::PageCacheKey; use super::pager::{PageRef, Pager}; use super::sqlite3_ondisk::{self, begin_write_btree_page, WalHeader}; diff --git a/core/translate/delete.rs b/core/translate/delete.rs new file mode 100644 index 000000000..135c9d76d --- /dev/null +++ b/core/translate/delete.rs @@ -0,0 +1,21 @@ +use crate::translate::emitter::emit_program; +use crate::translate::optimizer::optimize_plan; +use crate::translate::planner::prepare_delete_plan; +use crate::{schema::Schema, storage::sqlite3_ondisk::DatabaseHeader, vdbe::Program}; +use crate::{Connection, Result}; +use sqlite3_parser::ast::{Expr, Limit, QualifiedName}; +use std::rc::Weak; +use std::{cell::RefCell, rc::Rc}; + +pub fn translate_delete( + schema: &Schema, + tbl_name: &QualifiedName, + where_clause: Option, + limit: Option, + database_header: Rc>, + connection: Weak, +) -> Result { + let delete_plan = prepare_delete_plan(schema, tbl_name, where_clause, limit)?; + let optimized_plan = optimize_plan(delete_plan)?; + emit_program(database_header, optimized_plan, connection) +} diff --git a/core/translate/emitter.rs b/core/translate/emitter.rs index 38311b9d9..a032aa09e 100644 --- a/core/translate/emitter.rs +++ b/core/translate/emitter.rs @@ -9,7 +9,7 @@ use sqlite3_parser::ast::{self}; use crate::schema::{Column, PseudoTable, Table}; use crate::storage::sqlite3_ondisk::DatabaseHeader; -use crate::translate::plan::{IterationDirection, Search}; +use crate::translate::plan::{DeletePlan, IterationDirection, Plan, Search}; use crate::types::{OwnedRecord, OwnedValue}; use crate::util::exprs_are_equivalent; use crate::vdbe::builder::ProgramBuilder; @@ -20,7 +20,7 @@ use super::expr::{ translate_aggregation, translate_aggregation_groupby, translate_condition_expr, translate_expr, ConditionMetadata, }; -use super::plan::{Aggregate, BTreeTableReference, Direction, GroupBy, Plan}; +use super::plan::{Aggregate, BTreeTableReference, Direction, GroupBy, SelectPlan}; use super::plan::{ResultSetColumn, SourceOperator}; // Metadata for handling LEFT JOIN operations @@ -101,6 +101,15 @@ pub struct Metadata { pub result_columns_to_skip_in_orderby_sorter: Option>, } +/// Used to distinguish database operations +#[derive(Debug, Clone)] +pub enum OperationMode { + SELECT, + INSERT, + UPDATE, + DELETE, +} + /// Initialize the program with basic setup and return initial metadata and labels fn prologue() -> Result<(ProgramBuilder, Metadata, BranchOffset, BranchOffset)> { let mut program = ProgramBuilder::new(); @@ -166,6 +175,17 @@ pub fn emit_program( database_header: Rc>, mut plan: Plan, connection: Weak, +) -> Result { + match plan { + Plan::Select(plan) => emit_program_for_select(database_header, plan, connection), + Plan::Delete(plan) => emit_program_for_delete(database_header, plan, connection), + } +} + +fn emit_program_for_select( + database_header: Rc>, + mut plan: SelectPlan, + connection: Weak, ) -> Result { let (mut program, mut metadata, init_label, start_offset) = prologue()?; @@ -201,7 +221,12 @@ pub fn emit_program( if let Some(ref mut group_by) = plan.group_by { init_group_by(&mut program, group_by, &plan.aggregates, &mut metadata)?; } - init_source(&mut program, &plan.source, &mut metadata)?; + init_source( + &mut program, + &plan.source, + &mut metadata, + &OperationMode::SELECT, + )?; // Set up main query execution loop open_loop( @@ -272,6 +297,63 @@ pub fn emit_program( Ok(program.build(database_header, connection)) } +fn emit_program_for_delete( + database_header: Rc>, + mut plan: DeletePlan, + connection: Weak, +) -> Result { + let (mut program, mut metadata, init_label, start_offset) = prologue()?; + + // No rows will be read from source table loops if there is a constant false condition eg. WHERE 0 + let skip_loops_label = if plan.contains_constant_false_condition { + let skip_loops_label = program.allocate_label(); + program.emit_insn_with_label_dependency( + Insn::Goto { + target_pc: skip_loops_label, + }, + skip_loops_label, + ); + Some(skip_loops_label) + } else { + None + }; + + // Initialize cursors and other resources needed for query execution + init_source( + &mut program, + &plan.source, + &mut metadata, + &OperationMode::DELETE, + )?; + + // Set up main query execution loop + open_loop( + &mut program, + &mut plan.source, + &plan.referenced_tables, + &mut metadata, + )?; + + emit_delete_insns(&mut program, &plan.source, &plan.limit, &metadata)?; + + // Clean up and close the main execution loop + close_loop( + &mut program, + &plan.source, + &mut metadata, + &plan.referenced_tables, + )?; + + if let Some(skip_loops_label) = skip_loops_label { + program.resolve_label(skip_loops_label, program.offset()); + } + + // Finalize program + epilogue(&mut program, &mut metadata, init_label, start_offset)?; + + Ok(program.build(database_header, connection)) +} + /// Initialize resources needed for ORDER BY processing fn init_order_by( program: &mut ProgramBuilder, @@ -385,6 +467,7 @@ fn init_source( program: &mut ProgramBuilder, source: &SourceOperator, metadata: &mut Metadata, + mode: &OperationMode, ) -> Result<()> { match source { SourceOperator::Join { @@ -402,10 +485,10 @@ fn init_source( }; metadata.left_joins.insert(*id, lj_metadata); } - init_source(program, left, metadata)?; - init_source(program, right, metadata)?; + init_source(program, left, metadata, mode)?; + init_source(program, right, metadata, mode)?; - return Ok(()); + Ok(()) } SourceOperator::Scan { id, @@ -419,13 +502,28 @@ fn init_source( let root_page = table_reference.table.root_page; let next_row_label = program.allocate_label(); metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenReadAwait); - return Ok(()); + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } + + Ok(()) } SourceOperator::Search { id, @@ -442,27 +540,54 @@ fn init_source( metadata.next_row_labels.insert(*id, next_row_label); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: table_cursor_id, - root_page: table_reference.table.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenReadAwait {}); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: table_cursor_id, + root_page: table_reference.table.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } if let Search::IndexSearch { index, .. } = search { let index_cursor_id = program .alloc_cursor_id(Some(index.name.clone()), Some(Table::Index(index.clone()))); - program.emit_insn(Insn::OpenReadAsync { - cursor_id: index_cursor_id, - root_page: index.root_page, - }); - program.emit_insn(Insn::OpenReadAwait); + + match mode { + OperationMode::SELECT => { + program.emit_insn(Insn::OpenReadAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenReadAwait); + } + OperationMode::DELETE => { + program.emit_insn(Insn::OpenWriteAsync { + cursor_id: index_cursor_id, + root_page: index.root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + } + _ => { + unimplemented!() + } + } } - return Ok(()); - } - SourceOperator::Nothing => { - return Ok(()); + Ok(()) } + SourceOperator::Nothing => Ok(()), } } @@ -811,7 +936,7 @@ pub enum InnerLoopEmitTarget<'a> { /// At this point the cursors for all tables have been opened and rewound. fn inner_loop_emit( program: &mut ProgramBuilder, - plan: &mut Plan, + plan: &mut SelectPlan, metadata: &mut Metadata, ) -> Result<()> { // if we have a group by, we emit a record into the group by sorter. @@ -1121,6 +1246,60 @@ fn close_loop( } } +fn emit_delete_insns( + program: &mut ProgramBuilder, + source: &SourceOperator, + limit: &Option, + metadata: &Metadata, +) -> Result<()> { + let cursor_id = match source { + SourceOperator::Scan { + table_reference, .. + } => program.resolve_cursor_id(&table_reference.table_identifier), + SourceOperator::Search { + table_reference, + search, + .. + } => match search { + Search::RowidEq { .. } | Search::RowidSearch { .. } => { + program.resolve_cursor_id(&table_reference.table_identifier) + } + Search::IndexSearch { index, .. } => program.resolve_cursor_id(&index.name), + }, + _ => return Ok(()), + }; + + // Emit the instructions to delete the row + let key_reg = program.alloc_register(); + program.emit_insn(Insn::RowId { + cursor_id, + dest: key_reg, + }); + program.emit_insn(Insn::DeleteAsync { cursor_id }); + program.emit_insn(Insn::DeleteAwait { cursor_id }); + if let Some(limit) = limit { + let limit_reg = program.alloc_register(); + program.emit_insn(Insn::Integer { + value: *limit as i64, + dest: limit_reg, + }); + program.mark_last_insn_constant(); + let jump_label_on_limit_reached = metadata + .termination_label_stack + .last() + .expect("termination_label_stack should not be empty."); + program.emit_insn_with_label_dependency( + Insn::DecrJumpZero { + reg: limit_reg, + target_pc: *jump_label_on_limit_reached, + }, + *jump_label_on_limit_reached, + ) + } + + Ok(()) +} + /// Emits the bytecode for processing a GROUP BY clause. /// This is called when the main query execution loop has finished processing, /// and we now have data in the GROUP BY sorter. diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 734dbb98e..275851201 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -913,6 +913,51 @@ pub fn translate_expr( }); Ok(target_register) } + JsonFunc::JsonArrayLength => { + let args = if let Some(args) = args { + if args.len() > 2 { + crate::bail_parse_error!( + "{} function with wrong number of arguments", + j.to_string() + ) + } + args + } else { + crate::bail_parse_error!( + "{} function with no arguments", + j.to_string() + ); + }; + + let json_reg = program.alloc_register(); + let path_reg = program.alloc_register(); + + translate_expr( + program, + referenced_tables, + &args[0], + json_reg, + precomputed_exprs_to_registers, + )?; + + if args.len() == 2 { + translate_expr( + program, + referenced_tables, + &args[1], + path_reg, + precomputed_exprs_to_registers, + )?; + } + + program.emit_insn(Insn::Function { + constant_mask: 0, + start_reg: json_reg, + dest: target_register, + func: func_ctx, + }); + Ok(target_register) + } }, Func::Scalar(srf) => { match srf { @@ -1555,7 +1600,7 @@ pub fn translate_expr( program.emit_insn(Insn::Copy { src_reg: output_register, dst_reg: target_register, - amount: 1, + amount: 0, }); Ok(target_register) } diff --git a/core/translate/insert.rs b/core/translate/insert.rs index 614cde8b2..12c9ed016 100644 --- a/core/translate/insert.rs +++ b/core/translate/insert.rs @@ -2,32 +2,187 @@ use std::rc::Weak; use std::{cell::RefCell, ops::Deref, rc::Rc}; use sqlite3_parser::ast::{ - DistinctNames, InsertBody, QualifiedName, ResolveType, ResultColumn, With, + DistinctNames, Expr, InsertBody, QualifiedName, ResolveType, ResultColumn, With, }; use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY; +use crate::util::normalize_ident; use crate::{ - schema::{Schema, Table}, + schema::{Column, Schema, Table}, storage::sqlite3_ondisk::DatabaseHeader, translate::expr::translate_expr, vdbe::{builder::ProgramBuilder, Insn, Program}, }; use crate::{Connection, Result}; +#[derive(Debug)] +/// Represents how a column should be populated during an INSERT. +/// Contains both the column definition and optionally the index into the VALUES tuple. +struct ColumnMapping<'a> { + /// Reference to the column definition from the table schema + column: &'a Column, + /// If Some(i), use the i-th value from the VALUES tuple + /// If None, use NULL (column was not specified in INSERT statement) + value_index: Option, +} + +/// Resolves how each column in a table should be populated during an INSERT. +/// Returns a Vec of ColumnMapping, one for each column in the table's schema. +/// +/// For each column, specifies: +/// 1. The column definition (type, constraints, etc) +/// 2. Where to get the value from: +/// - Some(i) -> use i-th value from the VALUES tuple +/// - None -> use NULL (column wasn't specified in INSERT) +/// +/// Two cases are handled: +/// 1. No column list specified (INSERT INTO t VALUES ...): +/// - Values are assigned to columns in table definition order +/// - If fewer values than columns, remaining columns map to None +/// 2. Column list specified (INSERT INTO t (col1, col3) VALUES ...): +/// - Named columns map to their corresponding value index +/// - Unspecified columns map to None +fn resolve_columns_for_insert<'a>( + table: &'a Table, + columns: &Option, + values: &[Vec], +) -> Result>> { + if values.is_empty() { + crate::bail_parse_error!("no values to insert"); + } + + let table_columns = table.columns(); + + // Case 1: No columns specified - map values to columns in order + if columns.is_none() { + let num_values = values[0].len(); + if num_values > table_columns.len() { + crate::bail_parse_error!( + "table {} has {} columns but {} values were supplied", + table.get_name(), + table_columns.len(), + num_values + ); + } + + // Verify all value tuples have same length + for value in values.iter().skip(1) { + if value.len() != num_values { + crate::bail_parse_error!("all VALUES must have the same number of terms"); + } + } + + // Map each column to either its corresponding value index or None + return Ok(table_columns + .iter() + .enumerate() + .map(|(i, col)| ColumnMapping { + column: col, + value_index: if i < num_values { Some(i) } else { None }, + }) + .collect()); + } + + // Case 2: Columns specified - map named columns to their values + let mut mappings: Vec<_> = table_columns + .iter() + .map(|col| ColumnMapping { + column: col, + value_index: None, + }) + .collect(); + + // Map each named column to its value index + for (value_index, column_name) in columns.as_ref().unwrap().iter().enumerate() { + let column_name = normalize_ident(column_name.0.as_str()); + let table_index = table_columns + .iter() + .position(|c| c.name.eq_ignore_ascii_case(&column_name)); + + if table_index.is_none() { + crate::bail_parse_error!( + "table {} has no column named {}", + table.get_name(), + column_name + ); + } + + mappings[table_index.unwrap()].value_index = Some(value_index); + } + + Ok(mappings) +} + +/// Populates the column registers with values for a single row +fn populate_column_registers( + program: &mut ProgramBuilder, + value: &[Expr], + column_mappings: &[ColumnMapping], + column_registers_start: usize, + inserting_multiple_rows: bool, + rowid_reg: usize, +) -> Result<()> { + for (i, mapping) in column_mappings.iter().enumerate() { + let target_reg = column_registers_start + i; + + // Column has a value in the VALUES tuple + if let Some(value_index) = mapping.value_index { + // When inserting a single row, SQLite writes the value provided for the rowid alias column (INTEGER PRIMARY KEY) + // directly into the rowid register and writes a NULL into the rowid alias column. Not sure why this only happens + // in the single row case, but let's copy it. + let write_directly_to_rowid_reg = + mapping.column.is_rowid_alias && !inserting_multiple_rows; + let reg = if write_directly_to_rowid_reg { + rowid_reg + } else { + target_reg + }; + translate_expr( + program, + None, + value.get(value_index).expect("value index out of bounds"), + reg, + None, + )?; + if write_directly_to_rowid_reg { + program.emit_insn(Insn::SoftNull { reg: target_reg }); + } + } else { + // Column was not specified - use NULL if it is nullable, otherwise error + // Rowid alias columns can be NULL because we will autogenerate a rowid in that case. + let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias; + if is_nullable { + program.emit_insn(Insn::Null { + dest: target_reg, + dest_end: None, + }); + program.mark_last_insn_constant(); + } else { + crate::bail_parse_error!("column {} is not nullable", mapping.column.name); + } + } + } + Ok(()) +} + #[allow(clippy::too_many_arguments)] pub fn translate_insert( schema: &Schema, with: &Option, - or_conflict: &Option, + on_conflict: &Option, tbl_name: &QualifiedName, - _columns: &Option, + columns: &Option, body: &InsertBody, _returning: &Option>, database_header: Rc>, connection: Weak, ) -> Result { - assert!(with.is_none()); - assert!(or_conflict.is_none()); + if with.is_some() { + crate::bail_parse_error!("WITH clause is not supported"); + } + if on_conflict.is_some() { + crate::bail_parse_error!("ON CONFLICT clause is not supported"); + } let mut program = ProgramBuilder::new(); let init_label = program.allocate_label(); program.emit_insn_with_label_dependency( @@ -46,6 +201,10 @@ pub fn translate_insert( None => crate::bail_corrupt_error!("Parse error: no such table: {}", table_name), }; let table = Rc::new(Table::BTree(table)); + if !table.has_rowid() { + crate::bail_parse_error!("INSERT into WITHOUT ROWID table is not supported"); + } + let cursor_id = program.alloc_cursor_id( Some(table_name.0.clone()), Some(table.clone().deref().clone()), @@ -55,18 +214,49 @@ pub fn translate_insert( Table::Index(index) => index.root_page, Table::Pseudo(_) => todo!(), }; + let values = match body { + InsertBody::Select(select, None) => match &select.body.select { + sqlite3_parser::ast::OneSelect::Values(values) => values, + _ => todo!(), + }, + _ => todo!(), + }; - let mut num_cols = table.columns().len(); - if table.has_rowid() { - num_cols += 1; - } - // column_registers_start[0] == rowid if has rowid - let column_registers_start = program.alloc_registers(num_cols); + let column_mappings = resolve_columns_for_insert(&table, columns, values)?; + // Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias) + let rowid_alias_index = table.columns().iter().position(|c| c.is_rowid_alias); + let has_user_provided_rowid = { + assert!(column_mappings.len() == table.columns().len()); + if let Some(index) = rowid_alias_index { + column_mappings[index].value_index.is_some() + } else { + false + } + }; - // Coroutine for values - let yield_reg = program.alloc_register(); - let jump_on_definition_label = program.allocate_label(); - { + // allocate a register for each column in the table. if not provided by user, they will simply be set as null. + // allocate an extra register for rowid regardless of whether user provided a rowid alias column. + let num_cols = table.columns().len(); + let rowid_reg = program.alloc_registers(num_cols + 1); + let column_registers_start = rowid_reg + 1; + let rowid_alias_reg = { + if has_user_provided_rowid { + Some(column_registers_start + rowid_alias_index.unwrap()) + } else { + None + } + }; + + let record_register = program.alloc_register(); + let halt_label = program.allocate_label(); + let mut loop_start_offset = 0; + + let inserting_multiple_rows = values.len() > 1; + + // Multiple rows - use coroutine for value population + if inserting_multiple_rows { + let yield_reg = program.alloc_register(); + let jump_on_definition_label = program.allocate_label(); program.emit_insn_with_label_dependency( Insn::InitCoroutine { yield_reg, @@ -75,134 +265,154 @@ pub fn translate_insert( }, jump_on_definition_label, ); - match body { - InsertBody::Select(select, None) => match &select.body.select { - sqlite3_parser::ast::OneSelect::Select { - distinctness: _, - columns: _, - from: _, - where_clause: _, - group_by: _, - window_clause: _, - } => todo!(), - sqlite3_parser::ast::OneSelect::Values(values) => { - for value in values { - for (col, expr) in value.iter().enumerate() { - let mut col = col; - if table.has_rowid() { - col += 1; - } - translate_expr( - &mut program, - None, - expr, - column_registers_start + col, - None, - )?; - } - program.emit_insn(Insn::Yield { - yield_reg, - end_offset: 0, - }); - } - } - }, - InsertBody::DefaultValues => todo!("default values not yet supported"), - _ => todo!(), + + for value in values { + populate_column_registers( + &mut program, + value, + &column_mappings, + column_registers_start, + true, + rowid_reg, + )?; + program.emit_insn(Insn::Yield { + yield_reg, + end_offset: 0, + }); } program.emit_insn(Insn::EndCoroutine { yield_reg }); + program.resolve_label(jump_on_definition_label, program.offset()); + + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + // Main loop + // FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails to unique constraint violation, + // the other row will still be inserted. + loop_start_offset = program.offset(); + program.emit_insn_with_label_dependency( + Insn::Yield { + yield_reg, + end_offset: halt_label, + }, + halt_label, + ); + } else { + // Single row - populate registers directly + program.emit_insn(Insn::OpenWriteAsync { + cursor_id, + root_page, + }); + program.emit_insn(Insn::OpenWriteAwait {}); + + populate_column_registers( + &mut program, + &values[0], + &column_mappings, + column_registers_start, + false, + rowid_reg, + )?; } - program.resolve_label(jump_on_definition_label, program.offset()); - program.emit_insn(Insn::OpenWriteAsync { - cursor_id, - root_page, - }); - program.emit_insn(Insn::OpenWriteAwait {}); - - // Main loop - let record_register = program.alloc_register(); - let halt_label = program.allocate_label(); - let loop_start_offset = program.offset(); - program.emit_insn_with_label_dependency( - Insn::Yield { - yield_reg, - end_offset: halt_label, - }, - halt_label, - ); - - if table.has_rowid() { - let row_id_reg = column_registers_start; - if let Some(rowid_alias_column) = table.get_rowid_alias_column() { - let key_reg = column_registers_start + 1 + rowid_alias_column.0; - // copy key to rowid + // Common record insertion logic for both single and multiple rows + let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label())); + if let Some(reg) = rowid_alias_reg { + // for the row record, the rowid alias column (INTEGER PRIMARY KEY) is always set to NULL + // and its value is copied to the rowid register. in the case where a single row is inserted, + // the value is written directly to the rowid register (see populate_column_registers()). + // again, not sure why this only happens in the single row case, but let's mimic sqlite. + // in the single row case we save a Copy instruction, but in the multiple rows case we do + // it here in the loop. + if inserting_multiple_rows { program.emit_insn(Insn::Copy { - src_reg: key_reg, - dst_reg: row_id_reg, - amount: 0, + src_reg: reg, + dst_reg: rowid_reg, + amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1 }); - program.emit_insn(Insn::SoftNull { reg: key_reg }); + // for the row record, the rowid alias column is always set to NULL + program.emit_insn(Insn::SoftNull { reg }); } - - let notnull_label = program.allocate_label(); + // the user provided rowid value might itself be NULL. If it is, we create a new rowid on the next instruction. program.emit_insn_with_label_dependency( Insn::NotNull { - reg: row_id_reg, - target_pc: notnull_label, + reg: rowid_reg, + target_pc: check_rowid_is_integer_label.unwrap(), }, - notnull_label, + check_rowid_is_integer_label.unwrap(), ); - program.emit_insn(Insn::NewRowid { - cursor: cursor_id, - rowid_reg: row_id_reg, - prev_largest_reg: 0, - }); + } - program.resolve_label(notnull_label, program.offset()); - program.emit_insn(Insn::MustBeInt { reg: row_id_reg }); + // Create new rowid if a) not provided by user or b) provided by user but is NULL + program.emit_insn(Insn::NewRowid { + cursor: cursor_id, + rowid_reg: rowid_reg, + prev_largest_reg: 0, + }); + + if let Some(must_be_int_label) = check_rowid_is_integer_label { + program.resolve_label(must_be_int_label, program.offset()); + // If the user provided a rowid, it must be an integer. + program.emit_insn(Insn::MustBeInt { reg: rowid_reg }); + } + + // Check uniqueness constraint for rowid if it was provided by user. + // When the DB allocates it there are no need for separate uniqueness checks. + if has_user_provided_rowid { let make_record_label = program.allocate_label(); program.emit_insn_with_label_dependency( Insn::NotExists { cursor: cursor_id, - rowid_reg: row_id_reg, + rowid_reg: rowid_reg, target_pc: make_record_label, }, make_record_label, ); - // TODO: rollback + let rowid_column_name = if let Some(index) = rowid_alias_index { + table.column_index_to_name(index).unwrap() + } else { + "rowid" + }; + program.emit_insn(Insn::Halt { err_code: SQLITE_CONSTRAINT_PRIMARYKEY, - description: format!( - "{}.{}", - table.get_name(), - table.column_index_to_name(0).unwrap() - ), + description: format!("{}.{}", table.get_name(), rowid_column_name), }); + program.resolve_label(make_record_label, program.offset()); - program.emit_insn(Insn::MakeRecord { - start_reg: column_registers_start + 1, - count: num_cols - 1, - dest_reg: record_register, - }); - program.emit_insn(Insn::InsertAsync { - cursor: cursor_id, - key_reg: column_registers_start, - record_reg: record_register, - flag: 0, - }); - program.emit_insn(Insn::InsertAwait { cursor_id }); } - program.emit_insn(Insn::Goto { - target_pc: loop_start_offset, + // Create and insert the record + program.emit_insn(Insn::MakeRecord { + start_reg: column_registers_start, + count: num_cols, + dest_reg: record_register, }); + program.emit_insn(Insn::InsertAsync { + cursor: cursor_id, + key_reg: rowid_reg, + record_reg: record_register, + flag: 0, + }); + program.emit_insn(Insn::InsertAwait { cursor_id }); + + if inserting_multiple_rows { + // For multiple rows, loop back + program.emit_insn(Insn::Goto { + target_pc: loop_start_offset, + }); + } + program.resolve_label(halt_label, program.offset()); program.emit_insn(Insn::Halt { err_code: 0, description: String::new(), }); + program.resolve_label(init_label, program.offset()); program.emit_insn(Insn::Transaction { write: true }); program.emit_constant_insns(); diff --git a/core/translate/mod.rs b/core/translate/mod.rs index db69f1578..e2b10b0e5 100644 --- a/core/translate/mod.rs +++ b/core/translate/mod.rs @@ -7,6 +7,7 @@ //! a SELECT statement will be translated into a sequence of instructions that //! will read rows from the database and filter them according to a WHERE clause. +pub(crate) mod delete; pub(crate) mod emitter; pub(crate) mod expr; pub(crate) mod insert; @@ -23,6 +24,7 @@ use std::str::FromStr; use crate::schema::Schema; use crate::storage::pager::Pager; use crate::storage::sqlite3_ondisk::{DatabaseHeader, MIN_PAGE_CACHE_SIZE}; +use crate::translate::delete::translate_delete; use crate::vdbe::{builder::ProgramBuilder, Insn, Program}; use crate::{bail_parse_error, Connection, Result}; use insert::translate_insert; @@ -68,7 +70,22 @@ pub fn translate( ast::Stmt::CreateVirtualTable { .. } => { bail_parse_error!("CREATE VIRTUAL TABLE not supported yet") } - ast::Stmt::Delete { .. } => bail_parse_error!("DELETE not supported yet"), + ast::Stmt::Delete { + with, + tbl_name, + indexed, + where_clause, + returning, + order_by, + limit, + } => translate_delete( + schema, + &tbl_name, + where_clause, + limit, + database_header, + connection, + ), ast::Stmt::Detach(_) => bail_parse_error!("DETACH not supported yet"), ast::Stmt::DropIndex { .. } => bail_parse_error!("DROP INDEX not supported yet"), ast::Stmt::DropTable { .. } => bail_parse_error!("DROP TABLE not supported yet"), @@ -369,7 +386,6 @@ fn update_pragma( query_pragma("journal_mode", header, program)?; Ok(()) } - _ => todo!("pragma `{name}`"), } } @@ -396,9 +412,6 @@ fn query_pragma( dest: register, }); } - _ => { - todo!("pragma `{name}`"); - } } program.emit_insn(Insn::ResultRow { diff --git a/core/translate/optimizer.rs b/core/translate/optimizer.rs index 4763f8b1e..f86c20c26 100644 --- a/core/translate/optimizer.rs +++ b/core/translate/optimizer.rs @@ -6,39 +6,68 @@ use crate::{schema::Index, Result}; use super::plan::{ get_table_ref_bitmask_for_ast_expr, get_table_ref_bitmask_for_operator, BTreeTableReference, - Direction, IterationDirection, Plan, Search, SourceOperator, + DeletePlan, Direction, IterationDirection, Plan, Search, SelectPlan, SourceOperator, }; +pub fn optimize_plan(mut plan: Plan) -> Result { + match plan { + Plan::Select(plan) => optimize_select_plan(plan).map(Plan::Select), + Plan::Delete(plan) => optimize_delete_plan(plan).map(Plan::Delete), + } +} + /** * Make a few passes over the plan to optimize it. * TODO: these could probably be done in less passes, * but having them separate makes them easier to understand */ -pub fn optimize_plan(mut select_plan: Plan) -> Result { - eliminate_between(&mut select_plan.source, &mut select_plan.where_clause)?; +fn optimize_select_plan(mut plan: SelectPlan) -> Result { + eliminate_between(&mut plan.source, &mut plan.where_clause)?; if let ConstantConditionEliminationResult::ImpossibleCondition = - eliminate_constants(&mut select_plan.source, &mut select_plan.where_clause)? + eliminate_constants(&mut plan.source, &mut plan.where_clause)? { - select_plan.contains_constant_false_condition = true; - return Ok(select_plan); + plan.contains_constant_false_condition = true; + return Ok(plan); } + push_predicates( - &mut select_plan.source, - &mut select_plan.where_clause, - &select_plan.referenced_tables, + &mut plan.source, + &mut plan.where_clause, + &plan.referenced_tables, )?; + use_indexes( - &mut select_plan.source, - &select_plan.referenced_tables, - &select_plan.available_indexes, + &mut plan.source, + &plan.referenced_tables, + &plan.available_indexes, )?; + eliminate_unnecessary_orderby( - &mut select_plan.source, - &mut select_plan.order_by, - &select_plan.referenced_tables, - &select_plan.available_indexes, + &mut plan.source, + &mut plan.order_by, + &plan.referenced_tables, + &plan.available_indexes, )?; - Ok(select_plan) + + Ok(plan) +} + +fn optimize_delete_plan(mut plan: DeletePlan) -> Result { + eliminate_between(&mut plan.source, &mut plan.where_clause)?; + if let ConstantConditionEliminationResult::ImpossibleCondition = + eliminate_constants(&mut plan.source, &mut plan.where_clause)? + { + plan.contains_constant_false_condition = true; + return Ok(plan); + } + + use_indexes( + &mut plan.source, + &plan.referenced_tables, + &plan.available_indexes, + )?; + + Ok(plan) } fn _operator_is_already_ordered_by( diff --git a/core/translate/plan.rs b/core/translate/plan.rs index 8e0fa326e..abfab41fb 100644 --- a/core/translate/plan.rs +++ b/core/translate/plan.rs @@ -1,11 +1,11 @@ use core::fmt; +use sqlite3_parser::ast; use std::{ fmt::{Display, Formatter}, rc::Rc, }; -use sqlite3_parser::ast; - +use crate::translate::plan::Plan::{Delete, Select}; use crate::{ function::AggFunc, schema::{BTreeTable, Column, Index}, @@ -27,7 +27,13 @@ pub struct GroupBy { } #[derive(Debug)] -pub struct Plan { +pub enum Plan { + Select(SelectPlan), + Delete(DeletePlan), +} + +#[derive(Debug)] +pub struct SelectPlan { /// A tree of sources (tables). pub source: SourceOperator, /// the columns inside SELECT ... FROM @@ -50,9 +56,32 @@ pub struct Plan { pub contains_constant_false_condition: bool, } +#[derive(Debug)] +pub struct DeletePlan { + /// A tree of sources (tables). + pub source: SourceOperator, + /// the columns inside SELECT ... FROM + pub result_columns: Vec, + /// where clause split into a vec at 'AND' boundaries. + pub where_clause: Option>, + /// order by clause + pub order_by: Option>, + /// limit clause + pub limit: Option, + /// all the tables referenced in the query + pub referenced_tables: Vec, + /// all the indexes available + pub available_indexes: Vec>, + /// query contains a constant condition that is always false + pub contains_constant_false_condition: bool, +} + impl Display for Plan { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.source) + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Select(select_plan) => write!(f, "{}", select_plan.source), + Delete(delete_plan) => write!(f, "{}", delete_plan.source), + } } } diff --git a/core/translate/planner.rs b/core/translate/planner.rs index 0bdc447f3..154f7e1c0 100644 --- a/core/translate/planner.rs +++ b/core/translate/planner.rs @@ -1,8 +1,6 @@ -use super::{ - optimizer::Optimizable, - plan::{ - Aggregate, BTreeTableReference, Direction, GroupBy, Plan, ResultSetColumn, SourceOperator, - }, +use super::plan::{ + Aggregate, BTreeTableReference, DeletePlan, Direction, GroupBy, Plan, ResultSetColumn, + SelectPlan, SourceOperator, }; use crate::{ function::Func, @@ -10,7 +8,7 @@ use crate::{ util::{exprs_are_equivalent, normalize_ident}, Result, }; -use sqlite3_parser::ast::{self, FromClause, JoinType, ResultColumn}; +use sqlite3_parser::ast::{self, Expr, FromClause, JoinType, Limit, QualifiedName, ResultColumn}; pub struct OperatorIdCounter { id: usize, @@ -267,7 +265,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

{ let col_count = columns.len(); @@ -280,7 +278,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

(schema: &Schema, select: ast::Select) -> Result

{ - let l = n.parse()?; - Some(l) - } - _ => todo!(), - } - } + plan.limit = select.limit.and_then(|limit| parse_limit(limit)); // Return the unoptimized query plan - Ok(plan) + Ok(Plan::Select(plan)) } _ => todo!(), } } +pub fn prepare_delete_plan( + schema: &Schema, + tbl_name: &QualifiedName, + where_clause: Option, + limit: Option, +) -> Result { + let table = match schema.get_table(tbl_name.name.0.as_str()) { + Some(table) => table, + None => crate::bail_corrupt_error!("Parse error: no such table: {}", tbl_name), + }; + + let table_ref = BTreeTableReference { + table: table.clone(), + table_identifier: table.name.clone(), + table_index: 0, + }; + let referenced_tables = vec![table_ref.clone()]; + + // Parse the WHERE clause + let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?; + + // Parse the LIMIT clause + let resolved_limit = limit.and_then(|limit| parse_limit(limit)); + + let plan = DeletePlan { + source: SourceOperator::Scan { + id: 0, + table_reference: table_ref.clone(), + predicates: resolved_where_clauses.clone(), + iter_dir: None, + }, + result_columns: vec![], + where_clause: resolved_where_clauses, + order_by: None, + limit: resolved_limit, + referenced_tables, + available_indexes: vec![], + contains_constant_false_condition: false, + }; + + Ok(Plan::Delete(plan)) +} + #[allow(clippy::type_complexity)] fn parse_from( schema: &Schema, @@ -563,6 +589,22 @@ fn parse_from( Ok((operator, tables)) } +fn parse_where( + where_clause: Option, + referenced_tables: &[BTreeTableReference], +) -> Result>> { + if let Some(where_expr) = where_clause { + let mut predicates = vec![]; + break_predicate_at_and_boundaries(where_expr, &mut predicates); + for expr in predicates.iter_mut() { + bind_column_references(expr, referenced_tables)?; + } + Ok(Some(predicates)) + } else { + Ok(None) + } +} + fn parse_join( schema: &Schema, join: ast::JoinedSelectTable, @@ -746,6 +788,14 @@ fn parse_join( )) } +fn parse_limit(limit: Limit) -> Option { + if let Expr::Literal(ast::Literal::Numeric(n)) = limit.expr { + n.parse().ok() + } else { + None + } +} + fn break_predicate_at_and_boundaries(predicate: ast::Expr, out_predicates: &mut Vec) { match predicate { ast::Expr::Binary(left, ast::Operator::And, right) => { diff --git a/core/translate/select.rs b/core/translate/select.rs index 6d846ded8..b79560fda 100644 --- a/core/translate/select.rs +++ b/core/translate/select.rs @@ -1,15 +1,14 @@ use std::rc::Weak; use std::{cell::RefCell, rc::Rc}; +use super::emitter::emit_program; +use super::planner::prepare_select_plan; use crate::storage::sqlite3_ondisk::DatabaseHeader; +use crate::translate::optimizer::optimize_plan; use crate::Connection; use crate::{schema::Schema, vdbe::Program, Result}; use sqlite3_parser::ast; -use super::emitter::emit_program; -use super::optimizer::optimize_plan; -use super::planner::prepare_select_plan; - pub fn translate_select( schema: &Schema, select: ast::Select, diff --git a/core/types.rs b/core/types.rs index 5f1b55d7b..8f5951780 100644 --- a/core/types.rs +++ b/core/types.rs @@ -387,6 +387,75 @@ pub struct OwnedRecord { pub values: Vec, } +const I8_LOW: i64 = -128; +const I8_HIGH: i64 = 127; +const I16_LOW: i64 = -32768; +const I16_HIGH: i64 = 32767; +const I24_LOW: i64 = -8388608; +const I24_HIGH: i64 = 8388607; +const I32_LOW: i64 = -2147483648; +const I32_HIGH: i64 = 2147483647; +const I48_LOW: i64 = -140737488355328; +const I48_HIGH: i64 = 140737488355327; + +/// Sqlite Serial Types +/// https://www.sqlite.org/fileformat.html#record_format +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum SerialType { + Null, + I8, + I16, + I24, + I32, + I48, + I64, + F64, + Text { content_size: usize }, + Blob { content_size: usize }, +} + +impl From<&OwnedValue> for SerialType { + fn from(value: &OwnedValue) -> Self { + match value { + OwnedValue::Null => SerialType::Null, + OwnedValue::Integer(i) => match i { + i if *i >= I8_LOW && *i <= I8_HIGH => SerialType::I8, + i if *i >= I16_LOW && *i <= I16_HIGH => SerialType::I16, + i if *i >= I24_LOW && *i <= I24_HIGH => SerialType::I24, + i if *i >= I32_LOW && *i <= I32_HIGH => SerialType::I32, + i if *i >= I48_LOW && *i <= I48_HIGH => SerialType::I48, + _ => SerialType::I64, + }, + OwnedValue::Float(_) => SerialType::F64, + OwnedValue::Text(t) => SerialType::Text { + content_size: t.value.len(), + }, + OwnedValue::Blob(b) => SerialType::Blob { + content_size: b.len(), + }, + OwnedValue::Agg(_) => unreachable!(), + OwnedValue::Record(_) => unreachable!(), + } + } +} + +impl From for u64 { + fn from(serial_type: SerialType) -> Self { + match serial_type { + SerialType::Null => 0, + SerialType::I8 => 1, + SerialType::I16 => 2, + SerialType::I24 => 3, + SerialType::I32 => 4, + SerialType::I48 => 5, + SerialType::I64 => 6, + SerialType::F64 => 7, + SerialType::Text { content_size } => (content_size * 2 + 13) as u64, + SerialType::Blob { content_size } => (content_size * 2 + 12) as u64, + } + } +} + impl OwnedRecord { pub fn new(values: Vec) -> Self { Self { values } @@ -395,31 +464,32 @@ impl OwnedRecord { pub fn serialize(&self, buf: &mut Vec) { let initial_i = buf.len(); + // write serial types for value in &self.values { - let serial_type = match value { - OwnedValue::Null => 0, - OwnedValue::Integer(_) => 6, // for now let's only do i64 - OwnedValue::Float(_) => 7, - OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64, - OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64, - // not serializable values - OwnedValue::Agg(_) => unreachable!(), - OwnedValue::Record(_) => unreachable!(), - }; - - buf.resize(buf.len() + 9, 0); // Ensure space for varint + let serial_type = SerialType::from(value); + buf.resize(buf.len() + 9, 0); // Ensure space for varint (1-9 bytes in length) let len = buf.len(); - let n = write_varint(&mut buf[len - 9..], serial_type); + let n = write_varint(&mut buf[len - 9..], serial_type.into()); buf.truncate(buf.len() - 9 + n); // Remove unused bytes } let mut header_size = buf.len() - initial_i; // write content for value in &self.values { - // TODO: make integers and floats with smaller serial types match value { OwnedValue::Null => {} - OwnedValue::Integer(i) => buf.extend_from_slice(&i.to_be_bytes()), + OwnedValue::Integer(i) => { + let serial_type = SerialType::from(value); + match serial_type { + SerialType::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialType::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialType::I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte + SerialType::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialType::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialType::I64 => buf.extend_from_slice(&i.to_be_bytes()), + _ => unreachable!(), + } + } OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()), OwnedValue::Text(t) => buf.extend_from_slice(t.value.as_bytes()), OwnedValue::Blob(b) => buf.extend_from_slice(b), @@ -484,8 +554,212 @@ pub trait Cursor { record: &OwnedRecord, moved_before: bool, /* Tells inserter that it doesn't need to traverse in order to find leaf page */ ) -> Result>; // + fn delete(&mut self) -> Result>; fn exists(&mut self, key: &OwnedValue) -> Result>; fn set_null_flag(&mut self, flag: bool); fn get_null_flag(&self) -> bool; fn btree_create(&mut self, flags: usize) -> u32; } + +#[cfg(test)] +mod tests { + use super::*; + use std::rc::Rc; + + #[test] + fn test_serialize_null() { + let record = OwnedRecord::new(vec![OwnedValue::Null]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for NULL + assert_eq!(header[1] as u64, u64::from(SerialType::Null)); + // Check that the buffer is empty after the header + assert_eq!(buf.len(), header_length); + } + + #[test] + fn test_serialize_integers() { + let record = OwnedRecord::new(vec![ + OwnedValue::Integer(42), // Should use SERIAL_TYPE_I8 + OwnedValue::Integer(1000), // Should use SERIAL_TYPE_I16 + OwnedValue::Integer(1_000_000), // Should use SERIAL_TYPE_I24 + OwnedValue::Integer(1_000_000_000), // Should use SERIAL_TYPE_I32 + OwnedValue::Integer(1_000_000_000_000), // Should use SERIAL_TYPE_I48 + OwnedValue::Integer(i64::MAX), // Should use SERIAL_TYPE_I64 + ]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert!(header[0] == header_length as u8); // Header should be larger than number of values + + // Check that correct serial types were chosen + assert_eq!(header[1] as u64, u64::from(SerialType::I8)); + assert_eq!(header[2] as u64, u64::from(SerialType::I16)); + assert_eq!(header[3] as u64, u64::from(SerialType::I24)); + assert_eq!(header[4] as u64, u64::from(SerialType::I32)); + assert_eq!(header[5] as u64, u64::from(SerialType::I48)); + assert_eq!(header[6] as u64, u64::from(SerialType::I64)); + + // test that the bytes after the header can be interpreted as the correct values + let mut cur_offset = header_length; + let i8_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i16_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i24_bytes = &buf[cur_offset..cur_offset + size_of::() - 1]; + cur_offset += size_of::() - 1; // i24 + let i32_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let i48_bytes = &buf[cur_offset..cur_offset + size_of::() - 2]; + cur_offset += size_of::() - 2; // i48 + let i64_bytes = &buf[cur_offset..cur_offset + size_of::()]; + + let val_int8 = i8::from_be_bytes(i8_bytes.try_into().unwrap()); + let val_int16 = i16::from_be_bytes(i16_bytes.try_into().unwrap()); + + let mut leading_0 = vec![0]; + leading_0.extend(i24_bytes); + let val_int24 = i32::from_be_bytes(leading_0.try_into().unwrap()); + + let val_int32 = i32::from_be_bytes(i32_bytes.try_into().unwrap()); + + let mut leading_00 = vec![0, 0]; + leading_00.extend(i48_bytes); + let val_int48 = i64::from_be_bytes(leading_00.try_into().unwrap()); + + let val_int64 = i64::from_be_bytes(i64_bytes.try_into().unwrap()); + + assert_eq!(val_int8, 42); + assert_eq!(val_int16, 1000); + assert_eq!(val_int24, 1_000_000); + assert_eq!(val_int32, 1_000_000_000); + assert_eq!(val_int48, 1_000_000_000_000); + assert_eq!(val_int64, i64::MAX); + + // assert correct size of buffer: header + values (bytes per value depends on serial type) + assert_eq!( + buf.len(), + header_length + + size_of::() + + size_of::() + + (size_of::() - 1) // i24 + + size_of::() + + (size_of::() - 2) // i48 + + size_of::() + ); + } + + #[test] + fn test_serialize_float() { + let record = OwnedRecord::new(vec![OwnedValue::Float(3.14159)]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for FLOAT + assert_eq!(header[1] as u64, u64::from(SerialType::F64)); + // Check that the bytes after the header can be interpreted as the float + let float_bytes = &buf[header_length..header_length + size_of::()]; + let float = f64::from_be_bytes(float_bytes.try_into().unwrap()); + assert_eq!(float, 3.14159); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + size_of::()); + } + + #[test] + fn test_serialize_text() { + let text = Rc::new("hello".to_string()); + let record = OwnedRecord::new(vec![OwnedValue::Text(LimboText::new(text.clone()))]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for TEXT, which is (len * 2 + 13) + assert_eq!(header[1], (5 * 2 + 13) as u8); + // Check the actual text bytes + assert_eq!(&buf[2..7], b"hello"); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + text.len()); + } + + #[test] + fn test_serialize_blob() { + let blob = Rc::new(vec![1, 2, 3, 4, 5]); + let record = OwnedRecord::new(vec![OwnedValue::Blob(blob.clone())]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for BLOB, which is (len * 2 + 12) + assert_eq!(header[1], (5 * 2 + 12) as u8); + // Check the actual blob bytes + assert_eq!(&buf[2..7], &[1, 2, 3, 4, 5]); + // Check that buffer length is correct + assert_eq!(buf.len(), header_length + blob.len()); + } + + #[test] + fn test_serialize_mixed_types() { + let text = Rc::new("test".to_string()); + let record = OwnedRecord::new(vec![ + OwnedValue::Null, + OwnedValue::Integer(42), + OwnedValue::Float(3.14), + OwnedValue::Text(LimboText::new(text.clone())), + ]); + let mut buf = Vec::new(); + record.serialize(&mut buf); + + let header_length = record.values.len() + 1; + let header = &buf[0..header_length]; + // First byte should be header size + assert_eq!(header[0], header_length as u8); + // Second byte should be serial type for NULL + assert_eq!(header[1] as u64, u64::from(SerialType::Null)); + // Third byte should be serial type for I8 + assert_eq!(header[2] as u64, u64::from(SerialType::I8)); + // Fourth byte should be serial type for F64 + assert_eq!(header[3] as u64, u64::from(SerialType::F64)); + // Fifth byte should be serial type for TEXT, which is (len * 2 + 13) + assert_eq!(header[4] as u64, (4 * 2 + 13) as u64); + + // Check that the bytes after the header can be interpreted as the correct values + let mut cur_offset = header_length; + let i8_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let f64_bytes = &buf[cur_offset..cur_offset + size_of::()]; + cur_offset += size_of::(); + let text_bytes = &buf[cur_offset..cur_offset + text.len()]; + + let val_int8 = i8::from_be_bytes(i8_bytes.try_into().unwrap()); + let val_float = f64::from_be_bytes(f64_bytes.try_into().unwrap()); + let val_text = String::from_utf8(text_bytes.to_vec()).unwrap(); + + assert_eq!(val_int8, 42); + assert_eq!(val_float, 3.14); + assert_eq!(val_text, "test"); + + // Check that buffer length is correct + assert_eq!( + buf.len(), + header_length + size_of::() + size_of::() + text.len() + ); + } +} diff --git a/core/util.rs b/core/util.rs index a57186890..dcf04ac81 100644 --- a/core/util.rs +++ b/core/util.rs @@ -4,7 +4,7 @@ use sqlite3_parser::ast::{Expr, FunctionTail, Literal}; use crate::{ schema::{self, Schema}, - Result, RowResult, Rows, IO, + Result, Rows, StepResult, IO, }; // https://sqlite.org/lang_keywords.html @@ -27,7 +27,7 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc { + StepResult::Row(row) => { let ty = row.get::<&str>(0)?; if ty != "table" && ty != "index" { continue; @@ -53,14 +53,14 @@ pub fn parse_schema_rows(rows: Option, schema: &mut Schema, io: Arc continue, } } - RowResult::IO => { + StepResult::IO => { // TODO: How do we ensure that the I/O we submitted to // read the schema is actually complete? io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => break, + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => break, } } } diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index 8dd1cd4de..561765738 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -59,10 +59,6 @@ impl ProgramBuilder { reg } - pub fn next_free_register(&self) -> usize { - self.next_free_register - } - pub fn alloc_cursor_id( &mut self, table_identifier: Option, @@ -144,6 +140,17 @@ impl ProgramBuilder { .push((label, insn_reference)); } + /// Resolve unresolved labels to a specific offset in the instruction list. + /// + /// This function updates all instructions that reference the given label + /// to point to the specified offset. It ensures that the label and offset + /// are valid and updates the target program counter (PC) of each instruction + /// that references the label. + /// + /// # Arguments + /// + /// * `label` - The label to resolve. + /// * `to_offset` - The offset to which the labeled instructions should be resolved to. pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { assert!(label < 0); assert!(to_offset >= 0); diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index ce03a53fd..a2d722948 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -1,5 +1,4 @@ use super::{Insn, InsnReference, OwnedValue, Program}; -use crate::types::LimboText; use std::rc::Rc; pub fn insn_to_str( @@ -834,6 +833,24 @@ pub fn insn_to_str( 0, "".to_string(), ), + Insn::DeleteAsync { cursor_id } => ( + "DeleteAsync", + *cursor_id as i32, + 0, + 0, + OwnedValue::build_text(Rc::new("".to_string())), + 0, + "".to_string(), + ), + Insn::DeleteAwait { cursor_id } => ( + "DeleteAwait", + *cursor_id as i32, + 0, + 0, + OwnedValue::build_text(Rc::new("".to_string())), + 0, + "".to_string(), + ), Insn::NewRowid { cursor, rowid_reg, diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 427770155..980aab282 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -37,7 +37,7 @@ use crate::types::{ }; use crate::util::parse_schema_rows; #[cfg(feature = "json")] -use crate::{function::JsonFunc, json::get_json, json::json_array}; +use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length}; use crate::{Connection, Result, TransactionState}; use crate::{Rows, DATABASE_VERSION}; use limbo_macros::Description; @@ -46,7 +46,7 @@ use datetime::{exec_date, exec_time, exec_unixepoch}; use rand::distributions::{Distribution, Uniform}; use rand::{thread_rng, Rng}; -use regex::Regex; +use regex::{Regex, RegexBuilder}; use std::borrow::{Borrow, BorrowMut}; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; @@ -468,6 +468,14 @@ pub enum Insn { cursor_id: usize, }, + DeleteAsync { + cursor_id: CursorID, + }, + + DeleteAwait { + cursor_id: CursorID, + }, + NewRowid { cursor: CursorID, // P1 rowid_reg: usize, // P2 Destination register to store the new rowid @@ -2281,6 +2289,21 @@ impl Program { Err(e) => return Err(e), } } + #[cfg(feature = "json")] + crate::function::Func::Json(JsonFunc::JsonArrayLength) => { + let json_value = &state.registers[*start_reg]; + let path_value = if arg_count > 1 { + Some(&state.registers[*start_reg + 1]) + } else { + None + }; + let json_array_length = json_array_length(json_value, path_value); + + match json_array_length { + Ok(length) => state.registers[*dest] = length, + Err(e) => return Err(e), + } + } crate::function::Func::Scalar(scalar_func) => match scalar_func { ScalarFunc::Cast => { assert!(arg_count == 2); @@ -2671,6 +2694,16 @@ impl Program { } state.pc += 1; } + Insn::DeleteAsync { cursor_id } => { + let cursor = cursors.get_mut(cursor_id).unwrap(); + return_if_io!(cursor.delete()); + state.pc += 1; + } + Insn::DeleteAwait { cursor_id } => { + let cursor = cursors.get_mut(cursor_id).unwrap(); + cursor.wait_for_completion()?; + state.pc += 1; + } Insn::NewRowid { cursor, rowid_reg, .. } => { @@ -3166,10 +3199,31 @@ fn exec_char(values: Vec) -> OwnedValue { } fn construct_like_regex(pattern: &str) -> Regex { - let mut regex_pattern = String::from("(?i)^"); - regex_pattern.push_str(&pattern.replace('%', ".*").replace('_', ".")); + let mut regex_pattern = String::with_capacity(pattern.len() * 2); + + regex_pattern.push('^'); + + for c in pattern.chars() { + match c { + '\\' => regex_pattern.push_str("\\\\"), + '%' => regex_pattern.push_str(".*"), + '_' => regex_pattern.push('.'), + ch => { + if regex_syntax::is_meta_character(c) { + regex_pattern.push('\\'); + } + regex_pattern.push(ch); + } + } + } + regex_pattern.push('$'); - Regex::new(®ex_pattern).unwrap() + + RegexBuilder::new(®ex_pattern) + .case_insensitive(true) + .dot_matches_new_line(true) + .build() + .unwrap() } // Implements LIKE pattern matching. Caches the constructed regex if a cache is provided @@ -3901,6 +3955,10 @@ mod tests { unimplemented!() } + fn delete(&mut self) -> Result> { + unimplemented!() + } + fn wait_for_completion(&mut self) -> Result<()> { unimplemented!() } @@ -4316,12 +4374,18 @@ mod tests { ); } + #[test] + fn test_like_with_escape_or_regexmeta_chars() { + assert!(exec_like(None, r#"\%A"#, r#"\A"#)); + assert!(exec_like(None, "%a%a", "aaaa")); + } + #[test] fn test_like_no_cache() { assert!(exec_like(None, "a%", "aaaa")); assert!(exec_like(None, "%a%a", "aaaa")); - assert!(exec_like(None, "%a.a", "aaaa")); - assert!(exec_like(None, "a.a%", "aaaa")); + assert!(!exec_like(None, "%a.a", "aaaa")); + assert!(!exec_like(None, "a.a%", "aaaa")); assert!(!exec_like(None, "%a.ab", "aaaa")); } @@ -4330,15 +4394,15 @@ mod tests { let mut cache = HashMap::new(); assert!(exec_like(Some(&mut cache), "a%", "aaaa")); assert!(exec_like(Some(&mut cache), "%a%a", "aaaa")); - assert!(exec_like(Some(&mut cache), "%a.a", "aaaa")); - assert!(exec_like(Some(&mut cache), "a.a%", "aaaa")); + assert!(!exec_like(Some(&mut cache), "%a.a", "aaaa")); + assert!(!exec_like(Some(&mut cache), "a.a%", "aaaa")); assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa")); // again after values have been cached assert!(exec_like(Some(&mut cache), "a%", "aaaa")); assert!(exec_like(Some(&mut cache), "%a%a", "aaaa")); - assert!(exec_like(Some(&mut cache), "%a.a", "aaaa")); - assert!(exec_like(Some(&mut cache), "a.a%", "aaaa")); + assert!(!exec_like(Some(&mut cache), "%a.a", "aaaa")); + assert!(!exec_like(Some(&mut cache), "a.a%", "aaaa")); assert!(!exec_like(Some(&mut cache), "%a.ab", "aaaa")); } diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 0365007a4..e3962b096 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -96,6 +96,10 @@ impl Cursor for Sorter { Ok(CursorResult::Ok(())) } + fn delete(&mut self) -> Result> { + unimplemented!() + } + fn set_null_flag(&mut self, _flag: bool) { todo!(); } diff --git a/docs/internals/functions.md b/docs/internals/functions.md index c8b871c16..0cf1a174e 100644 --- a/docs/internals/functions.md +++ b/docs/internals/functions.md @@ -65,9 +65,38 @@ We can see that the function is not implemented yet so the Parser did not unders For limbo to understand the meaning of `unixtimestamp`, we need to define it as a Function somewhere. That place can be found currently in `core/functions.rs`. We need to edit 3 places -1. add to ScalarFunc as `unixtimestamp` is a scalar function. +1. add to ScalarFunc as `unixtimestamp` is a scalar function. +```diff +pub enum ScalarFunc { + // other funcs... + SqliteVersion, ++ UnixEpoch, + Hex + // other funcs... +} +``` 2. add to Display to show the function as string in our program. +```diff +impl Display for ScalarFunc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let str = match self { + // ... + ScalarFunc::SqliteVersion => "sqlite_version".to_string(), ++ ScalarFunc::UnixEpoch => "unixepoch".to_string(), + ScalarFunc::Hex => "hex".to_string(), + // ... +} +``` 3. add to `fn resolve_function(..)` of `impl Func` to enable parsing from str to this function. +```diff +impl Func { + pub fn resolve_function(name: &str, arg_count: usize) -> Result { + match name { + // ... ++ "unixepoch" => Ok(Func::Scalar(ScalarFunc::UnixEpoch)), + // ... +} +``` https://github.com/tursodatabase/limbo/blob/69e3dd28f77e59927da4313e517b2b428ede480d/core/function.rs#L86 diff --git a/perf/latency/limbo/src/main.rs b/perf/latency/limbo/src/main.rs index c790c6bc8..b51ffb406 100644 --- a/perf/latency/limbo/src/main.rs +++ b/perf/latency/limbo/src/main.rs @@ -38,11 +38,11 @@ fn main() { loop { let row = rows.next_row().unwrap(); match row { - limbo_core::RowResult::Row(_) => { + limbo_core::StepResult::Row(_) => { count += 1; } - limbo_core::RowResult::IO => yield, - limbo_core::RowResult::Done => break, + limbo_core::StepResult::IO => yield, + limbo_core::StepResult::Done => break, } } assert!(count == 100); diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 61b115f01..ea2392f4e 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -1,6 +1,6 @@ use std::{fmt::Display, rc::Rc}; -use limbo_core::{Connection, Result, RowResult}; +use limbo_core::{Connection, Result, StepResult}; use rand::SeedableRng; use rand_chacha::ChaCha8Rng; @@ -106,7 +106,7 @@ impl Interactions { .iter_mut() .find(|t| t.name == insert.table) .unwrap(); - table.rows.push(insert.values.clone()); + table.rows.extend(insert.values.clone()); } Query::Delete(_) => todo!(), Query::Select(_) => {} @@ -215,7 +215,7 @@ impl Interaction { let mut out = Vec::new(); while let Ok(row) = rows.next_row() { match row { - RowResult::Row(row) => { + StepResult::Row(row) => { let mut r = Vec::new(); for el in &row.values { let v = match el { @@ -230,12 +230,12 @@ impl Interaction { out.push(r); } - RowResult::IO => {} - RowResult::Interrupt => {} - RowResult::Done => { + StepResult::IO => {} + StepResult::Interrupt => {} + StepResult::Done => { break; } - RowResult::Busy => {} + StepResult::Busy => {} } } @@ -320,7 +320,7 @@ fn property_insert_select(rng: &mut R, env: &SimulatorEnv) -> Inte // Insert the row let insert_query = Interaction::Query(Query::Insert(Insert { table: table.name.clone(), - values: row.clone(), + values: vec![row.clone()], })); // Select the row diff --git a/simulator/generation/query.rs b/simulator/generation/query.rs index ca6926650..0ff9d44e1 100644 --- a/simulator/generation/query.rs +++ b/simulator/generation/query.rs @@ -37,10 +37,15 @@ impl ArbitraryFrom> for Select { impl ArbitraryFrom for Insert { fn arbitrary_from(rng: &mut R, table: &Table) -> Self { - let values = table - .columns - .iter() - .map(|c| Value::arbitrary_from(rng, &c.column_type)) + let num_rows = rng.gen_range(1..10); + let values: Vec> = (0..num_rows) + .map(|_| { + table + .columns + .iter() + .map(|c| Value::arbitrary_from(rng, &c.column_type)) + .collect() + }) .collect(); Insert { table: table.name.clone(), diff --git a/simulator/main.rs b/simulator/main.rs index 9f70abed2..b12018062 100644 --- a/simulator/main.rs +++ b/simulator/main.rs @@ -1,7 +1,7 @@ use clap::Parser; use generation::plan::{Interaction, InteractionPlan, ResultSet}; use generation::{pick_index, ArbitraryFrom}; -use limbo_core::{Connection, Database, Result, RowResult, IO}; +use limbo_core::{Connection, Database, Result, StepResult, IO}; use model::table::Value; use rand::prelude::*; use rand_chacha::ChaCha8Rng; diff --git a/simulator/model/query.rs b/simulator/model/query.rs index eeec68d08..7a12def8d 100644 --- a/simulator/model/query.rs +++ b/simulator/model/query.rs @@ -75,7 +75,7 @@ pub(crate) struct Select { #[derive(Clone, Debug, PartialEq)] pub(crate) struct Insert { pub(crate) table: String, - pub(crate) values: Vec, + pub(crate) values: Vec>, } #[derive(Clone, Debug, PartialEq)] @@ -104,14 +104,21 @@ impl Display for Query { predicate: guard, }) => write!(f, "SELECT * FROM {} WHERE {}", table, guard), Query::Insert(Insert { table, values }) => { - write!(f, "INSERT INTO {} VALUES (", table)?; - for (i, v) in values.iter().enumerate() { + write!(f, "INSERT INTO {} VALUES ", table)?; + for (i, row) in values.iter().enumerate() { if i != 0 { write!(f, ", ")?; } - write!(f, "{}", v)?; + write!(f, "(")?; + for (j, value) in row.iter().enumerate() { + if j != 0 { + write!(f, ", ")?; + } + write!(f, "{}", value)?; + } + write!(f, ")")?; } - write!(f, ")") + Ok(()) } Query::Delete(Delete { table, diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 6bd5b23d6..cd09ef62b 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -239,14 +239,14 @@ pub unsafe extern "C" fn sqlite3_step(stmt: *mut sqlite3_stmt) -> std::ffi::c_in let stmt = &mut *stmt; if let Ok(result) = stmt.stmt.step() { match result { - limbo_core::RowResult::IO => SQLITE_BUSY, - limbo_core::RowResult::Done => SQLITE_DONE, - limbo_core::RowResult::Interrupt => SQLITE_INTERRUPT, - limbo_core::RowResult::Row(row) => { + limbo_core::StepResult::IO => SQLITE_BUSY, + limbo_core::StepResult::Done => SQLITE_DONE, + limbo_core::StepResult::Interrupt => SQLITE_INTERRUPT, + limbo_core::StepResult::Row(row) => { stmt.row.replace(Some(row)); SQLITE_ROW } - limbo_core::RowResult::Busy => SQLITE_BUSY, + limbo_core::StepResult::Busy => SQLITE_BUSY, } } else { SQLITE_ERROR diff --git a/test/src/lib.rs b/test/src/lib.rs index 8bd6feea2..931c9b1bf 100644 --- a/test/src/lib.rs +++ b/test/src/lib.rs @@ -40,7 +40,7 @@ impl TempDatabase { #[cfg(test)] mod tests { use super::*; - use limbo_core::{CheckpointStatus, Connection, RowResult, Value}; + use limbo_core::{CheckpointStatus, Connection, StepResult, Value}; use log::debug; #[ignore] @@ -63,10 +63,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -80,7 +80,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = row.values.first().expect("missing id"); let id = match first_value { Value::Integer(i) => *i as i32, @@ -90,12 +90,12 @@ mod tests { assert_eq!(current_read_index, id); current_read_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => { + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => { panic!("Database is busy"); } } @@ -127,10 +127,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -146,7 +146,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let text = &row.values[1]; let id = match first_value { @@ -161,12 +161,12 @@ mod tests { assert_eq!(1, id); compare_string(&huge_text, text); } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -200,10 +200,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -219,7 +219,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let text = &row.values[1]; let id = match first_value { @@ -236,12 +236,12 @@ mod tests { compare_string(huge_text, text); current_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -269,10 +269,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -290,7 +290,7 @@ mod tests { match conn.query(list_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let id = match first_value { Value::Integer(i) => *i as i32, @@ -300,12 +300,12 @@ mod tests { assert_eq!(current_index, id as usize); current_index += 1; } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => unreachable!(), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => unreachable!(), } }, Ok(None) => {} @@ -329,10 +329,10 @@ mod tests { match conn.query(insert_query) { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -353,7 +353,7 @@ mod tests { if let Some(ref mut rows) = conn.query(list_query).unwrap() { loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { let first_value = &row.values[0]; let count = match first_value { Value::Integer(i) => *i as i32, @@ -362,12 +362,12 @@ mod tests { log::debug!("counted {}", count); return Ok(count as usize); } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } } } @@ -436,10 +436,10 @@ mod tests { if let Some(ref mut rows) = insert_query { loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } } @@ -450,17 +450,17 @@ mod tests { if let Some(ref mut rows) = select_query { loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Value::Integer(id) = row.values[0] { assert_eq!(id, 1, "First insert should have rowid 1"); } } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } } } @@ -469,10 +469,10 @@ mod tests { match conn.query("INSERT INTO test_rowid (id, val) VALUES (5, 'test2')") { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Done => break, + StepResult::Done => break, _ => unreachable!(), } }, @@ -485,17 +485,17 @@ mod tests { match conn.query("SELECT last_insert_rowid()") { Ok(Some(ref mut rows)) => loop { match rows.next_row()? { - RowResult::Row(row) => { + StepResult::Row(row) => { if let Value::Integer(id) = row.values[0] { last_id = id; } } - RowResult::IO => { + StepResult::IO => { tmp_db.io.run_once()?; } - RowResult::Interrupt => break, - RowResult::Done => break, - RowResult::Busy => panic!("Database is busy"), + StepResult::Interrupt => break, + StepResult::Done => break, + StepResult::Busy => panic!("Database is busy"), } }, Ok(None) => {} diff --git a/testing/json.test b/testing/json.test index a62040555..7c33fdc5a 100755 --- a/testing/json.test +++ b/testing/json.test @@ -83,3 +83,35 @@ do_execsql_test json_array_json { do_execsql_test json_array_nested { SELECT json_array(json_array(1,2,3), json('[1,2,3]'), '[1,2,3]') } {{[[1,2,3],[1,2,3],"[1,2,3]"]}} + +do_execsql_test json_array_length { + SELECT json_array_length('[1,2,3,4]'); +} {{4}} + +do_execsql_test json_array_length_empty { + SELECT json_array_length('[]'); +} {{0}} + +do_execsql_test json_array_length_root { + SELECT json_array_length('[1,2,3,4]', '$'); +} {{4}} + +do_execsql_test json_array_length_not_array { + SELECT json_array_length('{"one":[1,2,3]}'); +} {{0}} + +do_execsql_test json_array_length_via_prop { + SELECT json_array_length('{"one":[1,2,3]}', '$.one'); +} {{3}} + +do_execsql_test json_array_length_via_index { + SELECT json_array_length('[[1,2,3,4]]', '$[0]'); +} {{4}} + +do_execsql_test json_array_length_via_index_not_array { + SELECT json_array_length('[1,2,3,4]', '$[2]'); +} {{0}} + +do_execsql_test json_array_length_via_bad_prop { + SELECT json_array_length('{"one":[1,2,3]}', '$.two'); +} {{}} \ No newline at end of file diff --git a/testing/like.test b/testing/like.test index edd6ba5e5..a52b90b60 100755 --- a/testing/like.test +++ b/testing/like.test @@ -77,3 +77,15 @@ Robert|Roberts} do_execsql_test where-like-impossible { select * from products where 'foobar' like 'fooba'; } {} + +do_execsql_test like-with-backslash { + select like('\%A', '\A') +} {1} + +do_execsql_test like-with-dollar { + select like('A$%', 'A$') +} {1} + +do_execsql_test like-with-dot { + select like('%a.a', 'aaaa') +} {0} diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index e7f1c1b10..f04fa1765 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -809,6 +809,10 @@ do_execsql_test cast-small-float-to-numeric { SELECT typeof(CAST('1.23' AS NUMERIC)), CAST('1.23' AS NUMERIC); } {real|1.23} +do_execsql_test_regex sqlite-version-should-return-valid-output { + SELECT sqlite_version(); +} {\d+\.\d+\.\d+} + # TODO COMPAT: sqlite returns 9.22337203685478e+18, do we care...? # do_execsql_test cast-large-text-to-numeric { # SELECT typeof(CAST('9223372036854775808' AS NUMERIC)), CAST('9223372036854775808' AS NUMERIC); diff --git a/testing/tester.tcl b/testing/tester.tcl index 04a43c3eb..b8cbff17f 100644 --- a/testing/tester.tcl +++ b/testing/tester.tcl @@ -26,6 +26,23 @@ proc do_execsql_test {test_name sql_statements expected_outputs} { } } +proc do_execsql_test_regex {test_name sql_statements expected_regex} { + foreach db $::test_dbs { + puts [format "(%s) %s Running test: %s" $db [string repeat " " [expr {40 - [string length $db]}]] $test_name] + set combined_sql [string trim $sql_statements] + set actual_output [evaluate_sql $::sqlite_exec $db $combined_sql] + + # Validate the actual output against the regular expression + if {![regexp $expected_regex $actual_output]} { + puts "Test FAILED: '$sql_statements'" + puts "returned '$actual_output'" + puts "expected to match regex '$expected_regex'" + exit 1 + } + } +} + + proc do_execsql_test_on_specific_db {db_name test_name sql_statements expected_outputs} { puts [format "(%s) %s Running test: %s" $db_name [string repeat " " [expr {40 - [string length $db_name]}]] $test_name] set combined_sql [string trim $sql_statements]