mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-20 07:25:14 +01:00
Merge branch 'main' into json-extract
This commit is contained in:
@@ -17,6 +17,10 @@ If you are new to Rust, the following books are recommended reading:
|
||||
* Jim Blandy et al. [Programming Rust, 2nd Edition](https://www.oreilly.com/library/view/programming-rust-2nd/9781492052586/). 2021
|
||||
* Steve Klabnik and Carol Nichols. [The Rust Programming Language](https://doc.rust-lang.org/book/#the-rust-programming-language). 2022
|
||||
|
||||
Examples of contributing
|
||||
|
||||
* [How to contribute a SQL function implementation](docs/internals/functions.md)
|
||||
|
||||
## Finding things to work on
|
||||
|
||||
The issue tracker has issues tagged with [good first issue](https://github.com/penberg/limbo/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22),
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
fn main() {
|
||||
pyo3_build_config::use_pyo3_cfgs();
|
||||
println!("cargo::rustc-check-cfg=cfg(allocator, values(\"default\", \"mimalloc\"))");
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
use anyhow::Result;
|
||||
use errors::*;
|
||||
use limbo_core::IO;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyList;
|
||||
use pyo3::types::PyTuple;
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::sync::Arc;
|
||||
|
||||
mod errors;
|
||||
|
||||
@@ -78,7 +78,7 @@ pub struct Cursor {
|
||||
#[pyo3(get)]
|
||||
rowcount: i64,
|
||||
|
||||
smt: Option<Arc<Mutex<limbo_core::Statement>>>,
|
||||
smt: Option<Rc<RefCell<limbo_core::Statement>>>,
|
||||
}
|
||||
|
||||
// SAFETY: The limbo_core crate guarantees that `Cursor` is thread-safe.
|
||||
@@ -90,26 +90,33 @@ impl Cursor {
|
||||
#[pyo3(signature = (sql, parameters=None))]
|
||||
pub fn execute(&mut self, sql: &str, parameters: Option<Py<PyTuple>>) -> Result<Self> {
|
||||
let stmt_is_dml = stmt_is_dml(sql);
|
||||
let stmt_is_ddl = stmt_is_ddl(sql);
|
||||
|
||||
let conn_lock =
|
||||
self.conn.conn.lock().map_err(|_| {
|
||||
PyErr::new::<OperationalError, _>("Failed to acquire connection lock")
|
||||
})?;
|
||||
|
||||
let statement = conn_lock.prepare(sql).map_err(|e| {
|
||||
let statement = self.conn.conn.prepare(sql).map_err(|e| {
|
||||
PyErr::new::<ProgrammingError, _>(format!("Failed to prepare statement: {:?}", e))
|
||||
})?;
|
||||
|
||||
self.smt = Some(Arc::new(Mutex::new(statement)));
|
||||
let stmt = Rc::new(RefCell::new(statement));
|
||||
|
||||
// TODO: use stmt_is_dml to set rowcount
|
||||
if stmt_is_dml {
|
||||
return Err(PyErr::new::<NotSupportedError, _>(
|
||||
"DML statements (INSERT/UPDATE/DELETE) are not fully supported in this version",
|
||||
)
|
||||
.into());
|
||||
// For DDL and DML statements,
|
||||
// we need to execute the statement immediately
|
||||
if stmt_is_ddl || stmt_is_dml {
|
||||
loop {
|
||||
match stmt.borrow_mut().step().map_err(|e| {
|
||||
PyErr::new::<OperationalError, _>(format!("Step error: {:?}", e))
|
||||
})? {
|
||||
limbo_core::StepResult::IO => {
|
||||
self.conn.io.run_once().map_err(|e| {
|
||||
PyErr::new::<OperationalError, _>(format!("IO error: {:?}", e))
|
||||
})?;
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.smt = Some(stmt);
|
||||
|
||||
Ok(Cursor {
|
||||
smt: self.smt.clone(),
|
||||
conn: self.conn.clone(),
|
||||
@@ -121,11 +128,8 @@ impl Cursor {
|
||||
|
||||
pub fn fetchone(&mut self, py: Python) -> Result<Option<PyObject>> {
|
||||
if let Some(smt) = &self.smt {
|
||||
let mut smt_lock = smt.lock().map_err(|_| {
|
||||
PyErr::new::<OperationalError, _>("Failed to acquire statement lock")
|
||||
})?;
|
||||
loop {
|
||||
match smt_lock.step().map_err(|e| {
|
||||
match smt.borrow_mut().step().map_err(|e| {
|
||||
PyErr::new::<OperationalError, _>(format!("Step error: {:?}", e))
|
||||
})? {
|
||||
limbo_core::StepResult::Row(row) => {
|
||||
@@ -157,14 +161,9 @@ impl Cursor {
|
||||
|
||||
pub fn fetchall(&mut self, py: Python) -> Result<Vec<PyObject>> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
if let Some(smt) = &self.smt {
|
||||
let mut smt_lock = smt.lock().map_err(|_| {
|
||||
PyErr::new::<OperationalError, _>("Failed to acquire statement lock")
|
||||
})?;
|
||||
|
||||
loop {
|
||||
match smt_lock.step().map_err(|e| {
|
||||
match smt.borrow_mut().step().map_err(|e| {
|
||||
PyErr::new::<OperationalError, _>(format!("Step error: {:?}", e))
|
||||
})? {
|
||||
limbo_core::StepResult::Row(row) => {
|
||||
@@ -221,11 +220,17 @@ fn stmt_is_dml(sql: &str) -> bool {
|
||||
sql.starts_with("INSERT") || sql.starts_with("UPDATE") || sql.starts_with("DELETE")
|
||||
}
|
||||
|
||||
fn stmt_is_ddl(sql: &str) -> bool {
|
||||
let sql = sql.trim();
|
||||
let sql = sql.to_uppercase();
|
||||
sql.starts_with("CREATE") || sql.starts_with("ALTER") || sql.starts_with("DROP")
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
#[derive(Clone)]
|
||||
pub struct Connection {
|
||||
conn: Arc<Mutex<Rc<limbo_core::Connection>>>,
|
||||
io: Arc<limbo_core::PlatformIO>,
|
||||
conn: Rc<limbo_core::Connection>,
|
||||
io: Arc<dyn limbo_core::IO>,
|
||||
}
|
||||
|
||||
// SAFETY: The limbo_core crate guarantees that `Connection` is thread-safe.
|
||||
@@ -263,16 +268,24 @@ impl Connection {
|
||||
#[allow(clippy::arc_with_non_send_sync)]
|
||||
#[pyfunction]
|
||||
pub fn connect(path: &str) -> Result<Connection> {
|
||||
let io = Arc::new(limbo_core::PlatformIO::new().map_err(|e| {
|
||||
PyErr::new::<InterfaceError, _>(format!("IO initialization failed: {:?}", e))
|
||||
})?);
|
||||
let db = limbo_core::Database::open_file(io.clone(), path)
|
||||
.map_err(|e| PyErr::new::<DatabaseError, _>(format!("Failed to open database: {:?}", e)))?;
|
||||
let conn: Rc<limbo_core::Connection> = db.connect();
|
||||
Ok(Connection {
|
||||
conn: Arc::new(Mutex::new(conn)),
|
||||
io,
|
||||
})
|
||||
match path {
|
||||
":memory:" => {
|
||||
let io: Arc<dyn limbo_core::IO> = Arc::new(limbo_core::MemoryIO::new()?);
|
||||
let db = limbo_core::Database::open_file(io.clone(), path).map_err(|e| {
|
||||
PyErr::new::<DatabaseError, _>(format!("Failed to open database: {:?}", e))
|
||||
})?;
|
||||
let conn: Rc<limbo_core::Connection> = db.connect();
|
||||
Ok(Connection { conn, io })
|
||||
}
|
||||
path => {
|
||||
let io: Arc<dyn limbo_core::IO> = Arc::new(limbo_core::PlatformIO::new()?);
|
||||
let db = limbo_core::Database::open_file(io.clone(), path).map_err(|e| {
|
||||
PyErr::new::<DatabaseError, _>(format!("Failed to open database: {:?}", e))
|
||||
})?;
|
||||
let conn: Rc<limbo_core::Connection> = db.connect();
|
||||
Ok(Connection { conn, io })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn row_to_py(py: Python, row: &limbo_core::Row) -> PyObject {
|
||||
|
||||
@@ -27,6 +27,20 @@ def test_fetchall_select_user_ids(provider):
|
||||
assert user_ids == [(1,), (2,)]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("provider", ["sqlite3", "limbo"])
|
||||
def test_in_memory_fetchone_select_all_users(provider):
|
||||
conn = connect(provider, ":memory:")
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("CREATE TABLE users (id INT PRIMARY KEY, username TEXT)")
|
||||
cursor.execute("INSERT INTO users VALUES (1, 'alice')")
|
||||
|
||||
cursor.execute("SELECT * FROM users")
|
||||
|
||||
alice = cursor.fetchone()
|
||||
assert alice
|
||||
assert alice == (1, "alice")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("provider", ["sqlite3", "limbo"])
|
||||
def test_fetchone_select_all_users(provider):
|
||||
conn = connect(provider, "tests/database.db")
|
||||
|
||||
@@ -17,9 +17,9 @@ impl Display for JsonFunc {
|
||||
f,
|
||||
"{}",
|
||||
match self {
|
||||
JsonFunc::Json => "json".to_string(),
|
||||
JsonFunc::JsonArray => "json_array".to_string(),
|
||||
JsonFunc::JsonExtract => "json_extract".to_string(),
|
||||
Self::Json => "json".to_string(),
|
||||
Self::JsonArray => "json_array".to_string(),
|
||||
Self::JsonExtract => "json_extract".to_string(),
|
||||
Self::JsonArrayLength => "json_array_length".to_string(),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -11,6 +11,7 @@ use std::rc::Rc;
|
||||
use thiserror::Error;
|
||||
|
||||
const MAX_IOVECS: usize = 128;
|
||||
const SQPOLL_IDLE: u32 = 1000;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
enum LinuxIOError {
|
||||
@@ -49,7 +50,13 @@ struct InnerLinuxIO {
|
||||
|
||||
impl LinuxIO {
|
||||
pub fn new() -> Result<Self> {
|
||||
let ring = io_uring::IoUring::new(MAX_IOVECS as u32)?;
|
||||
let ring = match io_uring::IoUring::builder()
|
||||
.setup_sqpoll(SQPOLL_IDLE)
|
||||
.build(MAX_IOVECS as u32)
|
||||
{
|
||||
Ok(ring) => ring,
|
||||
Err(_) => io_uring::IoUring::new(MAX_IOVECS as u32)?,
|
||||
};
|
||||
let inner = InnerLinuxIO {
|
||||
ring: WrappedIOUring {
|
||||
ring,
|
||||
|
||||
@@ -41,7 +41,7 @@ impl<'de> Deserializer<'de> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
||||
impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> {
|
||||
type Error = Error;
|
||||
|
||||
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
|
||||
|
||||
@@ -66,7 +66,7 @@ fn get_json_value(json_value: &OwnedValue) -> crate::Result<Val> {
|
||||
}
|
||||
},
|
||||
OwnedValue::Blob(b) => {
|
||||
if let Ok(json) = jsonb::from_slice(b) {
|
||||
if let Ok(_json) = jsonb::from_slice(b) {
|
||||
todo!("jsonb to json conversion");
|
||||
} else {
|
||||
crate::bail_parse_error!("malformed JSON");
|
||||
@@ -137,7 +137,7 @@ pub fn json_array_length(
|
||||
};
|
||||
|
||||
match arr_val {
|
||||
Val::Array(val) => (Ok(OwnedValue::Integer(val.len() as i64))),
|
||||
Val::Array(val) => Ok(OwnedValue::Integer(val.len() as i64)),
|
||||
Val::Null => Ok(OwnedValue::Null),
|
||||
_ => Ok(OwnedValue::Integer(0)),
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ impl Serializer {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ser::Serializer for &'a mut Serializer {
|
||||
impl ser::Serializer for &mut Serializer {
|
||||
type Ok = ();
|
||||
type Error = Error;
|
||||
|
||||
@@ -237,7 +237,7 @@ impl<'a> ser::Serializer for &'a mut Serializer {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeSeq for &'a mut Serializer {
|
||||
impl ser::SerializeSeq for &mut Serializer {
|
||||
type Ok = ();
|
||||
type Error = Error;
|
||||
|
||||
@@ -257,7 +257,7 @@ impl<'a> ser::SerializeSeq for &'a mut Serializer {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeTuple for &'a mut Serializer {
|
||||
impl ser::SerializeTuple for &mut Serializer {
|
||||
type Ok = ();
|
||||
type Error = Error;
|
||||
|
||||
@@ -273,7 +273,7 @@ impl<'a> ser::SerializeTuple for &'a mut Serializer {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeTupleStruct for &'a mut Serializer {
|
||||
impl ser::SerializeTupleStruct for &mut Serializer {
|
||||
type Ok = ();
|
||||
type Error = Error;
|
||||
|
||||
@@ -289,7 +289,7 @@ impl<'a> ser::SerializeTupleStruct for &'a mut Serializer {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeTupleVariant for &'a mut Serializer {
|
||||
impl ser::SerializeTupleVariant for &mut Serializer {
|
||||
type Ok = ();
|
||||
type Error = Error;
|
||||
|
||||
@@ -306,7 +306,7 @@ impl<'a> ser::SerializeTupleVariant for &'a mut Serializer {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeMap for &'a mut Serializer {
|
||||
impl ser::SerializeMap for &mut Serializer {
|
||||
type Ok = ();
|
||||
type Error = Error;
|
||||
|
||||
@@ -334,7 +334,7 @@ impl<'a> ser::SerializeMap for &'a mut Serializer {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeStruct for &'a mut Serializer {
|
||||
impl ser::SerializeStruct for &mut Serializer {
|
||||
type Ok = ();
|
||||
type Error = Error;
|
||||
|
||||
@@ -351,7 +351,7 @@ impl<'a> ser::SerializeStruct for &'a mut Serializer {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeStructVariant for &'a mut Serializer {
|
||||
impl ser::SerializeStructVariant for &mut Serializer {
|
||||
type Ok = ();
|
||||
type Error = Error;
|
||||
|
||||
|
||||
@@ -1378,10 +1378,10 @@ impl BTreeCursor {
|
||||
PageType::IndexLeaf => todo!(),
|
||||
};
|
||||
cbrk -= size;
|
||||
if cbrk < first_cell as u64 || pc + size > usable_space {
|
||||
if cbrk < first_cell || pc + size > usable_space {
|
||||
todo!("corrupt");
|
||||
}
|
||||
assert!(cbrk + size <= usable_space && cbrk >= first_cell as u64);
|
||||
assert!(cbrk + size <= usable_space && cbrk >= first_cell);
|
||||
// set new pointer
|
||||
write_buf[cell_idx..cell_idx + 2].copy_from_slice(&(cbrk as u16).to_be_bytes());
|
||||
// copy payload
|
||||
@@ -1394,7 +1394,7 @@ impl BTreeCursor {
|
||||
// if( data[hdr+7]+cbrk-iCellFirst!=pPage->nFree ){
|
||||
// return SQLITE_CORRUPT_PAGE(pPage);
|
||||
// }
|
||||
assert!(cbrk >= first_cell as u64);
|
||||
assert!(cbrk >= first_cell);
|
||||
let write_buf = page.as_ptr();
|
||||
|
||||
// set new first byte of cell content
|
||||
@@ -1437,7 +1437,7 @@ impl BTreeCursor {
|
||||
// #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions)
|
||||
|
||||
let mut free_space_bytes =
|
||||
page.unallocated_region_size() as usize + page.num_frag_free_bytes() as usize;
|
||||
page.unallocated_region_size() + page.num_frag_free_bytes() as usize;
|
||||
|
||||
// #3 is computed by iterating over the freeblocks linked list
|
||||
let mut cur_freeblock_ptr = page.first_freeblock() as usize;
|
||||
|
||||
@@ -241,12 +241,7 @@ fn emit_program_for_select(
|
||||
inner_loop_emit(&mut program, &mut plan, &mut metadata)?;
|
||||
|
||||
// Clean up and close the main execution loop
|
||||
close_loop(
|
||||
&mut program,
|
||||
&plan.source,
|
||||
&mut metadata,
|
||||
&plan.referenced_tables,
|
||||
)?;
|
||||
close_loop(&mut program, &plan.source, &mut metadata)?;
|
||||
|
||||
if let Some(skip_loops_label) = skip_loops_label {
|
||||
program.resolve_label(skip_loops_label, program.offset());
|
||||
@@ -338,12 +333,7 @@ fn emit_program_for_delete(
|
||||
emit_delete_insns(&mut program, &plan.source, &plan.limit, &metadata)?;
|
||||
|
||||
// Clean up and close the main execution loop
|
||||
close_loop(
|
||||
&mut program,
|
||||
&plan.source,
|
||||
&mut metadata,
|
||||
&plan.referenced_tables,
|
||||
)?;
|
||||
close_loop(&mut program, &plan.source, &mut metadata)?;
|
||||
|
||||
if let Some(skip_loops_label) = skip_loops_label {
|
||||
program.resolve_label(skip_loops_label, program.offset());
|
||||
@@ -663,7 +653,7 @@ fn open_loop(
|
||||
});
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
Ok(())
|
||||
}
|
||||
SourceOperator::Scan {
|
||||
id,
|
||||
@@ -722,7 +712,7 @@ fn open_loop(
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
Ok(())
|
||||
}
|
||||
SourceOperator::Search {
|
||||
id,
|
||||
@@ -905,11 +895,9 @@ fn open_loop(
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
SourceOperator::Nothing => {
|
||||
return Ok(());
|
||||
Ok(())
|
||||
}
|
||||
SourceOperator::Nothing => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -978,14 +966,14 @@ fn inner_loop_emit(
|
||||
);
|
||||
}
|
||||
// if we have neither, we emit a ResultRow. In that case, if we have a Limit, we handle that with DecrJumpZero.
|
||||
return inner_loop_source_emit(
|
||||
inner_loop_source_emit(
|
||||
program,
|
||||
&plan.result_columns,
|
||||
&plan.aggregates,
|
||||
metadata,
|
||||
InnerLoopEmitTarget::ResultRow { limit: plan.limit },
|
||||
&plan.referenced_tables,
|
||||
);
|
||||
)
|
||||
}
|
||||
|
||||
/// This is a helper function for inner_loop_emit,
|
||||
@@ -1111,7 +1099,6 @@ fn close_loop(
|
||||
program: &mut ProgramBuilder,
|
||||
source: &SourceOperator,
|
||||
metadata: &mut Metadata,
|
||||
referenced_tables: &[BTreeTableReference],
|
||||
) -> Result<()> {
|
||||
match source {
|
||||
SourceOperator::Join {
|
||||
@@ -1121,7 +1108,7 @@ fn close_loop(
|
||||
outer,
|
||||
..
|
||||
} => {
|
||||
close_loop(program, right, metadata, referenced_tables)?;
|
||||
close_loop(program, right, metadata)?;
|
||||
|
||||
if *outer {
|
||||
let lj_meta = metadata.left_joins.get(id).unwrap();
|
||||
@@ -1168,7 +1155,7 @@ fn close_loop(
|
||||
assert!(program.offset() == jump_offset);
|
||||
}
|
||||
|
||||
close_loop(program, left, metadata, referenced_tables)?;
|
||||
close_loop(program, left, metadata)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -2281,14 +2281,10 @@ pub fn translate_aggregation(
|
||||
let delimiter_reg = program.alloc_register();
|
||||
|
||||
let expr = &agg.args[0];
|
||||
let delimiter_expr: ast::Expr;
|
||||
|
||||
match &agg.args[1] {
|
||||
ast::Expr::Column { .. } => {
|
||||
delimiter_expr = agg.args[1].clone();
|
||||
}
|
||||
let delimiter_expr = match &agg.args[1] {
|
||||
ast::Expr::Column { .. } => agg.args[1].clone(),
|
||||
ast::Expr::Literal(ast::Literal::String(s)) => {
|
||||
delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string()));
|
||||
ast::Expr::Literal(ast::Literal::String(s.to_string()))
|
||||
}
|
||||
_ => crate::bail_parse_error!("Incorrect delimiter parameter"),
|
||||
};
|
||||
@@ -2464,14 +2460,10 @@ pub fn translate_aggregation_groupby(
|
||||
let expr_reg = program.alloc_register();
|
||||
let delimiter_reg = program.alloc_register();
|
||||
|
||||
let delimiter_expr: ast::Expr;
|
||||
|
||||
match &agg.args[1] {
|
||||
ast::Expr::Column { .. } => {
|
||||
delimiter_expr = agg.args[1].clone();
|
||||
}
|
||||
let delimiter_expr = match &agg.args[1] {
|
||||
ast::Expr::Column { .. } => agg.args[1].clone(),
|
||||
ast::Expr::Literal(ast::Literal::String(s)) => {
|
||||
delimiter_expr = ast::Expr::Literal(ast::Literal::String(s.to_string()));
|
||||
ast::Expr::Literal(ast::Literal::String(s.to_string()))
|
||||
}
|
||||
_ => crate::bail_parse_error!("Incorrect delimiter parameter"),
|
||||
};
|
||||
|
||||
@@ -349,7 +349,7 @@ pub fn translate_insert(
|
||||
// Create new rowid if a) not provided by user or b) provided by user but is NULL
|
||||
program.emit_insn(Insn::NewRowid {
|
||||
cursor: cursor_id,
|
||||
rowid_reg: rowid_reg,
|
||||
rowid_reg,
|
||||
prev_largest_reg: 0,
|
||||
});
|
||||
|
||||
@@ -366,7 +366,7 @@ pub fn translate_insert(
|
||||
program.emit_insn_with_label_dependency(
|
||||
Insn::NotExists {
|
||||
cursor: cursor_id,
|
||||
rowid_reg: rowid_reg,
|
||||
rowid_reg,
|
||||
target_pc: make_record_label,
|
||||
},
|
||||
make_record_label,
|
||||
|
||||
@@ -666,7 +666,7 @@ impl Optimizable for ast::Expr {
|
||||
if id.0.eq_ignore_ascii_case("false") {
|
||||
return Ok(Some(ConstantPredicate::AlwaysFalse));
|
||||
}
|
||||
return Ok(None);
|
||||
Ok(None)
|
||||
}
|
||||
Self::Literal(lit) => match lit {
|
||||
ast::Literal::Null => Ok(Some(ConstantPredicate::AlwaysFalse)),
|
||||
|
||||
@@ -476,7 +476,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result<P
|
||||
}
|
||||
|
||||
// Parse the LIMIT clause
|
||||
plan.limit = select.limit.and_then(|limit| parse_limit(limit));
|
||||
plan.limit = select.limit.and_then(parse_limit);
|
||||
|
||||
// Return the unoptimized query plan
|
||||
Ok(Plan::Select(plan))
|
||||
@@ -507,7 +507,7 @@ pub fn prepare_delete_plan(
|
||||
let resolved_where_clauses = parse_where(where_clause, &[table_ref.clone()])?;
|
||||
|
||||
// Parse the LIMIT clause
|
||||
let resolved_limit = limit.and_then(|limit| parse_limit(limit));
|
||||
let resolved_limit = limit.and_then(parse_limit);
|
||||
|
||||
let plan = DeletePlan {
|
||||
source: SourceOperator::Scan {
|
||||
|
||||
@@ -15,7 +15,7 @@ pub enum Value<'a> {
|
||||
Blob(&'a Vec<u8>),
|
||||
}
|
||||
|
||||
impl<'a> Display for Value<'a> {
|
||||
impl Display for Value<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Null => write!(f, "NULL"),
|
||||
@@ -647,7 +647,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_serialize_float() {
|
||||
let record = OwnedRecord::new(vec![OwnedValue::Float(3.14159)]);
|
||||
#[warn(clippy::approx_constant)]
|
||||
let record = OwnedRecord::new(vec![OwnedValue::Float(3.15555)]);
|
||||
let mut buf = Vec::new();
|
||||
record.serialize(&mut buf);
|
||||
|
||||
@@ -660,7 +661,7 @@ mod tests {
|
||||
// Check that the bytes after the header can be interpreted as the float
|
||||
let float_bytes = &buf[header_length..header_length + size_of::<f64>()];
|
||||
let float = f64::from_be_bytes(float_bytes.try_into().unwrap());
|
||||
assert_eq!(float, 3.14159);
|
||||
assert_eq!(float, 3.15555);
|
||||
// Check that buffer length is correct
|
||||
assert_eq!(buf.len(), header_length + size_of::<f64>());
|
||||
}
|
||||
@@ -709,7 +710,7 @@ mod tests {
|
||||
let record = OwnedRecord::new(vec![
|
||||
OwnedValue::Null,
|
||||
OwnedValue::Integer(42),
|
||||
OwnedValue::Float(3.14),
|
||||
OwnedValue::Float(3.15),
|
||||
OwnedValue::Text(LimboText::new(text.clone())),
|
||||
]);
|
||||
let mut buf = Vec::new();
|
||||
@@ -741,7 +742,7 @@ mod tests {
|
||||
let val_text = String::from_utf8(text_bytes.to_vec()).unwrap();
|
||||
|
||||
assert_eq!(val_int8, 42);
|
||||
assert_eq!(val_float, 3.14);
|
||||
assert_eq!(val_float, 3.15);
|
||||
assert_eq!(val_text, "test");
|
||||
|
||||
// Check that buffer length is correct
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use regex::{Regex, RegexBuilder};
|
||||
|
||||
use crate::{types::OwnedValue, LimboError};
|
||||
@@ -8,11 +10,9 @@ pub fn construct_like_escape_arg(escape_value: &OwnedValue) -> Result<char, Limb
|
||||
let mut escape_chars = text.value.chars();
|
||||
match (escape_chars.next(), escape_chars.next()) {
|
||||
(Some(escape), None) => Ok(escape),
|
||||
_ => {
|
||||
return Result::Err(LimboError::Constraint(
|
||||
"ESCAPE expression must be a single character".to_string(),
|
||||
))
|
||||
}
|
||||
_ => Result::Err(LimboError::Constraint(
|
||||
"ESCAPE expression must be a single character".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
@@ -63,6 +63,124 @@ fn construct_like_regex_with_escape(pattern: &str, escape: char) -> Regex {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
// Implements GLOB pattern matching. Caches the constructed regex if a cache is provided
|
||||
pub fn exec_glob(
|
||||
regex_cache: Option<&mut HashMap<String, Regex>>,
|
||||
pattern: &str,
|
||||
text: &str,
|
||||
) -> bool {
|
||||
if let Some(cache) = regex_cache {
|
||||
match cache.get(pattern) {
|
||||
Some(re) => re.is_match(text),
|
||||
None => match construct_glob_regex(pattern) {
|
||||
Ok(re) => {
|
||||
let res = re.is_match(text);
|
||||
cache.insert(pattern.to_string(), re);
|
||||
res
|
||||
}
|
||||
Err(_) => false,
|
||||
},
|
||||
}
|
||||
} else {
|
||||
construct_glob_regex(pattern)
|
||||
.map(|re| re.is_match(text))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
fn push_char_to_regex_pattern(c: char, regex_pattern: &mut String) {
|
||||
if regex_syntax::is_meta_character(c) {
|
||||
regex_pattern.push('\\');
|
||||
}
|
||||
regex_pattern.push(c);
|
||||
}
|
||||
|
||||
fn construct_glob_regex(pattern: &str) -> Result<Regex, LimboError> {
|
||||
let mut regex_pattern = String::with_capacity(pattern.len() * 2);
|
||||
|
||||
regex_pattern.push('^');
|
||||
|
||||
let mut chars = pattern.chars();
|
||||
let mut bracket_closed = true;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
match ch {
|
||||
'[' => {
|
||||
bracket_closed = false;
|
||||
regex_pattern.push('[');
|
||||
if let Some(next_ch) = chars.next() {
|
||||
match next_ch {
|
||||
']' => {
|
||||
// The string enclosed by the brackets cannot be empty;
|
||||
// therefore ']' can be allowed between the brackets,
|
||||
// provided that it is the first character.
|
||||
// so this means
|
||||
// - `[]]` will be translated to `[\]]`
|
||||
// - `[[]` will be translated to `[\[]`
|
||||
regex_pattern.push_str("\\]");
|
||||
}
|
||||
'^' => {
|
||||
// For the most cases we can pass `^` directly to regex
|
||||
// but in certain cases like [^][a] , `[^]` will make regex crate
|
||||
// throw unenclosed character class. So this means
|
||||
// - `[^][a]` will be translated to `[^\]a]`
|
||||
regex_pattern.push('^');
|
||||
if let Some(next_ch_2) = chars.next() {
|
||||
match next_ch_2 {
|
||||
']' => {
|
||||
regex_pattern.push('\\');
|
||||
regex_pattern.push(']');
|
||||
}
|
||||
c => {
|
||||
push_char_to_regex_pattern(c, &mut regex_pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
c => {
|
||||
push_char_to_regex_pattern(c, &mut regex_pattern);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
while let Some(next_ch) = chars.next() {
|
||||
match next_ch {
|
||||
']' => {
|
||||
bracket_closed = true;
|
||||
regex_pattern.push(']');
|
||||
break;
|
||||
}
|
||||
'-' => {
|
||||
regex_pattern.push('-');
|
||||
}
|
||||
c => {
|
||||
push_char_to_regex_pattern(c, &mut regex_pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'?' => {
|
||||
regex_pattern.push('.');
|
||||
}
|
||||
'*' => {
|
||||
regex_pattern.push_str(".*");
|
||||
}
|
||||
c => {
|
||||
push_char_to_regex_pattern(c, &mut regex_pattern);
|
||||
}
|
||||
}
|
||||
}
|
||||
regex_pattern.push('$');
|
||||
|
||||
if bracket_closed {
|
||||
Ok(Regex::new(®ex_pattern).unwrap())
|
||||
} else {
|
||||
Result::Err(LimboError::Constraint(
|
||||
"blob pattern is not closed".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
@@ -84,4 +202,16 @@ mod test {
|
||||
assert!(!exec_like_with_escape("abcXX", "abc", 'X'));
|
||||
assert!(!exec_like_with_escape("abcXX", "abcXX", 'X'));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_glob_no_cache() {
|
||||
assert!(exec_glob(None, r#"?*/abc/?*"#, r#"x//a/ab/abc/y"#));
|
||||
assert!(exec_glob(None, r#"a[1^]"#, r#"a1"#));
|
||||
assert!(exec_glob(None, r#"a[1^]*"#, r#"a^"#));
|
||||
assert!(!exec_glob(None, r#"a[a*"#, r#"a["#));
|
||||
assert!(!exec_glob(None, r#"a[a"#, r#"a[a"#));
|
||||
assert!(exec_glob(None, r#"a[[]"#, r#"a["#));
|
||||
assert!(exec_glob(None, r#"abc[^][*?]efg"#, r#"abcdefg"#));
|
||||
assert!(!exec_glob(None, r#"abc[^][*?]efg"#, r#"abc]efg"#));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,12 +39,13 @@ use crate::types::{
|
||||
use crate::util::parse_schema_rows;
|
||||
use crate::vdbe::insn::Insn;
|
||||
#[cfg(feature = "json")]
|
||||
use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length};
|
||||
use crate::{
|
||||
function::JsonFunc, json::get_json, json::json_array, json::json_array_length,
|
||||
json::json_extract,
|
||||
};
|
||||
use crate::{Connection, Result, Rows, TransactionState, DATABASE_VERSION};
|
||||
use datetime::{exec_date, exec_time, exec_unixepoch};
|
||||
use likeop::{construct_like_escape_arg, exec_like_with_escape};
|
||||
|
||||
use crate::json::json_extract;
|
||||
use likeop::{construct_like_escape_arg, exec_glob, exec_like_with_escape};
|
||||
use rand::distributions::{Distribution, Uniform};
|
||||
use rand::{thread_rng, Rng};
|
||||
use regex::{Regex, RegexBuilder};
|
||||
@@ -2701,7 +2702,7 @@ pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue {
|
||||
let word: String = s
|
||||
.value
|
||||
.chars()
|
||||
.filter(|c| !c.is_digit(10))
|
||||
.filter(|c| !c.is_ascii_digit())
|
||||
.collect::<String>()
|
||||
.replace(" ", "");
|
||||
if word.is_empty() {
|
||||
@@ -2743,7 +2744,7 @@ pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue {
|
||||
|
||||
// Remove adjacent same digits
|
||||
let tmp = tmp.chars().fold(String::new(), |mut acc, ch| {
|
||||
if acc.chars().last() != Some(ch) {
|
||||
if !acc.ends_with(ch) {
|
||||
acc.push(ch);
|
||||
}
|
||||
acc
|
||||
@@ -2759,7 +2760,7 @@ pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue {
|
||||
|
||||
// If the first symbol is a digit, replace it with the saved first letter
|
||||
if let Some(first_digit) = result.chars().next() {
|
||||
if first_digit.is_digit(10) {
|
||||
if first_digit.is_ascii_digit() {
|
||||
result.replace_range(0..1, &first_letter.to_string());
|
||||
}
|
||||
}
|
||||
@@ -2898,31 +2899,6 @@ fn exec_like(regex_cache: Option<&mut HashMap<String, Regex>>, pattern: &str, te
|
||||
}
|
||||
}
|
||||
|
||||
fn construct_glob_regex(pattern: &str) -> Regex {
|
||||
let mut regex_pattern = String::from("^");
|
||||
regex_pattern.push_str(&pattern.replace('*', ".*").replace("?", "."));
|
||||
regex_pattern.push('$');
|
||||
Regex::new(®ex_pattern).unwrap()
|
||||
}
|
||||
|
||||
// Implements GLOB pattern matching. Caches the constructed regex if a cache is provided
|
||||
fn exec_glob(regex_cache: Option<&mut HashMap<String, Regex>>, pattern: &str, text: &str) -> bool {
|
||||
if let Some(cache) = regex_cache {
|
||||
match cache.get(pattern) {
|
||||
Some(re) => re.is_match(text),
|
||||
None => {
|
||||
let re = construct_glob_regex(pattern);
|
||||
let res = re.is_match(text);
|
||||
cache.insert(pattern.to_string(), re);
|
||||
res
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let re = construct_glob_regex(pattern);
|
||||
re.is_match(text)
|
||||
}
|
||||
}
|
||||
|
||||
fn exec_min(regs: Vec<&OwnedValue>) -> OwnedValue {
|
||||
regs.iter()
|
||||
.min()
|
||||
@@ -4114,7 +4090,7 @@ mod tests {
|
||||
expected_len: 2,
|
||||
},
|
||||
TestCase {
|
||||
input: OwnedValue::Float(-3.14),
|
||||
input: OwnedValue::Float(-3.15),
|
||||
expected_len: 1,
|
||||
},
|
||||
TestCase {
|
||||
|
||||
225
docs/internals/functions.md
Normal file
225
docs/internals/functions.md
Normal file
@@ -0,0 +1,225 @@
|
||||
# How to contribute a SQL function implementation?
|
||||
|
||||
Steps
|
||||
1. Pick a `SQL functions` in [COMPAT.md](../../COMPAT.md) file with a No (not implemented yet) status.
|
||||
2. Create an issue for that function.
|
||||
3. Implement the function in a feature branch.
|
||||
4. Push it as a Merge Request, get it review.
|
||||
|
||||
## An example with function `date(..)`
|
||||
|
||||
> Note that the files, code location, steps might be not exactly the same because of refactor but the idea of the changes needed in each layer stays.
|
||||
|
||||
[Issue #158](https://github.com/tursodatabase/limbo/issues/158) was created for it.
|
||||
Refer to commit [4ff7058](https://github.com/tursodatabase/limbo/commit/4ff705868a054643f6113cbe009655c32bc5f235).
|
||||
|
||||
```
|
||||
sql function: string
|
||||
--Parser-->
|
||||
enum Func
|
||||
--translate-->
|
||||
Instruction
|
||||
--VDBE-->
|
||||
Result
|
||||
```
|
||||
|
||||
TODO for implementing the function:
|
||||
- analysis
|
||||
- read and try out how the function works in SQLite.
|
||||
- compare `explain` output of SQLite and Limbo.
|
||||
- add/ update the function definition in `functions.rs`.
|
||||
- add/ update how to function is translated from `definition` to `instruction` in virtual machine layer VDBE.
|
||||
- add/ update the function Rust execution code and tests in vdbe layer.
|
||||
- add/ update how the bytecode `Program` executes when steps into the function.
|
||||
- add/ update TCL tests for this function in limbo/testing.
|
||||
- update doc for function compatibility.
|
||||
|
||||
### Analysis
|
||||
|
||||
How `date` works in SQLite?
|
||||
```bash
|
||||
> sqlite3
|
||||
|
||||
sqlite> explain select date('now');
|
||||
addr opcode p1 p2 p3 p4 p5 comment
|
||||
---- ------------- ---- ---- ---- ------------- -- -------------
|
||||
0 Init 0 6 0 0 Start at 6
|
||||
1 Once 0 3 0 0
|
||||
2 Function 0 0 2 date(-1) 0 r[2]=func()
|
||||
3 Copy 2 1 0 0 r[1]=r[2]
|
||||
4 ResultRow 1 1 0 0 output=r[1]
|
||||
5 Halt 0 0 0 0
|
||||
6 Goto 0 1 0 0
|
||||
```
|
||||
|
||||
Comparing that with `Limbo`:
|
||||
```bash
|
||||
# created a sqlite database file database.db
|
||||
# or cargo run to use the memory mode if it is already available.
|
||||
> cargo run database.db
|
||||
|
||||
Enter ".help" for usage hints.
|
||||
limbo> explain select date('now');
|
||||
Parse error: unknown function date
|
||||
```
|
||||
|
||||
We can see that the function is not implemented yet so the Parser did not understand it and throw an error `Parse error: unknown function date`.
|
||||
- we only need to pay attention to opcode `Function` at addr 2. The rest is already set up in limbo.
|
||||
- we have up to 5 registers p1 to p5 for each opcode.
|
||||
|
||||
### Function definition
|
||||
|
||||
For limbo to understand the meaning of `date`, we need to define it as a Function somewhere.
|
||||
That place can be found currently in `core/functions.rs`. We need to edit 3 places
|
||||
1. add to ScalarFunc as `date` is a scalar function.
|
||||
```diff
|
||||
// file core/functions.rs
|
||||
pub enum ScalarFunc {
|
||||
// other funcs...
|
||||
Soundex,
|
||||
+ Date,
|
||||
Time,
|
||||
// other funcs...
|
||||
}
|
||||
```
|
||||
2. add to Display to show the function as a string in our program.
|
||||
```diff
|
||||
// file core/functions.rs
|
||||
impl Display for ScalarFunc {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let str = match self {
|
||||
// ...
|
||||
ScalarFunc::Soundex => "soundex".to_string(),
|
||||
+ ScalarFunc::Date => "date".to_string(),
|
||||
ScalarFunc::Time => "time".to_string(),
|
||||
// ...
|
||||
}
|
||||
```
|
||||
3. add to `fn resolve_function(..)` of `impl Func` to enable parsing from str to this function.
|
||||
```diff
|
||||
// file core/functions.rs
|
||||
impl Func {
|
||||
pub fn resolve_function(name: &str, arg_count: usize) -> Result<Func, ()> {
|
||||
match name {
|
||||
// ...
|
||||
+ "date" => Ok(Func::Scalar(ScalarFunc::Date)),
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
### Function translation
|
||||
|
||||
How to translate the function into bytecode `Instruction`?
|
||||
- `date` function can have zero to many arguments.
|
||||
- in case there are arguments, we loop through the args and allocate a register `let target_reg = program.alloc_register();`
|
||||
for each argument expression.
|
||||
- then we emit the bytecode instruction for Function `program.emit_insn(Insn::Function {...})`
|
||||
|
||||
https://github.com/tursodatabase/limbo/blob/69e3dd28f77e59927da4313e517b2b428ede480d/core/translate/expr.rs#L1235C1-L1256C26
|
||||
|
||||
|
||||
```diff
|
||||
// file core/translate/expr.rs
|
||||
pub fn translate_expr(...) -> Result<usize> {
|
||||
// ...
|
||||
match expr {
|
||||
// ..
|
||||
ast::Expr::FunctionCall {
|
||||
// ...
|
||||
match &func_ctx.func {
|
||||
// ...
|
||||
Func::Scalar(srf) => {
|
||||
// ...
|
||||
+ ScalarFunc::Date => {
|
||||
+ if let Some(args) = args {
|
||||
+ for arg in args.iter() {
|
||||
+ // register containing result of each argument expression
|
||||
+ let target_reg = program.alloc_register();
|
||||
+ _ = translate_expr(
|
||||
+ program,
|
||||
+ referenced_tables,
|
||||
+ arg,
|
||||
+ target_reg,
|
||||
+ precomputed_exprs_to_registers,
|
||||
+ )?;
|
||||
+ }
|
||||
+ }
|
||||
+ program.emit_insn(Insn::Function {
|
||||
+ constant_mask: 0,
|
||||
+ start_reg: target_register + 1,
|
||||
+ dest: target_register,
|
||||
+ func: func_ctx,
|
||||
+ });
|
||||
+ Ok(target_register)
|
||||
+ }
|
||||
// ...
|
||||
```
|
||||
|
||||
### Function execution
|
||||
|
||||
The function execution code is implemented in `vdbe/datetime.rs` file [here](https://github.com/tursodatabase/limbo/commit/9cc965186fecf4ba4dd81c783a841c71575123bf#diff-839435241d4ffb648ad2d162bc6ba6a94f052309865251dc2aff36eaa14fa3c5R11-R30) as we already implemented the datetime features in this file.
|
||||
Note that for other functions it might be implemented in other locations in the vdbe module.
|
||||
|
||||
```diff
|
||||
// file vdbe/datetime.rs
|
||||
// ...
|
||||
+ pub fn exec_date(values: &[OwnedValue]) -> OwnedValue {
|
||||
+ // ... implementation
|
||||
+ }
|
||||
|
||||
// ...
|
||||
```
|
||||
|
||||
### Program bytecode execution
|
||||
|
||||
The next step is to implement how the virtual machine (VDBE layer) executes the bytecode `Program` when the program steps into the function instruction `Insn::Function` for date (`ScalarFunc::Date`).
|
||||
|
||||
Per the [SQLite spec](https://www.sqlite.org/lang_datefunc.html#time_values), if there is no `time value` (no start register), we want to execute the function with the default param `'now'`.
|
||||
> In all functions other than timediff(), the time-value (and all modifiers) may be omitted, in which case a time value of 'now' is assumed.
|
||||
|
||||
```diff
|
||||
// file vdbe/mod.rs
|
||||
impl Program {
|
||||
pub fn step<'a>(...) {
|
||||
loop {
|
||||
// ...
|
||||
match isin {
|
||||
// ...
|
||||
Insn::Function {
|
||||
// ...
|
||||
+ ScalarFunc::Date => {
|
||||
+ let result =
|
||||
+ exec_date(&state.registers[*start_reg..*start_reg + arg_count]);
|
||||
+ state.registers[*dest] = result;
|
||||
+ }
|
||||
// ...
|
||||
```
|
||||
|
||||
### Adding tests
|
||||
|
||||
There are 2 kinds of tests we need to add:
|
||||
1. tests for Rust code
|
||||
2. TCL tests for executing the sql function
|
||||
|
||||
One test for the Rust code is shown as an example below.
|
||||
https://github.com/tursodatabase/limbo/blob/69e3dd28f77e59927da4313e517b2b428ede480d/core/vdbe/datetime.rs#L620C1-L661C1
|
||||
|
||||
TCL tests for `date` functions can be referenced from SQLite source code which is already very comprehensive.
|
||||
- https://github.com/sqlite/sqlite/blob/f2b21a5f57e1a1db1a286c42af40563077635c3d/test/date3.test#L36
|
||||
- https://github.com/sqlite/sqlite/blob/f2b21a5f57e1a1db1a286c42af40563077635c3d/test/date.test#L611C1-L652C73
|
||||
|
||||
### Updating doc
|
||||
|
||||
Update the [COMPAT.md](../../COMPAT.md) file to mark this function as implemented. Change Status to
|
||||
- `Yes` if it is fully supported,
|
||||
- `Partial` if supported but not yet fully compatible with SQLite.
|
||||
|
||||
An example:
|
||||
```diff
|
||||
// file COMPAT.md
|
||||
| Function | Status | Comment |
|
||||
|------------------------------|---------|------------------------------|
|
||||
- | date() | No | |
|
||||
+ | date() | Yes | partially supports modifiers |
|
||||
...
|
||||
```
|
||||
@@ -1,5 +1,7 @@
|
||||
use std::{iter::Sum, ops::SubAssign};
|
||||
|
||||
use anarchist_readable_name_generator_lib::readable_name_custom;
|
||||
use rand::Rng;
|
||||
use rand::{distributions::uniform::SampleUniform, Rng};
|
||||
|
||||
pub mod plan;
|
||||
pub mod query;
|
||||
@@ -13,12 +15,17 @@ pub trait ArbitraryFrom<T> {
|
||||
fn arbitrary_from<R: Rng>(rng: &mut R, t: &T) -> Self;
|
||||
}
|
||||
|
||||
pub(crate) fn frequency<'a, T, R: rand::Rng>(
|
||||
choices: Vec<(usize, Box<dyn FnOnce(&mut R) -> T + 'a>)>,
|
||||
pub(crate) fn frequency<
|
||||
'a,
|
||||
T,
|
||||
R: rand::Rng,
|
||||
N: Sum + PartialOrd + Copy + Default + SampleUniform + SubAssign,
|
||||
>(
|
||||
choices: Vec<(N, Box<dyn FnOnce(&mut R) -> T + 'a>)>,
|
||||
rng: &mut R,
|
||||
) -> T {
|
||||
let total = choices.iter().map(|(weight, _)| weight).sum::<usize>();
|
||||
let mut choice = rng.gen_range(0..total);
|
||||
let total = choices.iter().map(|(weight, _)| *weight).sum::<N>();
|
||||
let mut choice = rng.gen_range(N::default()..total);
|
||||
|
||||
for (weight, f) in choices {
|
||||
if choice < weight {
|
||||
@@ -38,7 +45,7 @@ pub(crate) fn one_of<'a, T, R: rand::Rng>(
|
||||
choices[index](rng)
|
||||
}
|
||||
|
||||
pub(crate) fn pick<'a, T, R: rand::Rng>(choices: &'a Vec<T>, rng: &mut R) -> &'a T {
|
||||
pub(crate) fn pick<'a, T, R: rand::Rng>(choices: &'a [T], rng: &mut R) -> &'a T {
|
||||
let index = rng.gen_range(0..choices.len());
|
||||
&choices[index]
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use crate::generation::{frequency, Arbitrary, ArbitraryFrom};
|
||||
|
||||
use super::{pick, pick_index};
|
||||
|
||||
pub(crate) type ResultSet = Vec<Vec<Value>>;
|
||||
pub(crate) type ResultSet = Result<Vec<Vec<Value>>>;
|
||||
|
||||
pub(crate) struct InteractionPlan {
|
||||
pub(crate) plan: Vec<Interaction>,
|
||||
@@ -45,14 +45,15 @@ pub(crate) struct InteractionStats {
|
||||
pub(crate) read_count: usize,
|
||||
pub(crate) write_count: usize,
|
||||
pub(crate) delete_count: usize,
|
||||
pub(crate) create_count: usize,
|
||||
}
|
||||
|
||||
impl Display for InteractionStats {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Read: {}, Write: {}, Delete: {}",
|
||||
self.read_count, self.write_count, self.delete_count
|
||||
"Read: {}, Write: {}, Delete: {}, Create: {}",
|
||||
self.read_count, self.write_count, self.delete_count, self.create_count
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -100,7 +101,9 @@ impl Interactions {
|
||||
match interaction {
|
||||
Interaction::Query(query) => match query {
|
||||
Query::Create(create) => {
|
||||
env.tables.push(create.table.clone());
|
||||
if !env.tables.iter().any(|t| t.name == create.table.name) {
|
||||
env.tables.push(create.table.clone());
|
||||
}
|
||||
}
|
||||
Query::Insert(insert) => {
|
||||
let table = env
|
||||
@@ -137,6 +140,7 @@ impl InteractionPlan {
|
||||
let mut read = 0;
|
||||
let mut write = 0;
|
||||
let mut delete = 0;
|
||||
let mut create = 0;
|
||||
|
||||
for interaction in &self.plan {
|
||||
match interaction {
|
||||
@@ -144,7 +148,7 @@ impl InteractionPlan {
|
||||
Query::Select(_) => read += 1,
|
||||
Query::Insert(_) => write += 1,
|
||||
Query::Delete(_) => delete += 1,
|
||||
Query::Create(_) => {}
|
||||
Query::Create(_) => create += 1,
|
||||
},
|
||||
Interaction::Assertion(_) => {}
|
||||
Interaction::Fault(_) => {}
|
||||
@@ -155,6 +159,7 @@ impl InteractionPlan {
|
||||
read_count: read,
|
||||
write_count: write,
|
||||
delete_count: delete,
|
||||
create_count: create,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -172,7 +177,7 @@ impl ArbitraryFrom<SimulatorEnv> for InteractionPlan {
|
||||
rng: ChaCha8Rng::seed_from_u64(rng.next_u64()),
|
||||
};
|
||||
|
||||
let num_interactions = rng.gen_range(0..env.opts.max_interactions);
|
||||
let num_interactions = env.opts.max_interactions;
|
||||
|
||||
// First create at least one table
|
||||
let create_query = Create::arbitrary(rng);
|
||||
@@ -197,7 +202,7 @@ impl ArbitraryFrom<SimulatorEnv> for InteractionPlan {
|
||||
}
|
||||
|
||||
impl Interaction {
|
||||
pub(crate) fn execute_query(&self, conn: &mut Rc<Connection>) -> Result<ResultSet> {
|
||||
pub(crate) fn execute_query(&self, conn: &mut Rc<Connection>) -> ResultSet {
|
||||
match self {
|
||||
Self::Query(query) => {
|
||||
let query_str = query.to_string();
|
||||
@@ -342,13 +347,40 @@ fn property_insert_select<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Inte
|
||||
),
|
||||
func: Box::new(move |stack: &Vec<ResultSet>| {
|
||||
let rows = stack.last().unwrap();
|
||||
rows.iter().any(|r| r == &row)
|
||||
match rows {
|
||||
Ok(rows) => rows.iter().any(|r| r == &row),
|
||||
Err(_) => false,
|
||||
}
|
||||
}),
|
||||
});
|
||||
|
||||
Interactions(vec![insert_query, select_query, assertion])
|
||||
}
|
||||
|
||||
fn property_double_create_failure<R: rand::Rng>(rng: &mut R, _env: &SimulatorEnv) -> Interactions {
|
||||
let create_query = Create::arbitrary(rng);
|
||||
let table_name = create_query.table.name.clone();
|
||||
let cq1 = Interaction::Query(Query::Create(create_query.clone()));
|
||||
let cq2 = Interaction::Query(Query::Create(create_query.clone()));
|
||||
|
||||
let assertion = Interaction::Assertion(Assertion {
|
||||
message:
|
||||
"creating two tables with the name should result in a failure for the second query"
|
||||
.to_string(),
|
||||
func: Box::new(move |stack: &Vec<ResultSet>| {
|
||||
let last = stack.last().unwrap();
|
||||
match last {
|
||||
Ok(_) => false,
|
||||
Err(e) => e
|
||||
.to_string()
|
||||
.contains(&format!("Table {table_name} already exists")),
|
||||
}
|
||||
}),
|
||||
});
|
||||
|
||||
Interactions(vec![cq1, cq2, assertion])
|
||||
}
|
||||
|
||||
fn create_table<R: rand::Rng>(rng: &mut R, _env: &SimulatorEnv) -> Interactions {
|
||||
let create_query = Interaction::Query(Query::Create(Create::arbitrary(rng)));
|
||||
Interactions(vec![create_query])
|
||||
@@ -375,17 +407,21 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions {
|
||||
rng: &mut R,
|
||||
(env, stats): &(&SimulatorEnv, InteractionStats),
|
||||
) -> Self {
|
||||
let remaining_read =
|
||||
((((env.opts.max_interactions * env.opts.read_percent) as f64) / 100.0) as usize)
|
||||
.saturating_sub(stats.read_count);
|
||||
let remaining_write = ((((env.opts.max_interactions * env.opts.write_percent) as f64)
|
||||
/ 100.0) as usize)
|
||||
.saturating_sub(stats.write_count);
|
||||
let remaining_read = ((env.opts.max_interactions as f64 * env.opts.read_percent / 100.0)
|
||||
- (stats.read_count as f64))
|
||||
.max(0.0);
|
||||
let remaining_write = ((env.opts.max_interactions as f64 * env.opts.write_percent / 100.0)
|
||||
- (stats.write_count as f64))
|
||||
.max(0.0);
|
||||
let remaining_create = ((env.opts.max_interactions as f64 * env.opts.create_percent
|
||||
/ 100.0)
|
||||
- (stats.create_count as f64))
|
||||
.max(0.0);
|
||||
|
||||
frequency(
|
||||
vec![
|
||||
(
|
||||
usize::min(remaining_read, remaining_write),
|
||||
f64::min(remaining_read, remaining_write),
|
||||
Box::new(|rng: &mut R| property_insert_select(rng, env)),
|
||||
),
|
||||
(
|
||||
@@ -397,10 +433,14 @@ impl ArbitraryFrom<(&SimulatorEnv, InteractionStats)> for Interactions {
|
||||
Box::new(|rng: &mut R| random_write(rng, env)),
|
||||
),
|
||||
(
|
||||
remaining_write / 10,
|
||||
remaining_create,
|
||||
Box::new(|rng: &mut R| create_table(rng, env)),
|
||||
),
|
||||
(1, Box::new(|rng: &mut R| random_fault(rng, env))),
|
||||
(1.0, Box::new(|rng: &mut R| random_fault(rng, env))),
|
||||
(
|
||||
remaining_create / 2.0,
|
||||
Box::new(|rng: &mut R| property_double_create_failure(rng, env)),
|
||||
),
|
||||
],
|
||||
rng,
|
||||
)
|
||||
|
||||
@@ -41,11 +41,7 @@ impl Arbitrary for Column {
|
||||
|
||||
impl Arbitrary for ColumnType {
|
||||
fn arbitrary<R: Rng>(rng: &mut R) -> Self {
|
||||
pick(
|
||||
&vec![Self::Integer, Self::Float, Self::Text, Self::Blob],
|
||||
rng,
|
||||
)
|
||||
.to_owned()
|
||||
pick(&[Self::Integer, Self::Float, Self::Text, Self::Blob], rng).to_owned()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -121,6 +121,15 @@ fn main() {
|
||||
// Move the old database and plan file back
|
||||
std::fs::rename(&old_db_path, &db_path).unwrap();
|
||||
std::fs::rename(&old_plan_path, &plan_path).unwrap();
|
||||
} else if let Ok(result) = result {
|
||||
match result {
|
||||
Ok(_) => {
|
||||
log::info!("simulation completed successfully");
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("simulation failed: {:?}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Print the seed, the locations of the database and the plan file at the end again for easily accessing them.
|
||||
println!("database path: {:?}", db_path);
|
||||
@@ -136,30 +145,50 @@ fn run_simulation(
|
||||
) -> Result<()> {
|
||||
let mut rng = ChaCha8Rng::seed_from_u64(seed);
|
||||
|
||||
let (read_percent, write_percent, delete_percent) = {
|
||||
let mut remaining = 100;
|
||||
let read_percent = rng.gen_range(0..=remaining);
|
||||
let (create_percent, read_percent, write_percent, delete_percent) = {
|
||||
let mut remaining = 100.0;
|
||||
let read_percent = rng.gen_range(0.0..=remaining);
|
||||
remaining -= read_percent;
|
||||
let write_percent = rng.gen_range(0..=remaining);
|
||||
let write_percent = rng.gen_range(0.0..=remaining);
|
||||
remaining -= write_percent;
|
||||
let delete_percent = remaining;
|
||||
(read_percent, write_percent, delete_percent)
|
||||
|
||||
let create_percent = write_percent / 10.0;
|
||||
let write_percent = write_percent - create_percent;
|
||||
|
||||
(create_percent, read_percent, write_percent, delete_percent)
|
||||
};
|
||||
|
||||
if cli_opts.minimum_size < 1 {
|
||||
return Err(limbo_core::LimboError::InternalError(
|
||||
"minimum size must be at least 1".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if cli_opts.maximum_size < 1 {
|
||||
panic!("maximum size must be at least 1");
|
||||
return Err(limbo_core::LimboError::InternalError(
|
||||
"maximum size must be at least 1".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if cli_opts.maximum_size < cli_opts.minimum_size {
|
||||
return Err(limbo_core::LimboError::InternalError(
|
||||
"maximum size must be greater than or equal to minimum size".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let opts = SimulatorOpts {
|
||||
ticks: rng.gen_range(1..=cli_opts.maximum_size),
|
||||
ticks: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size),
|
||||
max_connections: 1, // TODO: for now let's use one connection as we didn't implement
|
||||
// correct transactions procesing
|
||||
max_tables: rng.gen_range(0..128),
|
||||
create_percent,
|
||||
read_percent,
|
||||
write_percent,
|
||||
delete_percent,
|
||||
page_size: 4096, // TODO: randomize this too
|
||||
max_interactions: rng.gen_range(1..=cli_opts.maximum_size),
|
||||
max_interactions: rng.gen_range(cli_opts.minimum_size..=cli_opts.maximum_size),
|
||||
max_time_simulation: cli_opts.maximum_time,
|
||||
};
|
||||
let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap());
|
||||
|
||||
@@ -207,12 +236,19 @@ fn run_simulation(
|
||||
}
|
||||
|
||||
fn execute_plans(env: &mut SimulatorEnv, plans: &mut [InteractionPlan]) -> Result<()> {
|
||||
let now = std::time::Instant::now();
|
||||
// todo: add history here by recording which interaction was executed at which tick
|
||||
for _tick in 0..env.opts.ticks {
|
||||
// Pick the connection to interact with
|
||||
let connection_index = pick_index(env.connections.len(), &mut env.rng);
|
||||
// Execute the interaction for the selected connection
|
||||
execute_plan(env, connection_index, plans)?;
|
||||
// Check if the maximum time for the simulation has been reached
|
||||
if now.elapsed().as_secs() >= env.opts.max_time_simulation as u64 {
|
||||
return Err(limbo_core::LimboError::InternalError(
|
||||
"maximum time for simulation reached".into(),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -266,7 +302,7 @@ fn execute_interaction(
|
||||
};
|
||||
|
||||
log::debug!("{}", interaction);
|
||||
let results = interaction.execute_query(conn)?;
|
||||
let results = interaction.execute_query(conn);
|
||||
log::debug!("{:?}", results);
|
||||
stack.push(results);
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ pub(crate) enum Query {
|
||||
Delete(Delete),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct Create {
|
||||
pub(crate) table: Table,
|
||||
}
|
||||
|
||||
@@ -15,10 +15,24 @@ pub struct SimulatorCLI {
|
||||
)]
|
||||
pub doublecheck: bool,
|
||||
#[clap(
|
||||
short,
|
||||
short = 'n',
|
||||
long,
|
||||
help = "change the maximum size of the randomly generated sequence of interactions",
|
||||
default_value_t = 1024
|
||||
)]
|
||||
pub maximum_size: usize,
|
||||
#[clap(
|
||||
short = 'k',
|
||||
long,
|
||||
help = "change the minimum size of the randomly generated sequence of interactions",
|
||||
default_value_t = 1
|
||||
)]
|
||||
pub minimum_size: usize,
|
||||
#[clap(
|
||||
short = 't',
|
||||
long,
|
||||
help = "change the maximum time of the simulation(in seconds)",
|
||||
default_value_t = 60 * 60 // default to 1 hour
|
||||
)]
|
||||
pub maximum_time: usize,
|
||||
}
|
||||
|
||||
@@ -30,9 +30,11 @@ pub(crate) struct SimulatorOpts {
|
||||
pub(crate) max_tables: usize,
|
||||
// this next options are the distribution of workload where read_percent + write_percent +
|
||||
// delete_percent == 100%
|
||||
pub(crate) read_percent: usize,
|
||||
pub(crate) write_percent: usize,
|
||||
pub(crate) delete_percent: usize,
|
||||
pub(crate) create_percent: f64,
|
||||
pub(crate) read_percent: f64,
|
||||
pub(crate) write_percent: f64,
|
||||
pub(crate) delete_percent: f64,
|
||||
pub(crate) max_interactions: usize,
|
||||
pub(crate) page_size: usize,
|
||||
pub(crate) max_time_simulation: usize,
|
||||
}
|
||||
|
||||
@@ -68,3 +68,75 @@ Robert|Roberts}
|
||||
do_execsql_test where-glob-impossible {
|
||||
select * from products where 'foobar' glob 'fooba';
|
||||
} {}
|
||||
|
||||
foreach {testnum pattern text ans} {
|
||||
1 abcdefg abcdefg 1
|
||||
2 abcdefG abcdefg 0
|
||||
3 abcdef abcdefg 0
|
||||
4 abcdefgh abcdefg 0
|
||||
5 abcdef? abcdefg 1
|
||||
6 abcdef? abcdef 0
|
||||
7 abcdef? abcdefgh 0
|
||||
8 abcdefg abcdef? 0
|
||||
9 abcdef? abcdef? 1
|
||||
10 abc/def abc/def 1
|
||||
11 abc//def abc/def 0
|
||||
12 */abc/* x/abc/y 1
|
||||
13 */abc/* /abc/ 1
|
||||
16 */abc/* x///a/ab/abc 0
|
||||
17 */abc/* x//a/ab/abc/ 1
|
||||
16 */abc/* x///a/ab/abc 0
|
||||
17 */abc/* x//a/ab/abc/ 1
|
||||
18 **/abc/** x//a/ab/abc/ 1
|
||||
19 *?/abc/*? x//a/ab/abc/y 1
|
||||
20 ?*/abc/?* x//a/ab/abc/y 1
|
||||
21 {abc[cde]efg} abcbefg 0
|
||||
22 {abc[cde]efg} abccefg 1
|
||||
23 {abc[cde]efg} abcdefg 1
|
||||
24 {abc[cde]efg} abceefg 1
|
||||
25 {abc[cde]efg} abcfefg 0
|
||||
26 {abc[^cde]efg} abcbefg 1
|
||||
27 {abc[^cde]efg} abccefg 0
|
||||
28 {abc[^cde]efg} abcdefg 0
|
||||
29 {abc[^cde]efg} abceefg 0
|
||||
30 {abc[^cde]efg} abcfefg 1
|
||||
31 {abc[c-e]efg} abcbefg 0
|
||||
32 {abc[c-e]efg} abccefg 1
|
||||
33 {abc[c-e]efg} abcdefg 1
|
||||
34 {abc[c-e]efg} abceefg 1
|
||||
35 {abc[c-e]efg} abcfefg 0
|
||||
36 {abc[^c-e]efg} abcbefg 1
|
||||
37 {abc[^c-e]efg} abccefg 0
|
||||
38 {abc[^c-e]efg} abcdefg 0
|
||||
39 {abc[^c-e]efg} abceefg 0
|
||||
40 {abc[^c-e]efg} abcfefg 1
|
||||
41 {abc[c-e]efg} abc-efg 0
|
||||
42 {abc[-ce]efg} abc-efg 1
|
||||
43 {abc[ce-]efg} abc-efg 1
|
||||
44 {abc[][*?]efg} {abc]efg} 1
|
||||
45 {abc[][*?]efg} {abc*efg} 1
|
||||
46 {abc[][*?]efg} {abc?efg} 1
|
||||
47 {abc[][*?]efg} {abc[efg} 1
|
||||
48 {abc[^][*?]efg} {abc]efg} 0
|
||||
49 {abc[^][*?]efg} {abc*efg} 0
|
||||
50 {abc[^][*?]efg} {abc?efg} 0
|
||||
51 {abc[^][*?]efg} {abc[efg} 0
|
||||
52 {abc[^][*?]efg} {abcdefg} 1
|
||||
53 {*[xyz]efg} {abcxefg} 1
|
||||
54 {*[xyz]efg} {abcwefg} 0
|
||||
55 {[-c]} {c} 1
|
||||
56 {[-c]} {-} 1
|
||||
57 {[-c]} {x} 0
|
||||
} {
|
||||
do_execsql_test glob-$testnum.1 "SELECT glob ( '$pattern' , '$text' )" $::ans
|
||||
}
|
||||
|
||||
|
||||
foreach {testnum pattern text ans} {
|
||||
1 {abc[} {abc[} 0
|
||||
2 {abc[} {abc} 0
|
||||
3 {a]b} {a]b} 1
|
||||
4 {a]b} {a[b} 0
|
||||
} {
|
||||
do_execsql_test glob-unenclosed-$testnum.1 "SELECT glob ( '$pattern' , '$text' )" $::ans
|
||||
}
|
||||
|
||||
@@ -113,10 +113,10 @@ do_execsql_test json_extract_number {
|
||||
SELECT json_extract(1, '$')
|
||||
} {{1}}
|
||||
|
||||
# \x61 is the ASCII code for 'a', json_extract needs an exact match though
|
||||
# \x61 is the ASCII code for 'a'
|
||||
do_execsql_test json_extract_with_escaping {
|
||||
SELECT json_extract('{"\x61": 1}', '$.a')
|
||||
} {{}}
|
||||
} {{1}}
|
||||
|
||||
# TODO: fix me
|
||||
#do_execsql_test json_extract_with_escaping_2 {
|
||||
|
||||
Reference in New Issue
Block a user