Merge branch 'main' into feature/delete-planning

This commit is contained in:
김선우
2024-12-27 23:21:35 +09:00
41 changed files with 2498 additions and 981 deletions

View File

@@ -234,8 +234,8 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html).
| jsonb(json) | | |
| json_array(value1,value2,...) | Yes | |
| jsonb_array(value1,value2,...) | | |
| json_array_length(json) | | |
| json_array_length(json,path) | | |
| json_array_length(json) | Yes | |
| json_array_length(json,path) | Yes | |
| json_error_position(json) | | |
| json_extract(json,path,...) | | |
| jsonb_extract(json,path,...) | | |

11
Cargo.lock generated
View File

@@ -1138,8 +1138,8 @@ dependencies = [
"jsonb",
"julian_day_converter",
"libc",
"limbo_macros",
"log",
"macros",
"mimalloc",
"mockall",
"nix 0.29.0",
@@ -1160,11 +1160,16 @@ dependencies = [
"uuid",
]
[[package]]
name = "limbo_macros"
version = "0.0.10"
[[package]]
name = "limbo_sim"
version = "0.0.10"
dependencies = [
"anarchist-readable-name-generator-lib",
"clap",
"env_logger 0.10.2",
"limbo_core",
"log",
@@ -1205,10 +1210,6 @@ version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "macros"
version = "0.0.0"
[[package]]
name = "memchr"
version = "2.7.4"

View File

@@ -128,21 +128,26 @@ impl Cursor {
match smt_lock.step().map_err(|e| {
PyErr::new::<OperationalError, _>(format!("Step error: {:?}", e))
})? {
limbo_core::RowResult::Row(row) => {
limbo_core::StepResult::Row(row) => {
let py_row = row_to_py(py, &row);
return Ok(Some(py_row));
}
limbo_core::RowResult::IO => {
limbo_core::StepResult::IO => {
self.conn.io.run_once().map_err(|e| {
PyErr::new::<OperationalError, _>(format!("IO error: {:?}", e))
})?;
}
limbo_core::RowResult::Interrupt => {
limbo_core::StepResult::Interrupt => {
return Ok(None);
}
limbo_core::RowResult::Done => {
limbo_core::StepResult::Done => {
return Ok(None);
}
limbo_core::StepResult::Busy => {
return Err(
PyErr::new::<OperationalError, _>("Busy error".to_string()).into()
);
}
}
}
} else {
@@ -162,21 +167,26 @@ impl Cursor {
match smt_lock.step().map_err(|e| {
PyErr::new::<OperationalError, _>(format!("Step error: {:?}", e))
})? {
limbo_core::RowResult::Row(row) => {
limbo_core::StepResult::Row(row) => {
let py_row = row_to_py(py, &row);
results.push(py_row);
}
limbo_core::RowResult::IO => {
limbo_core::StepResult::IO => {
self.conn.io.run_once().map_err(|e| {
PyErr::new::<OperationalError, _>(format!("IO error: {:?}", e))
})?;
}
limbo_core::RowResult::Interrupt => {
limbo_core::StepResult::Interrupt => {
return Ok(results);
}
limbo_core::RowResult::Done => {
limbo_core::StepResult::Done => {
return Ok(results);
}
limbo_core::StepResult::Busy => {
return Err(
PyErr::new::<OperationalError, _>("Busy error".to_string()).into()
);
}
}
}
} else {

View File

@@ -75,7 +75,7 @@ impl Statement {
pub fn get(&self) -> JsValue {
match self.inner.borrow_mut().step() {
Ok(limbo_core::RowResult::Row(row)) => {
Ok(limbo_core::StepResult::Row(row)) => {
let row_array = js_sys::Array::new();
for value in row.values {
let value = to_js_value(value);
@@ -83,9 +83,10 @@ impl Statement {
}
JsValue::from(row_array)
}
Ok(limbo_core::RowResult::IO)
| Ok(limbo_core::RowResult::Done)
| Ok(limbo_core::RowResult::Interrupt) => JsValue::UNDEFINED,
Ok(limbo_core::StepResult::IO)
| Ok(limbo_core::StepResult::Done)
| Ok(limbo_core::StepResult::Interrupt)
| Ok(limbo_core::StepResult::Busy) => JsValue::UNDEFINED,
Err(e) => panic!("Error: {:?}", e),
}
}
@@ -94,7 +95,7 @@ impl Statement {
let array = js_sys::Array::new();
loop {
match self.inner.borrow_mut().step() {
Ok(limbo_core::RowResult::Row(row)) => {
Ok(limbo_core::StepResult::Row(row)) => {
let row_array = js_sys::Array::new();
for value in row.values {
let value = to_js_value(value);
@@ -102,9 +103,10 @@ impl Statement {
}
array.push(&row_array);
}
Ok(limbo_core::RowResult::IO) => {}
Ok(limbo_core::RowResult::Interrupt) => break,
Ok(limbo_core::RowResult::Done) => break,
Ok(limbo_core::StepResult::IO) => {}
Ok(limbo_core::StepResult::Interrupt) => break,
Ok(limbo_core::StepResult::Done) => break,
Ok(limbo_core::StepResult::Busy) => break,
Err(e) => panic!("Error: {:?}", e),
}
}

View File

@@ -1,6 +1,6 @@
use crate::opcodes_dictionary::OPCODE_DESCRIPTIONS;
use cli_table::{Cell, Table};
use limbo_core::{Database, LimboError, RowResult, Value};
use limbo_core::{Database, LimboError, StepResult, Value};
use clap::{Parser, ValueEnum};
use std::{
@@ -498,7 +498,7 @@ impl Limbo {
}
match rows.next_row() {
Ok(RowResult::Row(row)) => {
Ok(StepResult::Row(row)) => {
for (i, value) in row.values.iter().enumerate() {
if i > 0 {
let _ = self.writer.write(b"|");
@@ -518,11 +518,15 @@ impl Limbo {
}
let _ = self.writeln("");
}
Ok(RowResult::IO) => {
Ok(StepResult::IO) => {
self.io.run_once()?;
}
Ok(RowResult::Interrupt) => break,
Ok(RowResult::Done) => {
Ok(StepResult::Interrupt) => break,
Ok(StepResult::Done) => {
break;
}
Ok(StepResult::Busy) => {
let _ = self.writeln("database is busy");
break;
}
Err(err) => {
@@ -539,7 +543,7 @@ impl Limbo {
let mut table_rows: Vec<Vec<_>> = vec![];
loop {
match rows.next_row() {
Ok(RowResult::Row(row)) => {
Ok(StepResult::Row(row)) => {
table_rows.push(
row.values
.iter()
@@ -555,11 +559,15 @@ impl Limbo {
.collect(),
);
}
Ok(RowResult::IO) => {
Ok(StepResult::IO) => {
self.io.run_once()?;
}
Ok(RowResult::Interrupt) => break,
Ok(RowResult::Done) => break,
Ok(StepResult::Interrupt) => break,
Ok(StepResult::Done) => break,
Ok(StepResult::Busy) => {
let _ = self.writeln("database is busy");
break;
}
Err(err) => {
let _ = self.write_fmt(format_args!("{}", err));
break;
@@ -599,17 +607,21 @@ impl Limbo {
let mut found = false;
loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
if let Some(Value::Text(schema)) = row.values.first() {
let _ = self.write_fmt(format_args!("{};", schema));
found = true;
}
}
RowResult::IO => {
StepResult::IO => {
self.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => {
let _ = self.writeln("database is busy");
break;
}
}
}
if !found {
@@ -652,17 +664,21 @@ impl Limbo {
let mut tables = String::new();
loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
if let Some(Value::Text(table)) = row.values.first() {
tables.push_str(table);
tables.push(' ');
}
}
RowResult::IO => {
StepResult::IO => {
self.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => {
let _ = self.writeln("database is busy");
break;
}
}
}

View File

@@ -54,7 +54,7 @@ pest = { version = "2.0", optional = true }
pest_derive = { version = "2.0", optional = true }
rand = "0.8.5"
bumpalo = { version = "3.16.0", features = ["collections", "boxed"] }
macros = { path = "../macros" }
limbo_macros = { path = "../macros" }
uuid = { version = "1.11.0", features = ["v4", "v7"], optional = true }
[target.'cfg(not(target_family = "windows"))'.dev-dependencies]

View File

@@ -40,16 +40,19 @@ fn limbo_bench(criterion: &mut Criterion) {
b.iter(|| {
let mut rows = stmt.query().unwrap();
match rows.next_row().unwrap() {
limbo_core::RowResult::Row(row) => {
limbo_core::StepResult::Row(row) => {
assert_eq!(row.get::<i64>(0).unwrap(), 1);
}
limbo_core::RowResult::IO => {
limbo_core::StepResult::IO => {
io.run_once().unwrap();
}
limbo_core::RowResult::Interrupt => {
limbo_core::StepResult::Interrupt => {
unreachable!();
}
limbo_core::RowResult::Done => {
limbo_core::StepResult::Done => {
unreachable!();
}
limbo_core::StepResult::Busy => {
unreachable!();
}
}
@@ -65,18 +68,21 @@ fn limbo_bench(criterion: &mut Criterion) {
b.iter(|| {
let mut rows = stmt.query().unwrap();
match rows.next_row().unwrap() {
limbo_core::RowResult::Row(row) => {
limbo_core::StepResult::Row(row) => {
assert_eq!(row.get::<i64>(0).unwrap(), 1);
}
limbo_core::RowResult::IO => {
limbo_core::StepResult::IO => {
io.run_once().unwrap();
}
limbo_core::RowResult::Interrupt => {
limbo_core::StepResult::Interrupt => {
unreachable!();
}
limbo_core::RowResult::Done => {
limbo_core::StepResult::Done => {
unreachable!();
}
limbo_core::StepResult::Busy => {
unreachable!()
}
}
stmt.reset();
});
@@ -91,18 +97,21 @@ fn limbo_bench(criterion: &mut Criterion) {
b.iter(|| {
let mut rows = stmt.query().unwrap();
match rows.next_row().unwrap() {
limbo_core::RowResult::Row(row) => {
limbo_core::StepResult::Row(row) => {
assert_eq!(row.get::<i64>(0).unwrap(), 1);
}
limbo_core::RowResult::IO => {
limbo_core::StepResult::IO => {
io.run_once().unwrap();
}
limbo_core::RowResult::Interrupt => {
limbo_core::StepResult::Interrupt => {
unreachable!();
}
limbo_core::RowResult::Done => {
limbo_core::StepResult::Done => {
unreachable!();
}
limbo_core::StepResult::Busy => {
unreachable!()
}
}
stmt.reset();
});

View File

@@ -6,6 +6,7 @@ use std::fmt::Display;
pub enum JsonFunc {
Json,
JsonArray,
JsonArrayLength,
}
#[cfg(feature = "json")]
@@ -17,6 +18,7 @@ impl Display for JsonFunc {
match self {
JsonFunc::Json => "json".to_string(),
JsonFunc::JsonArray => "json_array".to_string(),
JsonFunc::JsonArrayLength => "json_array_length".to_string(),
}
)
}
@@ -334,6 +336,8 @@ impl Func {
"json" => Ok(Func::Json(JsonFunc::Json)),
#[cfg(feature = "json")]
"json_array" => Ok(Func::Json(JsonFunc::JsonArray)),
#[cfg(feature = "json")]
"json_array_length" => Ok(Func::Json(JsonFunc::JsonArrayLength)),
"unixepoch" => Ok(Func::Scalar(ScalarFunc::UnixEpoch)),
"hex" => Ok(Func::Scalar(ScalarFunc::Hex)),
"unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)),

View File

@@ -1,5 +1,6 @@
mod de;
mod error;
mod path;
mod ser;
use std::rc::Rc;
@@ -8,9 +9,10 @@ pub use crate::json::de::from_str;
pub use crate::json::ser::to_string;
use crate::types::{LimboText, OwnedValue, TextSubtype};
use indexmap::IndexMap;
use path::get_json_val_by_path;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, PartialEq, Debug)]
#[serde(untagged)]
pub enum Val {
Null,
@@ -88,6 +90,49 @@ pub fn json_array(values: Vec<&OwnedValue>) -> crate::Result<OwnedValue> {
Ok(OwnedValue::Text(LimboText::json(Rc::new(s))))
}
/// Implements the `json_array_length(json[, path])` scalar function.
///
/// Returns the element count of the JSON array addressed by `json_path`
/// (or of the top-level value when no path is given), `0` when the
/// addressed value exists but is not an array, and SQL `NULL` when the
/// path does not match anything.
pub fn json_array_length(
    json_value: &OwnedValue,
    json_path: Option<&OwnedValue>,
) -> crate::Result<OwnedValue> {
    // SQLite-style loose typing for the path argument: numbers are
    // stringified, any other non-text value is treated as "no path".
    let path = json_path.and_then(|p| match p {
        OwnedValue::Text(t) => Some(t.value.to_string()),
        OwnedValue::Integer(i) => Some(i.to_string()),
        OwnedValue::Float(f) => Some(f.to_string()),
        _ => None,
    });

    // Parse the JSON input: text is parsed directly, blobs are decoded
    // from the binary jsonb representation first. Any other value type
    // is not an array and therefore reports length 0.
    let parsed = match json_value {
        OwnedValue::Text(t) => crate::json::from_str::<Val>(&t.value),
        OwnedValue::Blob(b) => {
            let Ok(binary) = jsonb::from_slice(b) else {
                crate::bail_parse_error!("malformed JSON")
            };
            let json = binary.to_string();
            crate::json::from_str(&json)
        }
        _ => return Ok(OwnedValue::Integer(0)),
    };
    let Ok(root) = parsed else {
        crate::bail_parse_error!("malformed JSON")
    };

    // Resolve the optional path; a well-formed but unmatched path
    // yields SQL NULL, matching the documented behavior above.
    let target = match &path {
        Some(p) => match get_json_val_by_path(&root, p)? {
            Some(v) => v,
            None => return Ok(OwnedValue::Null),
        },
        None => &root,
    };

    match target {
        Val::Array(items) => Ok(OwnedValue::Integer(items.len() as i64)),
        _ => Ok(OwnedValue::Integer(0)),
    }
}
#[cfg(test)]
mod tests {
use super::*;
@@ -266,4 +311,121 @@ mod tests {
Err(e) => assert!(e.to_string().contains("JSON cannot hold BLOB values")),
}
}
// json_array_length with no path: a 4-element top-level array reports 4.
#[test]
fn test_json_array_length() {
let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string()));
let result = json_array_length(&input, None).unwrap();
if let OwnedValue::Integer(res) = result {
assert_eq!(res, 4);
} else {
panic!("Expected OwnedValue::Integer");
}
}
// An empty top-level array reports length 0 (not NULL).
#[test]
fn test_json_array_length_empty() {
let input = OwnedValue::build_text(Rc::new("[]".to_string()));
let result = json_array_length(&input, None).unwrap();
if let OwnedValue::Integer(res) = result {
assert_eq!(res, 0);
} else {
panic!("Expected OwnedValue::Integer");
}
}
// The explicit root path `$` addresses the same value as no path at all.
#[test]
fn test_json_array_length_root() {
let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string()));
let result = json_array_length(
&input,
Some(&OwnedValue::build_text(Rc::new("$".to_string()))),
)
.unwrap();
if let OwnedValue::Integer(res) = result {
assert_eq!(res, 4);
} else {
panic!("Expected OwnedValue::Integer");
}
}
// A top-level object (not an array) reports length 0.
// NOTE(review): `{one: [1,2,3,4]}` is not strict JSON (unquoted key);
// this relies on the crate's lenient parser — confirm that is intended.
#[test]
fn test_json_array_length_not_array() {
let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string()));
let result = json_array_length(&input, None).unwrap();
if let OwnedValue::Integer(res) = result {
assert_eq!(res, 0);
} else {
panic!("Expected OwnedValue::Integer");
}
}
// A `$.prop` path can address an array nested inside an object.
// NOTE(review): unquoted object key in the literal — relies on the
// lenient crate parser, see test_json_array_length_not_array.
#[test]
fn test_json_array_length_via_prop() {
let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string()));
let result = json_array_length(
&input,
Some(&OwnedValue::build_text(Rc::new("$.one".to_string()))),
)
.unwrap();
if let OwnedValue::Integer(res) = result {
assert_eq!(res, 4);
} else {
panic!("Expected OwnedValue::Integer");
}
}
// A `$[N]` path can address an array nested inside another array.
#[test]
fn test_json_array_length_via_index() {
let input = OwnedValue::build_text(Rc::new("[[1,2,3,4]]".to_string()));
let result = json_array_length(
&input,
Some(&OwnedValue::build_text(Rc::new("$[0]".to_string()))),
)
.unwrap();
if let OwnedValue::Integer(res) = result {
assert_eq!(res, 4);
} else {
panic!("Expected OwnedValue::Integer");
}
}
// A path that resolves to a scalar element (here the integer 3) reports 0.
#[test]
fn test_json_array_length_via_index_not_array() {
let input = OwnedValue::build_text(Rc::new("[1,2,3,4]".to_string()));
let result = json_array_length(
&input,
Some(&OwnedValue::build_text(Rc::new("$[2]".to_string()))),
)
.unwrap();
if let OwnedValue::Integer(res) = result {
assert_eq!(res, 0);
} else {
panic!("Expected OwnedValue::Integer");
}
}
// A well-formed path that matches nothing yields SQL NULL, not 0.
#[test]
fn test_json_array_length_via_index_bad_prop() {
let input = OwnedValue::build_text(Rc::new("{one: [1,2,3,4]}".to_string()));
let result = json_array_length(
&input,
Some(&OwnedValue::build_text(Rc::new("$.two".to_string()))),
)
.unwrap();
assert_eq!(OwnedValue::Null, result);
}
// Input already carrying the JSON text subtype (produced by get_json)
// is accepted the same way as plain text input.
#[test]
fn test_json_array_length_simple_json_subtype() {
let input = OwnedValue::build_text(Rc::new("[1,2,3]".to_string()));
let wrapped = get_json(&input).unwrap();
let result = json_array_length(&wrapped, None).unwrap();
if let OwnedValue::Integer(res) = result {
assert_eq!(res, 3);
} else {
panic!("Expected OwnedValue::Integer");
}
}
}

181
core/json/path.rs Normal file
View File

@@ -0,0 +1,181 @@
use super::Val;
/// Resolve `path` — a SQLite-style JSON path, which must begin with `$` —
/// against `val`, returning a borrow of the referenced value if present.
///
/// Errors with a parse error when the path does not start with `$`.
pub fn get_json_val_by_path<'v>(val: &'v Val, path: &str) -> crate::Result<Option<&'v Val>> {
    let Some(tail) = path.strip_prefix('$') else {
        crate::bail_parse_error!("malformed path")
    };
    json_val_by_path(val, tail)
}
fn json_val_by_path<'v>(val: &'v Val, path: &str) -> crate::Result<Option<&'v Val>> {
if path.is_empty() {
return Ok(Some(val));
}
match val {
Val::Array(inner) => {
if inner.is_empty() {
return Ok(None);
}
let Some(tail) = path.strip_prefix('[') else {
return Ok(None);
};
let (from_end, tail) = if let Some(updated_tail) = tail.strip_prefix("#-") {
(true, updated_tail)
} else {
(false, tail)
};
let Some((idx_str, tail)) = tail.split_once("]") else {
crate::bail_parse_error!("malformed path");
};
if idx_str.is_empty() {
return Ok(None);
}
let Ok(idx) = idx_str.parse::<usize>() else {
crate::bail_parse_error!("malformed path");
};
let result = if from_end {
inner.get(inner.len() - 1 - idx)
} else {
inner.get(idx)
};
if let Some(result) = result {
return json_val_by_path(result, tail);
}
Ok(None)
}
Val::Object(inner) => {
let Some(tail) = path.strip_prefix('.') else {
return Ok(None);
};
let (property, tail) = if let Some(tail) = tail.strip_prefix('"') {
if let Some((property, tail)) = tail.split_once('"') {
(property, tail)
} else {
crate::bail_parse_error!("malformed path");
}
} else if let Some(idx) = tail.find('.') {
(&tail[..idx], &tail[idx..])
} else {
(tail, "")
};
if let Some(result) = inner.get(property) {
return json_val_by_path(result, tail);
}
Ok(None)
}
_ => Ok(None),
}
}
#[cfg(test)]
mod tests {
use super::*;
// The bare root path `$` resolves to the value itself.
#[test]
fn test_path_root() {
assert_eq!(
get_json_val_by_path(&Val::Bool(true), "$",).unwrap(),
Some(&Val::Bool(true))
);
}
// `$[N]` indexes an array from the front.
#[test]
fn test_path_index() {
assert_eq!(
get_json_val_by_path(
&Val::Array(vec![Val::Integer(33), Val::Integer(55), Val::Integer(66)]),
"$[2]",
)
.unwrap(),
Some(&Val::Integer(66))
);
}
// NOTE(review): this pins `$[#-2]` of a 3-element array to the FIRST
// element (index len - 1 - N). SQLite documents `$[#-1]` as the LAST
// element, under which `$[#-2]` would resolve to 55 here — confirm
// which semantics are intended before relying on this.
#[test]
fn test_path_negative_index() {
assert_eq!(
get_json_val_by_path(
&Val::Array(vec![Val::Integer(33), Val::Integer(55), Val::Integer(66)]),
"$[#-2]",
)
.unwrap(),
Some(&Val::Integer(33))
);
}
// Chained index components (`$[0][1]`) descend through nested arrays.
#[test]
fn test_path_index_deep() {
assert_eq!(
get_json_val_by_path(
&Val::Array(vec![Val::Array(vec![
Val::Integer(33),
Val::Integer(55),
Val::Integer(66)
])]),
"$[0][1]",
)
.unwrap(),
Some(&Val::Integer(55))
);
}
// `$.prop` selects a property of a top-level object.
#[test]
fn test_path_prop_simple() {
assert_eq!(
get_json_val_by_path(
&Val::Object(
[
("foo".into(), Val::Integer(55)),
("bar".into(), Val::Integer(66))
]
.into()
),
"$.bar",
)
.unwrap(),
Some(&Val::Integer(66))
);
}
// Chained property components (`$.foo.bar`) descend through nested objects.
#[test]
fn test_path_prop_nested() {
assert_eq!(
get_json_val_by_path(
&Val::Object(
[(
"foo".into(),
Val::Object([("bar".into(), Val::Integer(66))].into())
)]
.into()
),
"$.foo.bar",
)
.unwrap(),
Some(&Val::Integer(66))
);
}
// A quoted property (`$."foo.baz"`) matches a key containing a dot
// rather than being split into two components.
#[test]
fn test_path_prop_quoted() {
assert_eq!(
get_json_val_by_path(
&Val::Object(
[
("foo.baz".into(), Val::Integer(55)),
("bar".into(), Val::Integer(66))
]
.into()
),
r#"$."foo.baz""#,
)
.unwrap(),
Some(&Val::Integer(55))
);
}
}

View File

@@ -5,6 +5,7 @@ mod io;
#[cfg(feature = "json")]
mod json;
mod pseudo;
mod result;
mod schema;
mod storage;
mod translate;
@@ -66,7 +67,6 @@ pub struct Database {
pager: Rc<Pager>,
schema: Rc<RefCell<Schema>>,
header: Rc<RefCell<DatabaseHeader>>,
transaction_state: RefCell<TransactionState>,
// Shared structures of a Database are the parts that are common to multiple threads that might
// create DB connections.
shared_page_cache: Arc<RwLock<DumbLruPageCache>>,
@@ -123,6 +123,7 @@ impl Database {
pager: pager.clone(),
schema: bootstrap_schema.clone(),
header: db_header.clone(),
transaction_state: RefCell::new(TransactionState::None),
db: Weak::new(),
last_insert_rowid: Cell::new(0),
});
@@ -135,7 +136,6 @@ impl Database {
pager,
schema,
header,
transaction_state: RefCell::new(TransactionState::None),
shared_page_cache,
shared_wal,
}))
@@ -148,6 +148,7 @@ impl Database {
header: self.header.clone(),
last_insert_rowid: Cell::new(0),
db: Arc::downgrade(self),
transaction_state: RefCell::new(TransactionState::None),
})
}
}
@@ -206,6 +207,7 @@ pub struct Connection {
schema: Rc<RefCell<Schema>>,
header: Rc<RefCell<DatabaseHeader>>,
db: Weak<Database>, // backpointer to the database holding this connection
transaction_state: RefCell<TransactionState>,
last_insert_rowid: Cell<u64>,
}
@@ -372,13 +374,14 @@ impl Statement {
self.state.interrupt();
}
pub fn step(&mut self) -> Result<RowResult<'_>> {
pub fn step(&mut self) -> Result<StepResult<'_>> {
let result = self.program.step(&mut self.state, self.pager.clone())?;
match result {
vdbe::StepResult::Row(row) => Ok(RowResult::Row(Row { values: row.values })),
vdbe::StepResult::IO => Ok(RowResult::IO),
vdbe::StepResult::Done => Ok(RowResult::Done),
vdbe::StepResult::Interrupt => Ok(RowResult::Interrupt),
vdbe::StepResult::Row(row) => Ok(StepResult::Row(Row { values: row.values })),
vdbe::StepResult::IO => Ok(StepResult::IO),
vdbe::StepResult::Done => Ok(StepResult::Done),
vdbe::StepResult::Interrupt => Ok(StepResult::Interrupt),
vdbe::StepResult::Busy => Ok(StepResult::Busy),
}
}
@@ -390,11 +393,12 @@ impl Statement {
pub fn reset(&self) {}
}
pub enum RowResult<'a> {
pub enum StepResult<'a> {
Row(Row<'a>),
IO,
Done,
Interrupt,
Busy,
}
pub struct Row<'a> {
@@ -417,7 +421,7 @@ impl Rows {
Self { stmt }
}
pub fn next_row(&mut self) -> Result<RowResult<'_>> {
pub fn next_row(&mut self) -> Result<StepResult<'_>> {
self.stmt.step()
}
}

6
core/result.rs Normal file
View File

@@ -0,0 +1,6 @@
/// Common results that different functions can return in limbo.
// Derives added so the result can be logged, compared, and cheaply
// copied by callers; all are purely additive and backward compatible.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LimboResult {
    /// Couldn't acquire a lock
    Busy,
    /// The operation completed successfully
    Ok,
}

View File

@@ -20,22 +20,37 @@ use super::sqlite3_ondisk::{
/*
These are offsets of fields in the header of a b-tree page.
*/
const BTREE_HEADER_OFFSET_TYPE: usize = 0; /* type of btree page -> u8 */
const BTREE_HEADER_OFFSET_FREEBLOCK: usize = 1; /* pointer to first freeblock -> u16 */
const BTREE_HEADER_OFFSET_CELL_COUNT: usize = 3; /* number of cells in the page -> u16 */
const BTREE_HEADER_OFFSET_CELL_CONTENT: usize = 5; /* pointer to first byte of cell allocated content from top -> u16 */
const BTREE_HEADER_OFFSET_FRAGMENTED: usize = 7; /* number of fragmented bytes -> u8 */
const BTREE_HEADER_OFFSET_RIGHTMOST: usize = 8; /* if internalnode, pointer right most pointer (saved separately from cells) -> u32 */
/*
** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
** this will be declared corrupt. This value is calculated based on a
** maximum database size of 2^31 pages a minimum fanout of 2 for a
** root-node and 3 for all other internal nodes.
**
** If a tree that appears to be taller than this is encountered, it is
** assumed that the database is corrupt.
*/
/// type of btree page -> u8
const PAGE_HEADER_OFFSET_PAGE_TYPE: usize = 0;
/// pointer to first freeblock -> u16
/// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page.
/// A freeblock is a structure used to identify unallocated space within a b-tree page.
/// Freeblocks are organized as a chain.
///
/// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead
/// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions.
const PAGE_HEADER_OFFSET_FIRST_FREEBLOCK: usize = 1;
/// number of cells in the page -> u16
const PAGE_HEADER_OFFSET_CELL_COUNT: usize = 3;
/// pointer to first byte of cell allocated content from top -> u16
/// SQLite strives to place cells as far toward the end of the b-tree page as it can,
/// in order to leave space for future growth of the cell pointer array.
/// = the cell content area pointer moves leftward as cells are added to the page
const PAGE_HEADER_OFFSET_CELL_CONTENT_AREA: usize = 5;
/// number of fragmented bytes -> u8
/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area.
const PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT: usize = 7;
/// if internalnode, pointer right most pointer (saved separately from cells) -> u32
const PAGE_HEADER_OFFSET_RIGHTMOST_PTR: usize = 8;
/// Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
/// this will be declared corrupt. This value is calculated based on a
/// maximum database size of 2^31 pages a minimum fanout of 2 for a
/// root-node and 3 for all other internal nodes.
///
/// If a tree that appears to be taller than this is encountered, it is
/// assumed that the database is corrupt.
pub const BTCURSOR_MAX_DEPTH: usize = 20;
/// Evaluate a Result<CursorResult<T>>, if IO return IO.
@@ -57,6 +72,8 @@ macro_rules! return_if_locked {
}};
}
/// State machine of a write operation.
/// May involve balancing due to overflow.
#[derive(Debug)]
enum WriteState {
Start,
@@ -67,11 +84,16 @@ enum WriteState {
}
struct WriteInfo {
/// State of the write operation state machine.
state: WriteState,
/// Pages allocated during the write operation due to balancing.
new_pages: RefCell<Vec<PageRef>>,
/// Scratch space used during balancing.
scratch_cells: RefCell<Vec<&'static [u8]>>,
/// Bookkeeping of the rightmost pointer so the PAGE_HEADER_OFFSET_RIGHTMOST_PTR can be updated.
rightmost_pointer: RefCell<Option<u32>>,
page_copy: RefCell<Option<PageContent>>, // this holds the copy a of a page needed for buffer references
/// Copy of the current page needed for buffer references.
page_copy: RefCell<Option<PageContent>>,
}
pub struct BTreeCursor {
@@ -142,6 +164,8 @@ impl BTreeCursor {
}
}
/// Check if the table is empty.
/// This is done by checking if the root page has no cells.
fn is_empty_table(&mut self) -> Result<CursorResult<bool>> {
let page = self.pager.read_page(self.root_page)?;
return_if_locked!(page);
@@ -150,16 +174,18 @@ impl BTreeCursor {
Ok(CursorResult::Ok(cell_count == 0))
}
/// Move the cursor to the previous record and return it.
/// Used in backwards iteration.
fn get_prev_record(&mut self) -> Result<CursorResult<(Option<u64>, Option<OwnedRecord>)>> {
loop {
let page = self.stack.top();
let cell_idx = self.stack.current_index();
let cell_idx = self.stack.current_cell_index();
// moved to current page begin
// moved to beginning of current page
// todo: find a better way to flag moved to end or begin of page
if self.stack.curr_idx_out_of_begin() {
if self.stack.current_cell_index_less_than_min() {
loop {
if self.stack.current_index() > 0 {
if self.stack.current_cell_index() > 0 {
self.stack.retreat();
break;
}
@@ -198,8 +224,8 @@ impl BTreeCursor {
let cell = contents.cell_get(
cell_idx,
self.pager.clone(),
self.max_local(contents.page_type()),
self.min_local(contents.page_type()),
self.payload_overflow_threshold_max(contents.page_type()),
self.payload_overflow_threshold_min(contents.page_type()),
self.usable_space(),
)?;
@@ -228,13 +254,15 @@ impl BTreeCursor {
}
}
/// Move the cursor to the next record and return it.
/// Used in forwards iteration, which is the default.
fn get_next_record(
&mut self,
predicate: Option<(SeekKey<'_>, SeekOp)>,
) -> Result<CursorResult<(Option<u64>, Option<OwnedRecord>)>> {
loop {
let mem_page_rc = self.stack.top();
let cell_idx = self.stack.current_index() as usize;
let cell_idx = self.stack.current_cell_index() as usize;
debug!("current id={} cell={}", mem_page_rc.get().id, cell_idx);
return_if_locked!(mem_page_rc);
@@ -286,8 +314,8 @@ impl BTreeCursor {
let cell = contents.cell_get(
cell_idx,
self.pager.clone(),
self.max_local(contents.page_type()),
self.min_local(contents.page_type()),
self.payload_overflow_threshold_max(contents.page_type()),
self.payload_overflow_threshold_min(contents.page_type()),
self.usable_space(),
)?;
match &cell {
@@ -386,6 +414,9 @@ impl BTreeCursor {
}
}
/// Move the cursor to the record that matches the seek key and seek operation.
/// This may be used to seek to a specific record in a point query (e.g. SELECT * FROM table WHERE col = 10)
/// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10).
fn seek(
&mut self,
key: SeekKey<'_>,
@@ -403,8 +434,8 @@ impl BTreeCursor {
let cell = contents.cell_get(
cell_idx,
self.pager.clone(),
self.max_local(contents.page_type()),
self.min_local(contents.page_type()),
self.payload_overflow_threshold_max(contents.page_type()),
self.payload_overflow_threshold_min(contents.page_type()),
self.usable_space(),
)?;
match &cell {
@@ -476,12 +507,14 @@ impl BTreeCursor {
Ok(CursorResult::Ok((None, None)))
}
/// Move the cursor to the root page of the btree.
fn move_to_root(&mut self) {
let mem_page = self.pager.read_page(self.root_page).unwrap();
self.stack.clear();
self.stack.push(mem_page);
}
/// Move the cursor to the rightmost record in the btree.
fn move_to_rightmost(&mut self) -> Result<CursorResult<()>> {
self.move_to_root();
@@ -553,8 +586,8 @@ impl BTreeCursor {
match &contents.cell_get(
cell_idx,
self.pager.clone(),
self.max_local(contents.page_type()),
self.min_local(contents.page_type()),
self.payload_overflow_threshold_max(contents.page_type()),
self.payload_overflow_threshold_min(contents.page_type()),
self.usable_space(),
)? {
BTreeCell::TableInteriorCell(TableInteriorCell {
@@ -634,6 +667,8 @@ impl BTreeCursor {
}
}
/// Insert a record into the btree.
/// If the insert operation overflows the page, it will be split and the btree will be balanced.
fn insert_into_page(
&mut self,
key: &OwnedValue,
@@ -700,10 +735,15 @@ impl BTreeCursor {
}
}
/* insert to position and shift other pointers */
/// Insert a record into a cell.
/// If the cell overflows, an overflow cell is created.
/// insert_into_cell() is called from insert_into_page(),
/// and the overflow cell count is used to determine if the page overflows,
/// i.e. whether we need to balance the btree after the insert.
fn insert_into_cell(&self, page: &mut PageContent, payload: &[u8], cell_idx: usize) {
let free = self.compute_free_space(page, RefCell::borrow(&self.database_header));
let enough_space = payload.len() + 2 <= free as usize;
const CELL_POINTER_SIZE_BYTES: usize = 2;
let enough_space = payload.len() + CELL_POINTER_SIZE_BYTES <= free as usize;
if !enough_space {
// add to overflow cell
page.overflow_cells.push(OverflowCell {
@@ -714,61 +754,77 @@ impl BTreeCursor {
}
// TODO: insert into cell payload in internal page
let pc = self.allocate_cell_space(page, payload.len() as u16);
let new_cell_data_pointer = self.allocate_cell_space(page, payload.len() as u16);
let buf = page.as_ptr();
// copy data
buf[pc as usize..pc as usize + payload.len()].copy_from_slice(payload);
buf[new_cell_data_pointer as usize..new_cell_data_pointer as usize + payload.len()]
.copy_from_slice(payload);
// memmove(pIns+2, pIns, 2*(pPage->nCell - i));
let (pointer_area_pc_by_idx, _) = page.cell_get_raw_pointer_region();
let pointer_area_pc_by_idx = pointer_area_pc_by_idx + (2 * cell_idx);
let (cell_pointer_array_start, _) = page.cell_pointer_array_offset_and_size();
let cell_pointer_cur_idx = cell_pointer_array_start + (CELL_POINTER_SIZE_BYTES * cell_idx);
// move previous pointers forward and insert new pointer there
let n_cells_forward = 2 * (page.cell_count() - cell_idx);
if n_cells_forward > 0 {
// move existing pointers forward by CELL_POINTER_SIZE_BYTES...
let n_cells_forward = page.cell_count() - cell_idx;
let n_bytes_forward = CELL_POINTER_SIZE_BYTES * n_cells_forward;
if n_bytes_forward > 0 {
buf.copy_within(
pointer_area_pc_by_idx..pointer_area_pc_by_idx + n_cells_forward,
pointer_area_pc_by_idx + 2,
cell_pointer_cur_idx..cell_pointer_cur_idx + n_bytes_forward,
cell_pointer_cur_idx + CELL_POINTER_SIZE_BYTES,
);
}
page.write_u16(pointer_area_pc_by_idx - page.offset, pc);
// ...and insert new cell pointer at the current index
page.write_u16(cell_pointer_cur_idx - page.offset, new_cell_data_pointer);
// update first byte of content area
page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, pc);
// update first byte of content area (cell data always appended to the left, so cell content area pointer moves to point to the new cell data)
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, new_cell_data_pointer);
// update cell count
let new_n_cells = (page.cell_count() + 1) as u16;
page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, new_n_cells);
page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, new_n_cells);
}
/// Free the range of bytes that a cell occupies.
/// This function also updates the freeblock list in the page.
/// Freeblocks are used to keep track of free space in the page,
/// and are organized as a linked list.
fn free_cell_range(&self, page: &mut PageContent, offset: u16, len: u16) {
// if the freeblock list is empty, we set this block as the first freeblock in the page header.
if page.first_freeblock() == 0 {
// insert into empty list
page.write_u16(offset as usize, 0);
page.write_u16(offset as usize + 2, len);
page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, offset);
page.write_u16(offset as usize, 0); // next freeblock = null
page.write_u16(offset as usize + 2, len); // size of this freeblock
page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, offset); // first freeblock in page = this block
return;
}
let first_block = page.first_freeblock();
// if the freeblock list is not empty, and the offset is less than the first freeblock,
// we insert this block at the head of the list
if offset < first_block {
// insert into head of list
page.write_u16(offset as usize, first_block);
page.write_u16(offset as usize + 2, len);
page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, offset);
page.write_u16(offset as usize, first_block); // next freeblock = previous first freeblock
page.write_u16(offset as usize + 2, len); // size of this freeblock
page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, offset); // first freeblock in page = this block
return;
}
// if we clear space that is at the start of the cell content area,
// we need to update the cell content area pointer forward to account for the removed space
// FIXME: is offset ever < cell_content_area? cell content area grows leftwards and the pointer
// is to the start of the last allocated cell. should we assert!(offset >= page.cell_content_area())
// and change this to if offset == page.cell_content_area()?
if offset <= page.cell_content_area() {
// extend boundary of content area
page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, page.first_freeblock());
page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, offset + len);
// FIXME: remove the line directly below this, it does not change anything.
page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, page.first_freeblock());
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, offset + len);
return;
}
// if the freeblock list is not empty, and the offset is greater than the first freeblock,
// then we need to do some more calculation to figure out where to insert the freeblock
// in the freeblock linked list.
let maxpc = {
let db_header = self.database_header.borrow();
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
usable_space as u16
};
@@ -799,17 +855,23 @@ impl BTreeCursor {
}
}
/// Drop a cell from a page.
/// This is done by freeing the range of bytes that the cell occupies.
fn drop_cell(&self, page: &mut PageContent, cell_idx: usize) {
let (cell_start, cell_len) = page.cell_get_raw_region(
cell_idx,
self.max_local(page.page_type()),
self.min_local(page.page_type()),
self.payload_overflow_threshold_max(page.page_type()),
self.payload_overflow_threshold_min(page.page_type()),
self.usable_space(),
);
self.free_cell_range(page, cell_start as u16, cell_len as u16);
page.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1);
page.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, page.cell_count() as u16 - 1);
}
/// Balance a leaf page.
/// Balancing is done when a page overflows.
/// see e.g. https://en.wikipedia.org/wiki/B-tree
///
/// This is a naive algorithm that doesn't try to distribute cells evenly by content.
/// It will try to split the page in half by keys not by content.
/// Sqlite tries to have a page at least 40% full.
@@ -852,8 +914,8 @@ impl BTreeCursor {
for cell_idx in 0..page_copy.cell_count() {
let (start, len) = page_copy.cell_get_raw_region(
cell_idx,
self.max_local(page_copy.page_type()),
self.min_local(page_copy.page_type()),
self.payload_overflow_threshold_max(page_copy.page_type()),
self.payload_overflow_threshold_min(page_copy.page_type()),
self.usable_space(),
);
let buf = page_copy.as_ptr();
@@ -930,14 +992,14 @@ impl BTreeCursor {
assert_eq!(parent_contents.overflow_cells.len(), 0);
// Right page pointer is u32 in right most pointer, and in cell is u32 too, so we can use a *u32 to hold where we want to change this value
let mut right_pointer = BTREE_HEADER_OFFSET_RIGHTMOST;
let mut right_pointer = PAGE_HEADER_OFFSET_RIGHTMOST_PTR;
for cell_idx in 0..parent_contents.cell_count() {
let cell = parent_contents
.cell_get(
cell_idx,
self.pager.clone(),
self.max_local(page_type.clone()),
self.min_local(page_type.clone()),
self.payload_overflow_threshold_max(page_type.clone()),
self.payload_overflow_threshold_min(page_type.clone()),
self.usable_space(),
)
.unwrap();
@@ -950,8 +1012,8 @@ impl BTreeCursor {
if found {
let (start, _len) = parent_contents.cell_get_raw_region(
cell_idx,
self.max_local(page_type.clone()),
self.min_local(page_type.clone()),
self.payload_overflow_threshold_max(page_type.clone()),
self.payload_overflow_threshold_min(page_type.clone()),
self.usable_space(),
);
right_pointer = start;
@@ -967,17 +1029,20 @@ impl BTreeCursor {
assert!(page.is_dirty());
let contents = page.get().contents.as_mut().unwrap();
contents.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0);
contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0);
contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0);
contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0);
let db_header = RefCell::borrow(&self.database_header);
let cell_content_area_start =
db_header.page_size - db_header.unused_space as u16;
contents.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cell_content_area_start);
db_header.page_size - db_header.reserved_space as u16;
contents.write_u16(
PAGE_HEADER_OFFSET_CELL_CONTENT_AREA,
cell_content_area_start,
);
contents.write_u8(BTREE_HEADER_OFFSET_FRAGMENTED, 0);
contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0);
if !contents.is_leaf() {
contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 0);
contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, 0);
}
}
@@ -1035,8 +1100,8 @@ impl BTreeCursor {
.cell_get(
contents.cell_count() - 1,
self.pager.clone(),
self.max_local(contents.page_type()),
self.min_local(contents.page_type()),
self.payload_overflow_threshold_max(contents.page_type()),
self.payload_overflow_threshold_min(contents.page_type()),
self.usable_space(),
)
.unwrap();
@@ -1045,13 +1110,13 @@ impl BTreeCursor {
_ => unreachable!(),
};
self.drop_cell(contents, contents.cell_count() - 1);
contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, last_cell_pointer);
contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, last_cell_pointer);
}
// last page right most pointer points to previous right most pointer before splitting
let last_page = new_pages.last().unwrap();
let last_page_contents = last_page.get().contents.as_mut().unwrap();
last_page_contents.write_u32(
BTREE_HEADER_OFFSET_RIGHTMOST,
PAGE_HEADER_OFFSET_RIGHTMOST_PTR,
self.write_info.rightmost_pointer.borrow().unwrap(),
);
}
@@ -1069,8 +1134,8 @@ impl BTreeCursor {
&contents.page_type(),
0,
self.pager.clone(),
self.max_local(contents.page_type()),
self.min_local(contents.page_type()),
self.payload_overflow_threshold_max(contents.page_type()),
self.payload_overflow_threshold_min(contents.page_type()),
self.usable_space(),
)
.unwrap();
@@ -1119,6 +1184,9 @@ impl BTreeCursor {
}
}
/// Balance the root page.
/// This is done when the root page overflows, and we need to create a new root page.
/// See e.g. https://en.wikipedia.org/wiki/B-tree
fn balance_root(&mut self) {
/* todo: balance deeper, create child and copy contents of root there. Then split root */
/* if we are in root page then we just need to create a new root and push key there */
@@ -1145,8 +1213,8 @@ impl BTreeCursor {
}
// point new root right child to previous root
new_root_page_contents
.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, new_root_page_id as u32);
new_root_page_contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0);
.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, new_root_page_id as u32);
new_root_page_contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0);
}
/* swap splitted page buffer with new root buffer so we don't have to update page idx */
@@ -1164,7 +1232,7 @@ impl BTreeCursor {
if is_page_1 {
// Remove header from child and set offset to 0
let contents = child.get().contents.as_mut().unwrap();
let (cell_pointer_offset, _) = contents.cell_get_raw_pointer_region();
let (cell_pointer_offset, _) = contents.cell_pointer_array_offset_and_size();
// change cell pointers
for cell_idx in 0..contents.cell_count() {
let cell_pointer_offset = cell_pointer_offset + (2 * cell_idx) - offset;
@@ -1195,12 +1263,16 @@ impl BTreeCursor {
}
}
/// Allocate a new page to the btree via the pager.
/// This marks the page as dirty and writes the page header.
fn allocate_page(&self, page_type: PageType, offset: usize) -> PageRef {
let page = self.pager.allocate_page().unwrap();
btree_init_page(&page, page_type, &self.database_header.borrow(), offset);
page
}
/// Allocate a new overflow page.
/// This is done when a cell overflows and new space is needed.
fn allocate_overflow_page(&self) -> PageRef {
let page = self.pager.allocate_page().unwrap();
@@ -1212,13 +1284,11 @@ impl BTreeCursor {
page
}
/*
Allocate space for a cell on a page.
*/
/// Allocate space for a cell on a page.
fn allocate_cell_space(&self, page_ref: &PageContent, amount: u16) -> u16 {
let amount = amount as usize;
let (cell_offset, _) = page_ref.cell_get_raw_pointer_region();
let (cell_offset, _) = page_ref.cell_pointer_array_offset_and_size();
let gap = cell_offset + 2 * page_ref.cell_count();
let mut top = page_ref.cell_content_area() as usize;
@@ -1236,33 +1306,31 @@ impl BTreeCursor {
if gap + 2 + amount > top {
// defragment
self.defragment_page(page_ref, RefCell::borrow(&self.database_header));
top = page_ref.read_u16(BTREE_HEADER_OFFSET_CELL_CONTENT) as usize;
top = page_ref.read_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA) as usize;
}
let db_header = RefCell::borrow(&self.database_header);
top -= amount;
page_ref.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, top as u16);
page_ref.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, top as u16);
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
assert!(top + amount <= usable_space);
top as u16
}
/// Defragment a page. This means packing all the cells to the end of the page.
fn defragment_page(&self, page: &PageContent, db_header: Ref<DatabaseHeader>) {
log::debug!("defragment_page");
let cloned_page = page.clone();
// TODO(pere): usable space should include offset probably
let usable_space = (db_header.page_size - db_header.unused_space as u16) as u64;
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as u64;
let mut cbrk = usable_space;
// TODO: implement fast algorithm
let last_cell = usable_space - 4;
let first_cell = {
let (start, end) = cloned_page.cell_get_raw_pointer_region();
start + end
};
let first_cell = cloned_page.unallocated_region_start() as u64;
if cloned_page.cell_count() > 0 {
let page_type = page.page_type();
@@ -1330,42 +1398,54 @@ impl BTreeCursor {
let write_buf = page.as_ptr();
// set new first byte of cell content
page.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cbrk as u16);
page.write_u16(PAGE_HEADER_OFFSET_CELL_CONTENT_AREA, cbrk as u16);
// set free block to 0, unused spaced can be retrieved from gap between cell pointer end and content start
page.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0);
page.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0);
// set unused space to 0
let first_cell = cloned_page.cell_content_area() as u64;
assert!(first_cell <= cbrk);
write_buf[first_cell as usize..cbrk as usize].fill(0);
}
// Free blocks can be zero, meaning the "real free space" that can be used to allocate is expected to be between first cell byte
// and end of cell pointer area.
/// Free blocks can be zero, meaning the "real free space" that can be used to allocate is expected to be between first cell byte
/// and end of cell pointer area.
#[allow(unused_assignments)]
fn compute_free_space(&self, page: &PageContent, db_header: Ref<DatabaseHeader>) -> u16 {
// TODO(pere): maybe free space is not calculated correctly with offset
let buf = page.as_ptr();
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
let mut first_byte_in_cell_content = page.cell_content_area();
if first_byte_in_cell_content == 0 {
first_byte_in_cell_content = u16::MAX;
// Usable space, not the same as free space, simply means:
// space that is not reserved for extensions by sqlite. Usually reserved_space is 0.
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
let mut cell_content_area_start = page.cell_content_area();
// A zero value for the cell content area pointer is interpreted as 65536.
// See https://www.sqlite.org/fileformat.html
// The max page size for a sqlite database is 64kiB i.e. 65536 bytes.
// 65536 is u16::MAX + 1, and since cell content grows from right to left, this means
// the cell content area pointer is at the end of the page,
// i.e.
// 1. the page size is 64kiB
// 2. there are no cells on the page
// 3. there is no reserved space at the end of the page
if cell_content_area_start == 0 {
cell_content_area_start = u16::MAX;
}
let fragmented_free_bytes = page.num_frag_free_bytes();
let free_block_pointer = page.first_freeblock();
let ncell = page.cell_count();
// The amount of free space is the sum of:
// #1. the size of the unallocated region
// #2. fragments (isolated 1-3 byte chunks of free space within the cell content area)
// #3. freeblocks (linked list of blocks of at least 4 bytes within the cell content area that are not in use due to e.g. deletions)
// 8 + 4 == header end
let child_pointer_size = if page.is_leaf() { 0 } else { 4 };
let first_cell = (page.offset + 8 + child_pointer_size + (2 * ncell)) as u16;
let mut free_space_bytes =
page.unallocated_region_size() as usize + page.num_frag_free_bytes() as usize;
let mut nfree = fragmented_free_bytes as usize + first_byte_in_cell_content as usize;
let mut pc = free_block_pointer as usize;
if pc > 0 {
if pc < first_byte_in_cell_content as usize {
// corrupt
// #3 is computed by iterating over the freeblocks linked list
let mut cur_freeblock_ptr = page.first_freeblock() as usize;
let page_buf = page.as_ptr();
if cur_freeblock_ptr > 0 {
if cur_freeblock_ptr < cell_content_area_start as usize {
// Freeblocks exist in the cell content area e.g. after deletions
// They should never exist in the unused area of the page.
todo!("corrupted page");
}
@@ -1373,32 +1453,51 @@ impl BTreeCursor {
let mut size = 0;
loop {
// TODO: check corruption icellast
next = u16::from_be_bytes(buf[pc..pc + 2].try_into().unwrap()) as usize;
size = u16::from_be_bytes(buf[pc + 2..pc + 4].try_into().unwrap()) as usize;
nfree += size;
if next <= pc + size + 3 {
next = u16::from_be_bytes(
page_buf[cur_freeblock_ptr..cur_freeblock_ptr + 2]
.try_into()
.unwrap(),
) as usize; // first 2 bytes in freeblock = next freeblock pointer
size = u16::from_be_bytes(
page_buf[cur_freeblock_ptr + 2..cur_freeblock_ptr + 4]
.try_into()
.unwrap(),
) as usize; // next 2 bytes in freeblock = size of current freeblock
free_space_bytes += size;
// Freeblocks are in order from left to right on the page,
// so next pointer should > current pointer + its size, or 0 if no next block exists.
if next <= cur_freeblock_ptr + size + 3 {
break;
}
pc = next;
cur_freeblock_ptr = next;
}
if next > 0 {
todo!("corrupted page ascending order");
}
// Next should always be 0 (NULL) at this point since we have reached the end of the freeblocks linked list
assert!(
next == 0,
"corrupted page: freeblocks list not in ascending order"
);
if pc + size > usable_space {
todo!("corrupted page last freeblock extends last page end");
}
assert!(
cur_freeblock_ptr + size <= usable_space,
"corrupted page: last freeblock extends last page end"
);
}
assert!(
free_space_bytes <= usable_space,
"corrupted page: free space is greater than usable space"
);
// if( nFree>usableSize || nFree<iCellFirst ){
// return SQLITE_CORRUPT_PAGE(pPage);
// }
// don't count header and cell pointers?
nfree -= first_cell as usize;
nfree as u16
free_space_bytes as u16
}
/// Fill in the cell payload with the record.
/// If the record is too large to fit in the cell, it will spill onto overflow pages.
fn fill_cell_payload(
&self,
page_type: PageType,
@@ -1423,25 +1522,26 @@ impl BTreeCursor {
write_varint_to_vec(record_buf.len() as u64, cell_payload);
}
let max_local = self.max_local(page_type.clone());
let payload_overflow_threshold_max = self.payload_overflow_threshold_max(page_type.clone());
log::debug!(
"fill_cell_payload(record_size={}, max_local={})",
"fill_cell_payload(record_size={}, payload_overflow_threshold_max={})",
record_buf.len(),
max_local
payload_overflow_threshold_max
);
if record_buf.len() <= max_local {
if record_buf.len() <= payload_overflow_threshold_max {
// enough allowed space to fit inside a btree page
cell_payload.extend_from_slice(record_buf.as_slice());
cell_payload.resize(cell_payload.len() + 4, 0);
return;
}
log::debug!("fill_cell_payload(overflow)");
let min_local = self.min_local(page_type);
let mut space_left = min_local + (record_buf.len() - min_local) % (self.usable_space() - 4);
let payload_overflow_threshold_min = self.payload_overflow_threshold_min(page_type);
// see e.g. https://github.com/sqlite/sqlite/blob/9591d3fe93936533c8c3b0dc4d025ac999539e11/src/dbstat.c#L371
let mut space_left = payload_overflow_threshold_min
+ (record_buf.len() - payload_overflow_threshold_min) % (self.usable_space() - 4);
if space_left > max_local {
space_left = min_local;
if space_left > payload_overflow_threshold_max {
space_left = payload_overflow_threshold_min;
}
// cell_size must be equal to first value of space_left as this will be the bytes copied to non-overflow page.
@@ -1487,31 +1587,54 @@ impl BTreeCursor {
assert_eq!(cell_size, cell_payload.len());
}
fn max_local(&self, page_type: PageType) -> usize {
let usable_space = self.usable_space();
/// Returns the maximum payload size (X) that can be stored directly on a b-tree page without spilling to overflow pages.
///
/// For table leaf pages: X = usable_size - 35
/// For index pages: X = ((usable_size - 12) * 64/255) - 23
///
/// The usable size is the total page size less the reserved space at the end of each page.
/// These thresholds are designed to:
/// - Give a minimum fanout of 4 for index b-trees
/// - Ensure enough payload is on the b-tree page that the record header can usually be accessed
/// without consulting an overflow page
fn payload_overflow_threshold_max(&self, page_type: PageType) -> usize {
let usable_size = self.usable_space();
match page_type {
PageType::IndexInterior | PageType::TableInterior => {
(usable_space - 12) * 64 / 255 - 23
PageType::IndexInterior | PageType::IndexLeaf => {
((usable_size - 12) * 64 / 255) - 23 // Index page formula
}
PageType::TableInterior | PageType::TableLeaf => {
usable_size - 35 // Table leaf page formula
}
PageType::IndexLeaf | PageType::TableLeaf => usable_space - 35,
}
}
fn min_local(&self, page_type: PageType) -> usize {
let usable_space = self.usable_space();
match page_type {
PageType::IndexInterior | PageType::TableInterior => {
(usable_space - 12) * 32 / 255 - 23
}
PageType::IndexLeaf | PageType::TableLeaf => (usable_space - 12) * 32 / 255 - 23,
}
/// Returns the minimum payload size (M) that must be stored on the b-tree page before spilling to overflow pages is allowed.
///
/// For all page types: M = ((usable_size - 12) * 32/255) - 23
///
/// When payload size P exceeds max_local():
/// - If K = M + ((P-M) % (usable_size-4)) <= max_local(): store K bytes on page
/// - Otherwise: store M bytes on page
///
/// The remaining bytes are stored on overflow pages in both cases.
fn payload_overflow_threshold_min(&self, _page_type: PageType) -> usize {
let usable_size = self.usable_space();
// Same formula for all page types
((usable_size - 12) * 32 / 255) - 23
}
/// The "usable size" of a database page is the page size specified by the 2-byte integer at offset 16
/// in the header, minus the "reserved" space size recorded in the 1-byte integer at offset 20 in the header.
/// The usable size of a page might be an odd number. However, the usable size is not allowed to be less than 480.
/// In other words, if the page size is 512, then the reserved space size cannot exceed 32.
fn usable_space(&self) -> usize {
let db_header = RefCell::borrow(&self.database_header);
(db_header.page_size - db_header.unused_space as u16) as usize
(db_header.page_size - db_header.reserved_space as u16) as usize
}
/// Find the index of the cell in the page that contains the given rowid.
/// BTree tables only.
fn find_cell(&self, page: &PageContent, int_key: u64) -> usize {
let mut cell_idx = 0;
let cell_count = page.cell_count();
@@ -1520,8 +1643,8 @@ impl BTreeCursor {
.cell_get(
cell_idx,
self.pager.clone(),
self.max_local(page.page_type()),
self.min_local(page.page_type()),
self.payload_overflow_threshold_max(page.page_type()),
self.payload_overflow_threshold_min(page.page_type()),
self.usable_space(),
)
.unwrap()
@@ -1545,6 +1668,8 @@ impl BTreeCursor {
}
impl PageStack {
/// Push a new page onto the stack.
/// This effectively means traversing to a child page.
fn push(&self, page: PageRef) {
debug!(
"pagestack::push(current={}, new_page_id={})",
@@ -1561,6 +1686,8 @@ impl PageStack {
self.cell_indices.borrow_mut()[current as usize] = 0;
}
/// Pop a page off the stack.
/// This effectively means traversing back up to a parent page.
fn pop(&self) {
let current = *self.current_page.borrow();
debug!("pagestack::pop(current={})", current);
@@ -1569,6 +1696,8 @@ impl PageStack {
*self.current_page.borrow_mut() -= 1;
}
/// Get the top page on the stack.
/// This is the page that is currently being traversed.
fn top(&self) -> PageRef {
let current = *self.current_page.borrow();
let page = self.stack.borrow()[current as usize]
@@ -1583,6 +1712,7 @@ impl PageStack {
page
}
/// Get the parent page of the current page.
fn parent(&self) -> PageRef {
let current = *self.current_page.borrow();
self.stack.borrow()[current as usize - 1]
@@ -1597,13 +1727,15 @@ impl PageStack {
}
/// Cell index of the current page
fn current_index(&self) -> i32 {
fn current_cell_index(&self) -> i32 {
let current = self.current();
self.cell_indices.borrow()[current]
}
fn curr_idx_out_of_begin(&self) -> bool {
let cell_idx = self.current_index();
/// Check if the current cell index is less than 0.
/// This means we have been iterating backwards and have reached the start of the page.
fn current_cell_index_less_than_min(&self) -> bool {
let cell_idx = self.current_cell_index();
cell_idx < 0
}
@@ -1639,7 +1771,7 @@ fn find_free_cell(page_ref: &PageContent, db_header: Ref<DatabaseHeader>, amount
let buf = page_ref.as_ptr();
let usable_space = (db_header.page_size - db_header.unused_space as u16) as usize;
let usable_space = (db_header.page_size - db_header.reserved_space as u16) as usize;
let maxpc = usable_space - amount;
let mut found = false;
while pc <= maxpc {
@@ -1790,8 +1922,8 @@ impl Cursor for BTreeCursor {
let equals = match &contents.cell_get(
cell_idx,
self.pager.clone(),
self.max_local(contents.page_type()),
self.min_local(contents.page_type()),
self.payload_overflow_threshold_max(contents.page_type()),
self.payload_overflow_threshold_min(contents.page_type()),
self.usable_space(),
)? {
BTreeCell::TableLeafCell(l) => l._rowid == int_key,
@@ -1828,15 +1960,18 @@ pub fn btree_init_page(
let contents = contents.contents.as_mut().unwrap();
contents.offset = offset;
let id = page_type as u8;
contents.write_u8(BTREE_HEADER_OFFSET_TYPE, id);
contents.write_u16(BTREE_HEADER_OFFSET_FREEBLOCK, 0);
contents.write_u16(BTREE_HEADER_OFFSET_CELL_COUNT, 0);
contents.write_u8(PAGE_HEADER_OFFSET_PAGE_TYPE, id);
contents.write_u16(PAGE_HEADER_OFFSET_FIRST_FREEBLOCK, 0);
contents.write_u16(PAGE_HEADER_OFFSET_CELL_COUNT, 0);
let cell_content_area_start = db_header.page_size - db_header.unused_space as u16;
contents.write_u16(BTREE_HEADER_OFFSET_CELL_CONTENT, cell_content_area_start);
let cell_content_area_start = db_header.page_size - db_header.reserved_space as u16;
contents.write_u16(
PAGE_HEADER_OFFSET_CELL_CONTENT_AREA,
cell_content_area_start,
);
contents.write_u8(BTREE_HEADER_OFFSET_FRAGMENTED, 0);
contents.write_u32(BTREE_HEADER_OFFSET_RIGHTMOST, 0);
contents.write_u8(PAGE_HEADER_OFFSET_FRAGMENTED_BYTES_COUNT, 0);
contents.write_u32(PAGE_HEADER_OFFSET_RIGHTMOST_PTR, 0);
}
fn to_static_buf(buf: &[u8]) -> &'static [u8] {

View File

@@ -1,9 +1,10 @@
use crate::result::LimboResult;
use crate::storage::buffer_pool::BufferPool;
use crate::storage::database::DatabaseStorage;
use crate::storage::sqlite3_ondisk::{self, DatabaseHeader, PageContent};
use crate::storage::wal::Wal;
use crate::{Buffer, Result};
use log::{debug, trace};
use log::trace;
use std::cell::{RefCell, UnsafeCell};
use std::collections::HashSet;
use std::rc::Rc;
@@ -11,7 +12,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
use super::page_cache::{DumbLruPageCache, PageCacheKey};
use super::wal::CheckpointStatus;
use super::wal::{CheckpointMode, CheckpointStatus};
pub struct PageInner {
pub flags: AtomicUsize,
@@ -196,14 +197,12 @@ impl Pager {
})
}
pub fn begin_read_tx(&self) -> Result<()> {
self.wal.borrow_mut().begin_read_tx()?;
Ok(())
pub fn begin_read_tx(&self) -> Result<LimboResult> {
self.wal.borrow_mut().begin_read_tx()
}
pub fn begin_write_tx(&self) -> Result<()> {
self.wal.borrow_mut().begin_write_tx()?;
Ok(())
pub fn begin_write_tx(&self) -> Result<LimboResult> {
self.wal.borrow_mut().begin_write_tx()
}
pub fn end_tx(&self) -> Result<CheckpointStatus> {
@@ -378,7 +377,11 @@ impl Pager {
match state {
CheckpointState::Checkpoint => {
let in_flight = self.checkpoint_inflight.clone();
match self.wal.borrow_mut().checkpoint(self, in_flight)? {
match self.wal.borrow_mut().checkpoint(
self,
in_flight,
CheckpointMode::Passive,
)? {
CheckpointStatus::IO => return Ok(CheckpointStatus::IO),
CheckpointStatus::Done => {
self.checkpoint_state.replace(CheckpointState::SyncDbFile);
@@ -414,11 +417,11 @@ impl Pager {
// WARN: used for testing purposes
pub fn clear_page_cache(&self) {
loop {
match self
.wal
.borrow_mut()
.checkpoint(self, Rc::new(RefCell::new(0)))
{
match self.wal.borrow_mut().checkpoint(
self,
Rc::new(RefCell::new(0)),
CheckpointMode::Passive,
) {
Ok(CheckpointStatus::IO) => {
self.io.run_once();
}
@@ -482,7 +485,7 @@ impl Pager {
pub fn usable_size(&self) -> usize {
let db_header = self.db_header.borrow();
(db_header.page_size - db_header.unused_space as u16) as usize
(db_header.page_size - db_header.reserved_space as u16) as usize
}
}

View File

@@ -64,30 +64,84 @@ const DEFAULT_CACHE_SIZE: i32 = -2000;
// Minimum number of pages that cache can hold.
pub const MIN_PAGE_CACHE_SIZE: usize = 10;
/// The database header.
/// The first 100 bytes of the database file comprise the database file header.
/// The database file header is divided into fields as shown by the table below.
/// All multibyte fields in the database file header are stored with the most significant byte first (big-endian).
#[derive(Debug, Clone)]
pub struct DatabaseHeader {
/// The header string: "SQLite format 3\0"
magic: [u8; 16],
/// The database page size in bytes. Must be a power of two between 512 and 32768 inclusive,
/// or the value 1 representing a page size of 65536.
pub page_size: u16,
/// File format write version. 1 for legacy; 2 for WAL.
write_version: u8,
/// File format read version. 1 for legacy; 2 for WAL.
read_version: u8,
pub unused_space: u8,
/// Bytes of unused "reserved" space at the end of each page. Usually 0.
/// SQLite has the ability to set aside a small number of extra bytes at the end of every page for use by extensions.
/// These extra bytes are used, for example, by the SQLite Encryption Extension to store a nonce and/or
/// cryptographic checksum associated with each page.
pub reserved_space: u8,
/// Maximum embedded payload fraction. Must be 64.
max_embed_frac: u8,
/// Minimum embedded payload fraction. Must be 32.
min_embed_frac: u8,
/// Leaf payload fraction. Must be 32.
min_leaf_frac: u8,
/// File change counter, incremented when database is modified.
change_counter: u32,
/// Size of the database file in pages. The "in-header database size".
pub database_size: u32,
/// Page number of the first freelist trunk page.
freelist_trunk_page: u32,
/// Total number of freelist pages.
freelist_pages: u32,
/// The schema cookie. Incremented when the database schema changes.
schema_cookie: u32,
/// The schema format number. Supported formats are 1, 2, 3, and 4.
schema_format: u32,
pub default_cache_size: i32,
vacuum: u32,
/// Default page cache size.
pub default_page_cache_size: i32,
/// The page number of the largest root b-tree page when in auto-vacuum or
/// incremental-vacuum modes, or zero otherwise.
vacuum_mode_largest_root_page: u32,
/// The database text encoding. 1=UTF-8, 2=UTF-16le, 3=UTF-16be.
text_encoding: u32,
/// The "user version" as read and set by the user_version pragma.
user_version: u32,
incremental_vacuum: u32,
/// True (non-zero) for incremental-vacuum mode. False (zero) otherwise.
incremental_vacuum_enabled: u32,
/// The "Application ID" set by PRAGMA application_id.
application_id: u32,
reserved: [u8; 20],
/// Reserved for expansion. Must be zero.
reserved_for_expansion: [u8; 20],
/// The version-valid-for number.
version_valid_for: u32,
/// SQLITE_VERSION_NUMBER
pub version_number: u32,
}
@@ -98,28 +152,62 @@ pub const WAL_FRAME_HEADER_SIZE: usize = 24;
pub const WAL_MAGIC_LE: u32 = 0x377f0682;
pub const WAL_MAGIC_BE: u32 = 0x377f0683;
/// The Write-Ahead Log (WAL) header.
/// The first 32 bytes of a WAL file comprise the WAL header.
/// The WAL header is divided into the following fields stored in big-endian order.
#[derive(Debug, Default, Clone)]
#[repr(C)] // This helps with encoding because rust does not respect the order in structs, so in
// this case we want to keep the order
pub struct WalHeader {
/// Magic number. 0x377f0682 or 0x377f0683
/// If the LSB is 0, checksums are native byte order, else checksums are serialized
pub magic: u32,
/// WAL format version. Currently 3007000
pub file_format: u32,
/// Database page size in bytes. Power of two between 512 and 32768 inclusive
pub page_size: u32,
/// Checkpoint sequence number. Increases with each checkpoint
pub checkpoint_seq: u32,
/// Random value used for the first salt in checksum calculations
pub salt_1: u32,
/// Random value used for the second salt in checksum calculations
pub salt_2: u32,
/// First checksum value in the wal-header
pub checksum_1: u32,
/// Second checksum value in the wal-header
pub checksum_2: u32,
}
/// Immediately following the wal-header are zero or more frames.
/// Each frame consists of a 24-byte frame-header followed by <page-size> bytes of page data.
/// The frame-header is six big-endian 32-bit unsigned integer values, as follows:
#[allow(dead_code)]
#[derive(Debug, Default)]
pub struct WalFrameHeader {
/// Page number
page_number: u32,
/// For commit records, the size of the database file in pages after the commit.
/// For all other records, zero.
db_size: u32,
/// Salt-1 copied from the WAL header
salt_1: u32,
/// Salt-2 copied from the WAL header
salt_2: u32,
/// Checksum-1: Cumulative checksum up through and including this page
checksum_1: u32,
/// Checksum-2: Second half of the cumulative checksum
checksum_2: u32,
}
@@ -130,7 +218,7 @@ impl Default for DatabaseHeader {
page_size: 4096,
write_version: 2,
read_version: 2,
unused_space: 0,
reserved_space: 0,
max_embed_frac: 64,
min_embed_frac: 32,
min_leaf_frac: 32,
@@ -140,13 +228,13 @@ impl Default for DatabaseHeader {
freelist_pages: 0,
schema_cookie: 0,
schema_format: 4, // latest format, new sqlite3 databases use this format
default_cache_size: 500, // pages
vacuum: 0,
default_page_cache_size: 500, // pages
vacuum_mode_largest_root_page: 0,
text_encoding: 1, // utf-8
user_version: 1,
incremental_vacuum: 0,
incremental_vacuum_enabled: 0,
application_id: 0,
reserved: [0; 20],
reserved_for_expansion: [0; 20],
version_valid_for: 3047000,
version_number: 3047000,
}
@@ -180,7 +268,7 @@ fn finish_read_database_header(
header.page_size = u16::from_be_bytes([buf[16], buf[17]]);
header.write_version = buf[18];
header.read_version = buf[19];
header.unused_space = buf[20];
header.reserved_space = buf[20];
header.max_embed_frac = buf[21];
header.min_embed_frac = buf[22];
header.min_leaf_frac = buf[23];
@@ -190,16 +278,16 @@ fn finish_read_database_header(
header.freelist_pages = u32::from_be_bytes([buf[36], buf[37], buf[38], buf[39]]);
header.schema_cookie = u32::from_be_bytes([buf[40], buf[41], buf[42], buf[43]]);
header.schema_format = u32::from_be_bytes([buf[44], buf[45], buf[46], buf[47]]);
header.default_cache_size = i32::from_be_bytes([buf[48], buf[49], buf[50], buf[51]]);
if header.default_cache_size == 0 {
header.default_cache_size = DEFAULT_CACHE_SIZE;
header.default_page_cache_size = i32::from_be_bytes([buf[48], buf[49], buf[50], buf[51]]);
if header.default_page_cache_size == 0 {
header.default_page_cache_size = DEFAULT_CACHE_SIZE;
}
header.vacuum = u32::from_be_bytes([buf[52], buf[53], buf[54], buf[55]]);
header.vacuum_mode_largest_root_page = u32::from_be_bytes([buf[52], buf[53], buf[54], buf[55]]);
header.text_encoding = u32::from_be_bytes([buf[56], buf[57], buf[58], buf[59]]);
header.user_version = u32::from_be_bytes([buf[60], buf[61], buf[62], buf[63]]);
header.incremental_vacuum = u32::from_be_bytes([buf[64], buf[65], buf[66], buf[67]]);
header.incremental_vacuum_enabled = u32::from_be_bytes([buf[64], buf[65], buf[66], buf[67]]);
header.application_id = u32::from_be_bytes([buf[68], buf[69], buf[70], buf[71]]);
header.reserved.copy_from_slice(&buf[72..92]);
header.reserved_for_expansion.copy_from_slice(&buf[72..92]);
header.version_valid_for = u32::from_be_bytes([buf[92], buf[93], buf[94], buf[95]]);
header.version_number = u32::from_be_bytes([buf[96], buf[97], buf[98], buf[99]]);
Ok(())
@@ -258,7 +346,7 @@ fn write_header_to_buf(buf: &mut [u8], header: &DatabaseHeader) {
buf[16..18].copy_from_slice(&header.page_size.to_be_bytes());
buf[18] = header.write_version;
buf[19] = header.read_version;
buf[20] = header.unused_space;
buf[20] = header.reserved_space;
buf[21] = header.max_embed_frac;
buf[22] = header.min_embed_frac;
buf[23] = header.min_leaf_frac;
@@ -268,15 +356,15 @@ fn write_header_to_buf(buf: &mut [u8], header: &DatabaseHeader) {
buf[36..40].copy_from_slice(&header.freelist_pages.to_be_bytes());
buf[40..44].copy_from_slice(&header.schema_cookie.to_be_bytes());
buf[44..48].copy_from_slice(&header.schema_format.to_be_bytes());
buf[48..52].copy_from_slice(&header.default_cache_size.to_be_bytes());
buf[48..52].copy_from_slice(&header.default_page_cache_size.to_be_bytes());
buf[52..56].copy_from_slice(&header.vacuum.to_be_bytes());
buf[52..56].copy_from_slice(&header.vacuum_mode_largest_root_page.to_be_bytes());
buf[56..60].copy_from_slice(&header.text_encoding.to_be_bytes());
buf[60..64].copy_from_slice(&header.user_version.to_be_bytes());
buf[64..68].copy_from_slice(&header.incremental_vacuum.to_be_bytes());
buf[64..68].copy_from_slice(&header.incremental_vacuum_enabled.to_be_bytes());
buf[68..72].copy_from_slice(&header.application_id.to_be_bytes());
buf[72..92].copy_from_slice(&header.reserved);
buf[72..92].copy_from_slice(&header.reserved_for_expansion);
buf[92..96].copy_from_slice(&header.version_valid_for.to_be_bytes());
buf[96..100].copy_from_slice(&header.version_number.to_be_bytes());
}
@@ -387,18 +475,60 @@ impl PageContent {
buf[self.offset + pos..self.offset + pos + 4].copy_from_slice(&value.to_be_bytes());
}
/// The second field of the b-tree page header is the offset of the first freeblock, or zero if there are no freeblocks on the page.
/// A freeblock is a structure used to identify unallocated space within a b-tree page.
/// Freeblocks are organized as a chain.
///
/// To be clear, freeblocks do not mean the regular unallocated free space to the left of the cell content area pointer, but instead
/// blocks of at least 4 bytes WITHIN the cell content area that are not in use due to e.g. deletions.
pub fn first_freeblock(&self) -> u16 {
self.read_u16(1)
}
/// The number of cells on the page.
pub fn cell_count(&self) -> usize {
self.read_u16(3) as usize
}
/// The size of the cell pointer array in bytes.
/// 2 bytes per cell pointer
pub fn cell_pointer_array_size(&self) -> usize {
const CELL_POINTER_SIZE_BYTES: usize = 2;
self.cell_count() * CELL_POINTER_SIZE_BYTES
}
/// The start of the unallocated region.
/// Effectively: the offset after the page header + the cell pointer array.
pub fn unallocated_region_start(&self) -> usize {
let (cell_ptr_array_start, cell_ptr_array_size) = self.cell_pointer_array_offset_and_size();
cell_ptr_array_start + cell_ptr_array_size
}
pub fn unallocated_region_size(&self) -> usize {
self.cell_content_area() as usize - self.unallocated_region_start()
}
/// The start of the cell content area.
/// SQLite strives to place cells as far toward the end of the b-tree page as it can,
/// in order to leave space for future growth of the cell pointer array.
/// = the cell content area pointer moves leftward as cells are added to the page
pub fn cell_content_area(&self) -> u16 {
self.read_u16(5)
}
/// The size of the page header in bytes.
/// 8 bytes for leaf pages, 12 bytes for interior pages (due to storing rightmost child pointer)
pub fn header_size(&self) -> usize {
match self.page_type() {
PageType::IndexInterior => 12,
PageType::TableInterior => 12,
PageType::IndexLeaf => 8,
PageType::TableLeaf => 8,
}
}
/// The total number of bytes in all fragments is stored in the fifth field of the b-tree page header.
/// Fragments are isolated groups of 1, 2, or 3 unused bytes within the cell content area.
pub fn num_frag_free_bytes(&self) -> u8 {
self.read_u8(7)
}
@@ -416,22 +546,19 @@ impl PageContent {
&self,
idx: usize,
pager: Rc<Pager>,
max_local: usize,
min_local: usize,
payload_overflow_threshold_max: usize,
payload_overflow_threshold_min: usize,
usable_size: usize,
) -> Result<BTreeCell> {
log::debug!("cell_get(idx={})", idx);
let buf = self.as_ptr();
let ncells = self.cell_count();
let cell_start = match self.page_type() {
PageType::IndexInterior => 12,
PageType::TableInterior => 12,
PageType::IndexLeaf => 8,
PageType::TableLeaf => 8,
};
// the page header is 12 bytes for interior pages, 8 bytes for leaf pages
// this is because the 4 last bytes in the interior page's header are used for the rightmost pointer.
let cell_pointer_array_start = self.header_size();
assert!(idx < ncells, "cell_get: idx out of bounds");
let cell_pointer = cell_start + (idx * 2);
let cell_pointer = cell_pointer_array_start + (idx * 2);
let cell_pointer = self.read_u16(cell_pointer) as usize;
read_btree_cell(
@@ -439,48 +566,46 @@ impl PageContent {
&self.page_type(),
cell_pointer,
pager,
max_local,
min_local,
payload_overflow_threshold_max,
payload_overflow_threshold_min,
usable_size,
)
}
/// When using this fu
pub fn cell_get_raw_pointer_region(&self) -> (usize, usize) {
let cell_start = match self.page_type() {
PageType::IndexInterior => 12,
PageType::TableInterior => 12,
PageType::IndexLeaf => 8,
PageType::TableLeaf => 8,
};
(self.offset + cell_start, self.cell_count() * 2)
/// The cell pointer array of a b-tree page immediately follows the b-tree page header.
/// Let K be the number of cells on the btree.
/// The cell pointer array consists of K 2-byte integer offsets to the cell contents.
/// The cell pointers are arranged in key order with:
/// - left-most cell (the cell with the smallest key) first and
/// - the right-most cell (the cell with the largest key) last.
pub fn cell_pointer_array_offset_and_size(&self) -> (usize, usize) {
let header_size = self.header_size();
(self.offset + header_size, self.cell_pointer_array_size())
}
/* Get region of a cell's payload */
pub fn cell_get_raw_region(
&self,
idx: usize,
max_local: usize,
min_local: usize,
payload_overflow_threshold_max: usize,
payload_overflow_threshold_min: usize,
usable_size: usize,
) -> (usize, usize) {
let buf = self.as_ptr();
let ncells = self.cell_count();
let cell_start = match self.page_type() {
PageType::IndexInterior => 12,
PageType::TableInterior => 12,
PageType::IndexLeaf => 8,
PageType::TableLeaf => 8,
};
let cell_pointer_array_start = self.header_size();
assert!(idx < ncells, "cell_get: idx out of bounds");
let cell_pointer = cell_start + (idx * 2);
let cell_pointer = cell_pointer_array_start + (idx * 2); // pointers are 2 bytes each
let cell_pointer = self.read_u16(cell_pointer) as usize;
let start = cell_pointer;
let len = match self.page_type() {
PageType::IndexInterior => {
let (len_payload, n_payload) = read_varint(&buf[cell_pointer + 4..]).unwrap();
let (overflows, to_read) =
payload_overflows(len_payload as usize, max_local, min_local, usable_size);
let (overflows, to_read) = payload_overflows(
len_payload as usize,
payload_overflow_threshold_max,
payload_overflow_threshold_min,
usable_size,
);
if overflows {
4 + to_read + n_payload + 4
} else {
@@ -493,8 +618,12 @@ impl PageContent {
}
PageType::IndexLeaf => {
let (len_payload, n_payload) = read_varint(&buf[cell_pointer..]).unwrap();
let (overflows, to_read) =
payload_overflows(len_payload as usize, max_local, min_local, usable_size);
let (overflows, to_read) = payload_overflows(
len_payload as usize,
payload_overflow_threshold_max,
payload_overflow_threshold_min,
usable_size,
);
if overflows {
to_read + n_payload + 4
} else {
@@ -504,8 +633,12 @@ impl PageContent {
PageType::TableLeaf => {
let (len_payload, n_payload) = read_varint(&buf[cell_pointer..]).unwrap();
let (_, n_rowid) = read_varint(&buf[cell_pointer + n_payload..]).unwrap();
let (overflows, to_read) =
payload_overflows(len_payload as usize, max_local, min_local, usable_size);
let (overflows, to_read) = payload_overflows(
len_payload as usize,
payload_overflow_threshold_max,
payload_overflow_threshold_min,
usable_size,
);
if overflows {
to_read + n_payload + n_rowid
} else {
@@ -1170,28 +1303,46 @@ pub fn begin_write_wal_header(io: &Rc<dyn File>, header: &WalHeader) -> Result<(
Ok(())
}
/*
Checks if payload will overflow a cell based on max local and
it will return the min size that will be stored in that case,
including overflow pointer
*/
/// Checks if payload will overflow a cell based on the maximum allowed size.
/// It will return the min size that will be stored in that case,
/// including overflow pointer
/// see e.g. https://github.com/sqlite/sqlite/blob/9591d3fe93936533c8c3b0dc4d025ac999539e11/src/dbstat.c#L371
pub fn payload_overflows(
payload_size: usize,
max_local: usize,
min_local: usize,
payload_overflow_threshold_max: usize,
payload_overflow_threshold_min: usize,
usable_size: usize,
) -> (bool, usize) {
if payload_size <= max_local {
if payload_size <= payload_overflow_threshold_max {
return (false, 0);
}
let mut space_left = min_local + (payload_size - min_local) % (usable_size - 4);
if space_left > max_local {
space_left = min_local;
let mut space_left = payload_overflow_threshold_min
+ (payload_size - payload_overflow_threshold_min) % (usable_size - 4);
if space_left > payload_overflow_threshold_max {
space_left = payload_overflow_threshold_min;
}
(true, space_left + 4)
}
/// The checksum is computed by interpreting the input as an even number of unsigned 32-bit integers: x(0) through x(N).
/// The 32-bit integers are big-endian if the magic number in the first 4 bytes of the WAL header is 0x377f0683
/// and the integers are little-endian if the magic number is 0x377f0682.
/// The checksum values are always stored in the frame header in a big-endian format regardless of which byte order is used to compute the checksum.
/// The checksum algorithm only works for content which is a multiple of 8 bytes in length.
/// In other words, if the inputs are x(0) through x(N) then N must be odd.
/// The checksum algorithm is as follows:
///
/// s0 = s1 = 0
/// for i from 0 to n-1 step 2:
/// s0 += x(i) + s1;
/// s1 += x(i+1) + s0;
/// endfor
///
/// The outputs s0 and s1 are both weighted checksums using Fibonacci weights in reverse order.
/// (The largest Fibonacci weight occurs on the first element of the sequence being summed.)
/// The s1 value spans all 32-bit integer terms of the sequence whereas s0 omits the final term.
pub fn checksum_wal(
buf: &[u8],
_wal_header: &WalHeader,

View File

@@ -1,10 +1,12 @@
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::RwLock;
use std::{cell::RefCell, rc::Rc, sync::Arc};
use log::{debug, trace};
use crate::io::{File, SyncCompletion, IO};
use crate::result::LimboResult;
use crate::storage::sqlite3_ondisk::{
begin_read_wal_frame, begin_write_wal_frame, WAL_FRAME_HEADER_SIZE, WAL_HEADER_SIZE,
};
@@ -14,23 +16,119 @@ use crate::{Completion, Page};
use self::sqlite3_ondisk::{checksum_wal, PageContent, WAL_MAGIC_BE, WAL_MAGIC_LE};
use super::buffer_pool::BufferPool;
use super::page_cache::PageCacheKey;
use super::pager::{PageRef, Pager};
use super::sqlite3_ondisk::{self, begin_write_btree_page, WalHeader};
pub const READMARK_NOT_USED: u32 = 0xffffffff;
pub const NO_LOCK: u32 = 0;
pub const SHARED_LOCK: u32 = 1;
pub const WRITE_LOCK: u32 = 2;
pub enum CheckpointMode {
Passive,
Full,
Restart,
Truncate,
}
#[derive(Debug)]
struct LimboRwLock {
lock: AtomicU32,
nreads: AtomicU32,
value: AtomicU32,
}
impl LimboRwLock {
/// Shared lock. Returns true if it was successful, false if it couldn't lock it
pub fn read(&mut self) -> bool {
let lock = self.lock.load(Ordering::SeqCst);
match lock {
NO_LOCK => {
let res = self.lock.compare_exchange(
lock,
SHARED_LOCK,
Ordering::SeqCst,
Ordering::SeqCst,
);
let ok = res.is_ok();
if ok {
self.nreads.fetch_add(1, Ordering::SeqCst);
}
ok
}
SHARED_LOCK => {
self.nreads.fetch_add(1, Ordering::SeqCst);
true
}
WRITE_LOCK => false,
_ => unreachable!(),
}
}
/// Locks exlusively. Returns true if it was successful, false if it couldn't lock it
pub fn write(&mut self) -> bool {
let lock = self.lock.load(Ordering::SeqCst);
match lock {
NO_LOCK => {
let res = self.lock.compare_exchange(
lock,
WRITE_LOCK,
Ordering::SeqCst,
Ordering::SeqCst,
);
res.is_ok()
}
SHARED_LOCK => {
// no op
false
}
WRITE_LOCK => true,
_ => unreachable!(),
}
}
/// Unlock the current held lock.
pub fn unlock(&mut self) {
let lock = self.lock.load(Ordering::SeqCst);
match lock {
NO_LOCK => {}
SHARED_LOCK => {
let prev = self.nreads.fetch_sub(1, Ordering::SeqCst);
if prev == 1 {
let res = self.lock.compare_exchange(
lock,
NO_LOCK,
Ordering::SeqCst,
Ordering::SeqCst,
);
assert!(res.is_ok());
}
}
WRITE_LOCK => {
let res =
self.lock
.compare_exchange(lock, NO_LOCK, Ordering::SeqCst, Ordering::SeqCst);
assert!(res.is_ok());
}
_ => unreachable!(),
}
}
}
/// Write-ahead log (WAL).
pub trait Wal {
/// Begin a read transaction.
fn begin_read_tx(&mut self) -> Result<()>;
fn begin_read_tx(&mut self) -> Result<LimboResult>;
/// Begin a write transaction.
fn begin_write_tx(&mut self) -> Result<()>;
fn begin_write_tx(&mut self) -> Result<LimboResult>;
/// End a read transaction.
fn end_read_tx(&self) -> Result<()>;
fn end_read_tx(&self) -> Result<LimboResult>;
/// End a write transaction.
fn end_write_tx(&self) -> Result<()>;
fn end_write_tx(&self) -> Result<LimboResult>;
/// Find the latest frame containing a page.
fn find_frame(&self, page_id: u64) -> Result<Option<u64>>;
@@ -51,6 +149,7 @@ pub trait Wal {
&mut self,
pager: &Pager,
write_counter: Rc<RefCell<usize>>,
mode: CheckpointMode,
) -> Result<CheckpointStatus>;
fn sync(&mut self) -> Result<CheckpointStatus>;
fn get_max_frame(&self) -> u64;
@@ -108,10 +207,16 @@ pub struct WalFile {
ongoing_checkpoint: OngoingCheckpoint,
checkpoint_threshold: usize,
// min and max frames for this connection
/// This is the index to the read_lock in WalFileShared that we are holding. This lock contains
/// the max frame for this connection.
max_frame_read_lock_index: usize,
/// Max frame allowed to lookup range=(minframe..max_frame)
max_frame: u64,
/// Start of range to look for frames range=(minframe..max_frame)
min_frame: u64,
}
// TODO(pere): lock only important parts + pin WalFileShared
/// WalFileShared is the part of a WAL that will be shared between threads. A wal has information
/// that needs to be communicated between threads so this struct does the job.
pub struct WalFileShared {
@@ -130,20 +235,94 @@ pub struct WalFileShared {
pages_in_frames: Vec<u64>,
last_checksum: (u32, u32), // Check of last frame in WAL, this is a cumulative checksum over all frames in the WAL
file: Rc<dyn File>,
/// read_locks is a list of read locks that can coexist with the max_frame nubmer stored in
/// value. There is a limited amount because and unbounded amount of connections could be
/// fatal. Therefore, for now we copy how SQLite behaves with limited amounts of read max
/// frames that is equal to 5
read_locks: [LimboRwLock; 5],
/// There is only one write allowed in WAL mode. This lock takes care of ensuring there is only
/// one used.
write_lock: LimboRwLock,
}
impl Wal for WalFile {
/// Begin a read transaction.
fn begin_read_tx(&mut self) -> Result<()> {
let shared = self.shared.read().unwrap();
fn begin_read_tx(&mut self) -> Result<LimboResult> {
let mut shared = self.shared.write().unwrap();
let max_frame_in_wal = shared.max_frame;
self.min_frame = shared.nbackfills + 1;
self.max_frame = shared.max_frame;
Ok(())
let mut max_read_mark = 0;
let mut max_read_mark_index = -1;
// Find the largest mark we can find, ignore frames that are impossible to be in range and
// that are not set
for (index, lock) in shared.read_locks.iter().enumerate() {
let this_mark = lock.value.load(Ordering::SeqCst);
if this_mark > max_read_mark && this_mark <= max_frame_in_wal as u32 {
max_read_mark = this_mark;
max_read_mark_index = index as i64;
}
}
// If we didn't find any mark, then let's add a new one
if max_read_mark_index == -1 {
for (index, lock) in shared.read_locks.iter_mut().enumerate() {
let busy = !lock.write();
if !busy {
// If this was busy then it must mean >1 threads tried to set this read lock
lock.value.store(max_frame_in_wal as u32, Ordering::SeqCst);
max_read_mark = max_frame_in_wal as u32;
max_read_mark_index = index as i64;
lock.unlock();
break;
}
}
}
if max_read_mark_index == -1 {
return Ok(LimboResult::Busy);
}
let lock = &mut shared.read_locks[max_read_mark_index as usize];
let busy = !lock.read();
if busy {
return Ok(LimboResult::Busy);
}
self.max_frame_read_lock_index = max_read_mark_index as usize;
self.max_frame = max_read_mark as u64;
self.min_frame = shared.nbackfills + 1;
log::trace!(
"begin_read_tx(min_frame={}, max_frame={}, lock={})",
self.min_frame,
self.max_frame,
self.max_frame_read_lock_index
);
Ok(LimboResult::Ok)
}
/// End a read transaction.
fn end_read_tx(&self) -> Result<()> {
Ok(())
fn end_read_tx(&self) -> Result<LimboResult> {
let mut shared = self.shared.write().unwrap();
let read_lock = &mut shared.read_locks[self.max_frame_read_lock_index];
read_lock.unlock();
Ok(LimboResult::Ok)
}
/// Begin a write transaction
fn begin_write_tx(&mut self) -> Result<LimboResult> {
let mut shared = self.shared.write().unwrap();
let busy = !shared.write_lock.write();
if busy {
return Ok(LimboResult::Busy);
}
Ok(LimboResult::Ok)
}
/// End a write transaction
fn end_write_tx(&self) -> Result<LimboResult> {
let mut shared = self.shared.write().unwrap();
shared.write_lock.unlock();
Ok(LimboResult::Ok)
}
/// Find the latest frame containing a page.
@@ -186,7 +365,11 @@ impl Wal for WalFile {
) -> Result<()> {
let page_id = page.get().id;
let mut shared = self.shared.write().unwrap();
let frame_id = shared.max_frame;
let frame_id = if shared.max_frame == 0 {
1
} else {
shared.max_frame
};
let offset = self.frame_offset(frame_id);
trace!(
"append_frame(frame={}, offset={}, page_id={})",
@@ -221,16 +404,6 @@ impl Wal for WalFile {
Ok(())
}
/// Begin a write transaction
fn begin_write_tx(&mut self) -> Result<()> {
Ok(())
}
/// End a write transaction
fn end_write_tx(&self) -> Result<()> {
Ok(())
}
fn should_checkpoint(&self) -> bool {
let shared = self.shared.read().unwrap();
let frame_id = shared.max_frame as usize;
@@ -241,7 +414,12 @@ impl Wal for WalFile {
&mut self,
pager: &Pager,
write_counter: Rc<RefCell<usize>>,
mode: CheckpointMode,
) -> Result<CheckpointStatus> {
assert!(
matches!(mode, CheckpointMode::Passive),
"only passive mode supported for now"
);
'checkpoint_loop: loop {
let state = self.ongoing_checkpoint.state;
log::debug!("checkpoint(state={:?})", state);
@@ -249,9 +427,29 @@ impl Wal for WalFile {
CheckpointState::Start => {
// TODO(pere): check what frames are safe to checkpoint between many readers!
self.ongoing_checkpoint.min_frame = self.min_frame;
self.ongoing_checkpoint.max_frame = self.max_frame;
let mut shared = self.shared.write().unwrap();
let max_frame_in_wal = shared.max_frame as u32;
let mut max_safe_frame = shared.max_frame;
for read_lock in shared.read_locks.iter_mut() {
let this_mark = read_lock.value.load(Ordering::SeqCst);
if this_mark < max_safe_frame as u32 {
let busy = !read_lock.write();
if !busy {
read_lock.value.store(max_frame_in_wal, Ordering::SeqCst);
read_lock.unlock();
} else {
max_safe_frame = this_mark as u64;
}
}
}
self.ongoing_checkpoint.max_frame = max_safe_frame;
self.ongoing_checkpoint.current_page = 0;
self.ongoing_checkpoint.state = CheckpointState::ReadFrame;
log::trace!(
"checkpoint_start(min_frame={}, max_frame={})",
self.ongoing_checkpoint.max_frame,
self.ongoing_checkpoint.min_frame
);
}
CheckpointState::ReadFrame => {
let shared = self.shared.read().unwrap();
@@ -272,8 +470,9 @@ impl Wal for WalFile {
.expect("page must be in frame cache if it's in list");
for frame in frames.iter().rev() {
// TODO: do proper selection of frames to checkpoint
if *frame >= self.ongoing_checkpoint.min_frame {
if *frame >= self.ongoing_checkpoint.min_frame
&& *frame <= self.ongoing_checkpoint.max_frame
{
log::debug!(
"checkpoint page(state={:?}, page={}, frame={})",
state,
@@ -328,10 +527,18 @@ impl Wal for WalFile {
return Ok(CheckpointStatus::IO);
}
let mut shared = self.shared.write().unwrap();
shared.frame_cache.clear();
shared.pages_in_frames.clear();
shared.max_frame = 0;
shared.nbackfills = 0;
let everything_backfilled =
shared.max_frame == self.ongoing_checkpoint.max_frame;
if everything_backfilled {
// Here we know that we backfilled everything, therefore we can safely
// reset the wal.
shared.frame_cache.clear();
shared.pages_in_frames.clear();
shared.max_frame = 0;
shared.nbackfills = 0;
} else {
shared.nbackfills = self.ongoing_checkpoint.max_frame;
}
self.ongoing_checkpoint.state = CheckpointState::Start;
return Ok(CheckpointStatus::Done);
}
@@ -412,10 +619,11 @@ impl WalFile {
syncing: Rc::new(RefCell::new(false)),
checkpoint_threshold: 1000,
page_size,
max_frame: 0,
min_frame: 0,
buffer_pool,
sync_state: RefCell::new(SyncState::NotSyncing),
max_frame: 0,
min_frame: 0,
max_frame_read_lock_index: 0,
}
}
@@ -488,6 +696,38 @@ impl WalFileShared {
last_checksum: checksum,
file,
pages_in_frames: Vec::new(),
read_locks: [
LimboRwLock {
lock: AtomicU32::new(NO_LOCK),
nreads: AtomicU32::new(0),
value: AtomicU32::new(READMARK_NOT_USED),
},
LimboRwLock {
lock: AtomicU32::new(NO_LOCK),
nreads: AtomicU32::new(0),
value: AtomicU32::new(READMARK_NOT_USED),
},
LimboRwLock {
lock: AtomicU32::new(NO_LOCK),
nreads: AtomicU32::new(0),
value: AtomicU32::new(READMARK_NOT_USED),
},
LimboRwLock {
lock: AtomicU32::new(NO_LOCK),
nreads: AtomicU32::new(0),
value: AtomicU32::new(READMARK_NOT_USED),
},
LimboRwLock {
lock: AtomicU32::new(NO_LOCK),
nreads: AtomicU32::new(0),
value: AtomicU32::new(READMARK_NOT_USED),
},
],
write_lock: LimboRwLock {
lock: AtomicU32::new(NO_LOCK),
nreads: AtomicU32::new(0),
value: AtomicU32::new(READMARK_NOT_USED),
},
};
Ok(Arc::new(RwLock::new(shared)))
}

View File

@@ -913,6 +913,51 @@ pub fn translate_expr(
});
Ok(target_register)
}
JsonFunc::JsonArrayLength => {
let args = if let Some(args) = args {
if args.len() > 2 {
crate::bail_parse_error!(
"{} function with wrong number of arguments",
j.to_string()
)
}
args
} else {
crate::bail_parse_error!(
"{} function with no arguments",
j.to_string()
);
};
let json_reg = program.alloc_register();
let path_reg = program.alloc_register();
translate_expr(
program,
referenced_tables,
&args[0],
json_reg,
precomputed_exprs_to_registers,
)?;
if args.len() == 2 {
translate_expr(
program,
referenced_tables,
&args[1],
path_reg,
precomputed_exprs_to_registers,
)?;
}
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: json_reg,
dest: target_register,
func: func_ctx,
});
Ok(target_register)
}
},
Func::Scalar(srf) => {
match srf {

View File

@@ -2,32 +2,187 @@ use std::rc::Weak;
use std::{cell::RefCell, ops::Deref, rc::Rc};
use sqlite3_parser::ast::{
DistinctNames, InsertBody, QualifiedName, ResolveType, ResultColumn, With,
DistinctNames, Expr, InsertBody, QualifiedName, ResolveType, ResultColumn, With,
};
use crate::error::SQLITE_CONSTRAINT_PRIMARYKEY;
use crate::util::normalize_ident;
use crate::{
schema::{Schema, Table},
schema::{Column, Schema, Table},
storage::sqlite3_ondisk::DatabaseHeader,
translate::expr::translate_expr,
vdbe::{builder::ProgramBuilder, Insn, Program},
};
use crate::{Connection, Result};
#[derive(Debug)]
/// Represents how a column should be populated during an INSERT.
/// Contains both the column definition and optionally the index into the VALUES tuple.
struct ColumnMapping<'a> {
/// Reference to the column definition from the table schema
column: &'a Column,
/// If Some(i), use the i-th value from the VALUES tuple
/// If None, use NULL (column was not specified in INSERT statement)
value_index: Option<usize>,
}
/// Resolves how each column in a table should be populated during an INSERT.
/// Returns a Vec of ColumnMapping, one for each column in the table's schema.
///
/// For each column, specifies:
/// 1. The column definition (type, constraints, etc)
/// 2. Where to get the value from:
/// - Some(i) -> use i-th value from the VALUES tuple
/// - None -> use NULL (column wasn't specified in INSERT)
///
/// Two cases are handled:
/// 1. No column list specified (INSERT INTO t VALUES ...):
/// - Values are assigned to columns in table definition order
/// - If fewer values than columns, remaining columns map to None
/// 2. Column list specified (INSERT INTO t (col1, col3) VALUES ...):
/// - Named columns map to their corresponding value index
/// - Unspecified columns map to None
fn resolve_columns_for_insert<'a>(
table: &'a Table,
columns: &Option<DistinctNames>,
values: &[Vec<Expr>],
) -> Result<Vec<ColumnMapping<'a>>> {
if values.is_empty() {
crate::bail_parse_error!("no values to insert");
}
let table_columns = table.columns();
// Case 1: No columns specified - map values to columns in order
if columns.is_none() {
let num_values = values[0].len();
if num_values > table_columns.len() {
crate::bail_parse_error!(
"table {} has {} columns but {} values were supplied",
table.get_name(),
table_columns.len(),
num_values
);
}
// Verify all value tuples have same length
for value in values.iter().skip(1) {
if value.len() != num_values {
crate::bail_parse_error!("all VALUES must have the same number of terms");
}
}
// Map each column to either its corresponding value index or None
return Ok(table_columns
.iter()
.enumerate()
.map(|(i, col)| ColumnMapping {
column: col,
value_index: if i < num_values { Some(i) } else { None },
})
.collect());
}
// Case 2: Columns specified - map named columns to their values
let mut mappings: Vec<_> = table_columns
.iter()
.map(|col| ColumnMapping {
column: col,
value_index: None,
})
.collect();
// Map each named column to its value index
for (value_index, column_name) in columns.as_ref().unwrap().iter().enumerate() {
let column_name = normalize_ident(column_name.0.as_str());
let table_index = table_columns
.iter()
.position(|c| c.name.eq_ignore_ascii_case(&column_name));
if table_index.is_none() {
crate::bail_parse_error!(
"table {} has no column named {}",
table.get_name(),
column_name
);
}
mappings[table_index.unwrap()].value_index = Some(value_index);
}
Ok(mappings)
}
/// Populates the column registers with values for a single row
fn populate_column_registers(
program: &mut ProgramBuilder,
value: &[Expr],
column_mappings: &[ColumnMapping],
column_registers_start: usize,
inserting_multiple_rows: bool,
rowid_reg: usize,
) -> Result<()> {
for (i, mapping) in column_mappings.iter().enumerate() {
let target_reg = column_registers_start + i;
// Column has a value in the VALUES tuple
if let Some(value_index) = mapping.value_index {
// When inserting a single row, SQLite writes the value provided for the rowid alias column (INTEGER PRIMARY KEY)
// directly into the rowid register and writes a NULL into the rowid alias column. Not sure why this only happens
// in the single row case, but let's copy it.
let write_directly_to_rowid_reg =
mapping.column.is_rowid_alias && !inserting_multiple_rows;
let reg = if write_directly_to_rowid_reg {
rowid_reg
} else {
target_reg
};
translate_expr(
program,
None,
value.get(value_index).expect("value index out of bounds"),
reg,
None,
)?;
if write_directly_to_rowid_reg {
program.emit_insn(Insn::SoftNull { reg: target_reg });
}
} else {
// Column was not specified - use NULL if it is nullable, otherwise error
// Rowid alias columns can be NULL because we will autogenerate a rowid in that case.
let is_nullable = !mapping.column.primary_key || mapping.column.is_rowid_alias;
if is_nullable {
program.emit_insn(Insn::Null {
dest: target_reg,
dest_end: None,
});
program.mark_last_insn_constant();
} else {
crate::bail_parse_error!("column {} is not nullable", mapping.column.name);
}
}
}
Ok(())
}
#[allow(clippy::too_many_arguments)]
pub fn translate_insert(
schema: &Schema,
with: &Option<With>,
or_conflict: &Option<ResolveType>,
on_conflict: &Option<ResolveType>,
tbl_name: &QualifiedName,
_columns: &Option<DistinctNames>,
columns: &Option<DistinctNames>,
body: &InsertBody,
_returning: &Option<Vec<ResultColumn>>,
database_header: Rc<RefCell<DatabaseHeader>>,
connection: Weak<Connection>,
) -> Result<Program> {
assert!(with.is_none());
assert!(or_conflict.is_none());
if with.is_some() {
crate::bail_parse_error!("WITH clause is not supported");
}
if on_conflict.is_some() {
crate::bail_parse_error!("ON CONFLICT clause is not supported");
}
let mut program = ProgramBuilder::new();
let init_label = program.allocate_label();
program.emit_insn_with_label_dependency(
@@ -46,6 +201,10 @@ pub fn translate_insert(
None => crate::bail_corrupt_error!("Parse error: no such table: {}", table_name),
};
let table = Rc::new(Table::BTree(table));
if !table.has_rowid() {
crate::bail_parse_error!("INSERT into WITHOUT ROWID table is not supported");
}
let cursor_id = program.alloc_cursor_id(
Some(table_name.0.clone()),
Some(table.clone().deref().clone()),
@@ -55,18 +214,49 @@ pub fn translate_insert(
Table::Index(index) => index.root_page,
Table::Pseudo(_) => todo!(),
};
let values = match body {
InsertBody::Select(select, None) => match &select.body.select {
sqlite3_parser::ast::OneSelect::Values(values) => values,
_ => todo!(),
},
_ => todo!(),
};
let mut num_cols = table.columns().len();
if table.has_rowid() {
num_cols += 1;
}
// column_registers_start[0] == rowid if has rowid
let column_registers_start = program.alloc_registers(num_cols);
let column_mappings = resolve_columns_for_insert(&table, columns, values)?;
// Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias)
let rowid_alias_index = table.columns().iter().position(|c| c.is_rowid_alias);
let has_user_provided_rowid = {
assert!(column_mappings.len() == table.columns().len());
if let Some(index) = rowid_alias_index {
column_mappings[index].value_index.is_some()
} else {
false
}
};
// Coroutine for values
let yield_reg = program.alloc_register();
let jump_on_definition_label = program.allocate_label();
{
// allocate a register for each column in the table. if not provided by user, they will simply be set as null.
// allocate an extra register for rowid regardless of whether user provided a rowid alias column.
let num_cols = table.columns().len();
let rowid_reg = program.alloc_registers(num_cols + 1);
let column_registers_start = rowid_reg + 1;
let rowid_alias_reg = {
if has_user_provided_rowid {
Some(column_registers_start + rowid_alias_index.unwrap())
} else {
None
}
};
let record_register = program.alloc_register();
let halt_label = program.allocate_label();
let mut loop_start_offset = 0;
let inserting_multiple_rows = values.len() > 1;
// Multiple rows - use coroutine for value population
if inserting_multiple_rows {
let yield_reg = program.alloc_register();
let jump_on_definition_label = program.allocate_label();
program.emit_insn_with_label_dependency(
Insn::InitCoroutine {
yield_reg,
@@ -75,134 +265,154 @@ pub fn translate_insert(
},
jump_on_definition_label,
);
match body {
InsertBody::Select(select, None) => match &select.body.select {
sqlite3_parser::ast::OneSelect::Select {
distinctness: _,
columns: _,
from: _,
where_clause: _,
group_by: _,
window_clause: _,
} => todo!(),
sqlite3_parser::ast::OneSelect::Values(values) => {
for value in values {
for (col, expr) in value.iter().enumerate() {
let mut col = col;
if table.has_rowid() {
col += 1;
}
translate_expr(
&mut program,
None,
expr,
column_registers_start + col,
None,
)?;
}
program.emit_insn(Insn::Yield {
yield_reg,
end_offset: 0,
});
}
}
},
InsertBody::DefaultValues => todo!("default values not yet supported"),
_ => todo!(),
for value in values {
populate_column_registers(
&mut program,
value,
&column_mappings,
column_registers_start,
true,
rowid_reg,
)?;
program.emit_insn(Insn::Yield {
yield_reg,
end_offset: 0,
});
}
program.emit_insn(Insn::EndCoroutine { yield_reg });
program.resolve_label(jump_on_definition_label, program.offset());
program.emit_insn(Insn::OpenWriteAsync {
cursor_id,
root_page,
});
program.emit_insn(Insn::OpenWriteAwait {});
// Main loop
// FIXME: rollback is not implemented. E.g. if you insert 2 rows and one fails due to a unique constraint violation,
// the other row will still be inserted.
loop_start_offset = program.offset();
program.emit_insn_with_label_dependency(
Insn::Yield {
yield_reg,
end_offset: halt_label,
},
halt_label,
);
} else {
// Single row - populate registers directly
program.emit_insn(Insn::OpenWriteAsync {
cursor_id,
root_page,
});
program.emit_insn(Insn::OpenWriteAwait {});
populate_column_registers(
&mut program,
&values[0],
&column_mappings,
column_registers_start,
false,
rowid_reg,
)?;
}
program.resolve_label(jump_on_definition_label, program.offset());
program.emit_insn(Insn::OpenWriteAsync {
cursor_id,
root_page,
});
program.emit_insn(Insn::OpenWriteAwait {});
// Main loop
let record_register = program.alloc_register();
let halt_label = program.allocate_label();
let loop_start_offset = program.offset();
program.emit_insn_with_label_dependency(
Insn::Yield {
yield_reg,
end_offset: halt_label,
},
halt_label,
);
if table.has_rowid() {
let row_id_reg = column_registers_start;
if let Some(rowid_alias_column) = table.get_rowid_alias_column() {
let key_reg = column_registers_start + 1 + rowid_alias_column.0;
// copy key to rowid
// Common record insertion logic for both single and multiple rows
let check_rowid_is_integer_label = rowid_alias_reg.and(Some(program.allocate_label()));
if let Some(reg) = rowid_alias_reg {
// for the row record, the rowid alias column (INTEGER PRIMARY KEY) is always set to NULL
// and its value is copied to the rowid register. in the case where a single row is inserted,
// the value is written directly to the rowid register (see populate_column_registers()).
// again, not sure why this only happens in the single row case, but let's mimic sqlite.
// in the single row case we save a Copy instruction, but in the multiple rows case we do
// it here in the loop.
if inserting_multiple_rows {
program.emit_insn(Insn::Copy {
src_reg: key_reg,
dst_reg: row_id_reg,
amount: 0,
src_reg: reg,
dst_reg: rowid_reg,
amount: 0, // TODO: rename 'amount' to something else; amount==0 means 1
});
program.emit_insn(Insn::SoftNull { reg: key_reg });
// for the row record, the rowid alias column is always set to NULL
program.emit_insn(Insn::SoftNull { reg });
}
let notnull_label = program.allocate_label();
// the user provided rowid value might itself be NULL. If it is, we create a new rowid on the next instruction.
program.emit_insn_with_label_dependency(
Insn::NotNull {
reg: row_id_reg,
target_pc: notnull_label,
reg: rowid_reg,
target_pc: check_rowid_is_integer_label.unwrap(),
},
notnull_label,
check_rowid_is_integer_label.unwrap(),
);
program.emit_insn(Insn::NewRowid {
cursor: cursor_id,
rowid_reg: row_id_reg,
prev_largest_reg: 0,
});
}
program.resolve_label(notnull_label, program.offset());
program.emit_insn(Insn::MustBeInt { reg: row_id_reg });
// Create new rowid if a) not provided by user or b) provided by user but is NULL
program.emit_insn(Insn::NewRowid {
cursor: cursor_id,
rowid_reg: rowid_reg,
prev_largest_reg: 0,
});
if let Some(must_be_int_label) = check_rowid_is_integer_label {
program.resolve_label(must_be_int_label, program.offset());
// If the user provided a rowid, it must be an integer.
program.emit_insn(Insn::MustBeInt { reg: rowid_reg });
}
// Check uniqueness constraint for rowid if it was provided by user.
// When the DB allocates it there is no need for separate uniqueness checks.
if has_user_provided_rowid {
let make_record_label = program.allocate_label();
program.emit_insn_with_label_dependency(
Insn::NotExists {
cursor: cursor_id,
rowid_reg: row_id_reg,
rowid_reg: rowid_reg,
target_pc: make_record_label,
},
make_record_label,
);
// TODO: rollback
let rowid_column_name = if let Some(index) = rowid_alias_index {
table.column_index_to_name(index).unwrap()
} else {
"rowid"
};
program.emit_insn(Insn::Halt {
err_code: SQLITE_CONSTRAINT_PRIMARYKEY,
description: format!(
"{}.{}",
table.get_name(),
table.column_index_to_name(0).unwrap()
),
description: format!("{}.{}", table.get_name(), rowid_column_name),
});
program.resolve_label(make_record_label, program.offset());
program.emit_insn(Insn::MakeRecord {
start_reg: column_registers_start + 1,
count: num_cols - 1,
dest_reg: record_register,
});
program.emit_insn(Insn::InsertAsync {
cursor: cursor_id,
key_reg: column_registers_start,
record_reg: record_register,
flag: 0,
});
program.emit_insn(Insn::InsertAwait { cursor_id });
}
program.emit_insn(Insn::Goto {
target_pc: loop_start_offset,
// Create and insert the record
program.emit_insn(Insn::MakeRecord {
start_reg: column_registers_start,
count: num_cols,
dest_reg: record_register,
});
program.emit_insn(Insn::InsertAsync {
cursor: cursor_id,
key_reg: rowid_reg,
record_reg: record_register,
flag: 0,
});
program.emit_insn(Insn::InsertAwait { cursor_id });
if inserting_multiple_rows {
// For multiple rows, loop back
program.emit_insn(Insn::Goto {
target_pc: loop_start_offset,
});
}
program.resolve_label(halt_label, program.offset());
program.emit_insn(Insn::Halt {
err_code: 0,
description: String::new(),
});
program.resolve_label(init_label, program.offset());
program.emit_insn(Insn::Transaction { write: true });
program.emit_constant_insns();

View File

@@ -386,7 +386,6 @@ fn update_pragma(
query_pragma("journal_mode", header, program)?;
Ok(())
}
_ => todo!("pragma `{name}`"),
}
}
@@ -403,7 +402,7 @@ fn query_pragma(
match pragma {
PragmaName::CacheSize => {
program.emit_insn(Insn::Integer {
value: database_header.borrow().default_cache_size.into(),
value: database_header.borrow().default_page_cache_size.into(),
dest: register,
});
}
@@ -413,9 +412,6 @@ fn query_pragma(
dest: register,
});
}
_ => {
todo!("pragma `{name}`");
}
}
program.emit_insn(Insn::ResultRow {
@@ -441,7 +437,7 @@ fn update_cache_size(value: i64, header: Rc<RefCell<DatabaseHeader>>, pager: Rc<
}
// update in-memory header
header.borrow_mut().default_cache_size = cache_size_unformatted
header.borrow_mut().default_page_cache_size = cache_size_unformatted
.try_into()
.unwrap_or_else(|_| panic!("invalid value, too big for a i32 {}", value));

View File

@@ -265,7 +265,7 @@ pub fn prepare_select_plan<'a>(schema: &Schema, select: ast::Select) -> Result<P
columns,
from,
where_clause,
mut group_by,
group_by,
..
} => {
let col_count = columns.len();

View File

@@ -387,6 +387,75 @@ pub struct OwnedRecord {
pub values: Vec<OwnedValue>,
}
// Inclusive value ranges for each fixed-width SQLite integer serial type.
// An integer is encoded with the narrowest type whose range contains it.
const I8_LOW: i64 = -128;
const I8_HIGH: i64 = 127;
const I16_LOW: i64 = -32768;
const I16_HIGH: i64 = 32767;
const I24_LOW: i64 = -8388608;
const I24_HIGH: i64 = 8388607;
const I32_LOW: i64 = -2147483648;
const I32_HIGH: i64 = 2147483647;
const I48_LOW: i64 = -140737488355328;
const I48_HIGH: i64 = 140737488355327;
/// Sqlite Serial Types
/// https://www.sqlite.org/fileformat.html#record_format
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum SerialType {
    /// Serial type 0: SQL NULL, zero payload bytes.
    Null,
    /// Serial type 1: big-endian 8-bit twos-complement integer.
    I8,
    /// Serial type 2: big-endian 16-bit twos-complement integer.
    I16,
    /// Serial type 3: big-endian 24-bit twos-complement integer.
    I24,
    /// Serial type 4: big-endian 32-bit twos-complement integer.
    I32,
    /// Serial type 5: big-endian 48-bit twos-complement integer.
    I48,
    /// Serial type 6: big-endian 64-bit twos-complement integer.
    I64,
    /// Serial type 7: big-endian IEEE 754 64-bit float.
    F64,
    /// Serial type >= 13 and odd: text of `content_size` bytes.
    Text { content_size: usize },
    /// Serial type >= 12 and even: blob of `content_size` bytes.
    Blob { content_size: usize },
}
impl From<&OwnedValue> for SerialType {
    /// Picks the serial type for a value, choosing the narrowest
    /// integer encoding whose range contains the value — the same
    /// rule SQLite applies when building a record header.
    fn from(value: &OwnedValue) -> Self {
        match value {
            OwnedValue::Null => SerialType::Null,
            OwnedValue::Integer(i) => match *i {
                I8_LOW..=I8_HIGH => SerialType::I8,
                I16_LOW..=I16_HIGH => SerialType::I16,
                I24_LOW..=I24_HIGH => SerialType::I24,
                I32_LOW..=I32_HIGH => SerialType::I32,
                I48_LOW..=I48_HIGH => SerialType::I48,
                _ => SerialType::I64,
            },
            OwnedValue::Float(_) => SerialType::F64,
            OwnedValue::Text(t) => {
                let content_size = t.value.len();
                SerialType::Text { content_size }
            }
            OwnedValue::Blob(b) => {
                let content_size = b.len();
                SerialType::Blob { content_size }
            }
            // Aggregate state and nested records never reach serialization.
            OwnedValue::Agg(_) => unreachable!(),
            OwnedValue::Record(_) => unreachable!(),
        }
    }
}
impl From<SerialType> for u64 {
fn from(serial_type: SerialType) -> Self {
match serial_type {
SerialType::Null => 0,
SerialType::I8 => 1,
SerialType::I16 => 2,
SerialType::I24 => 3,
SerialType::I32 => 4,
SerialType::I48 => 5,
SerialType::I64 => 6,
SerialType::F64 => 7,
SerialType::Text { content_size } => (content_size * 2 + 13) as u64,
SerialType::Blob { content_size } => (content_size * 2 + 12) as u64,
}
}
}
impl OwnedRecord {
pub fn new(values: Vec<OwnedValue>) -> Self {
Self { values }
@@ -395,31 +464,32 @@ impl OwnedRecord {
pub fn serialize(&self, buf: &mut Vec<u8>) {
let initial_i = buf.len();
// write serial types
for value in &self.values {
let serial_type = match value {
OwnedValue::Null => 0,
OwnedValue::Integer(_) => 6, // for now let's only do i64
OwnedValue::Float(_) => 7,
OwnedValue::Text(t) => (t.value.len() * 2 + 13) as u64,
OwnedValue::Blob(b) => (b.len() * 2 + 12) as u64,
// not serializable values
OwnedValue::Agg(_) => unreachable!(),
OwnedValue::Record(_) => unreachable!(),
};
buf.resize(buf.len() + 9, 0); // Ensure space for varint
let serial_type = SerialType::from(value);
buf.resize(buf.len() + 9, 0); // Ensure space for varint (1-9 bytes in length)
let len = buf.len();
let n = write_varint(&mut buf[len - 9..], serial_type);
let n = write_varint(&mut buf[len - 9..], serial_type.into());
buf.truncate(buf.len() - 9 + n); // Remove unused bytes
}
let mut header_size = buf.len() - initial_i;
// write content
for value in &self.values {
// TODO: make integers and floats with smaller serial types
match value {
OwnedValue::Null => {}
OwnedValue::Integer(i) => buf.extend_from_slice(&i.to_be_bytes()),
OwnedValue::Integer(i) => {
let serial_type = SerialType::from(value);
match serial_type {
SerialType::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()),
SerialType::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()),
SerialType::I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte
SerialType::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()),
SerialType::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes
SerialType::I64 => buf.extend_from_slice(&i.to_be_bytes()),
_ => unreachable!(),
}
}
OwnedValue::Float(f) => buf.extend_from_slice(&f.to_be_bytes()),
OwnedValue::Text(t) => buf.extend_from_slice(t.value.as_bytes()),
OwnedValue::Blob(b) => buf.extend_from_slice(b),
@@ -490,3 +560,206 @@ pub trait Cursor {
fn get_null_flag(&self) -> bool;
fn btree_create(&mut self, flags: usize) -> u32;
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::rc::Rc;

    /// NULL serializes to a header-only record: one header-size byte plus
    /// one serial-type byte, with no payload at all.
    #[test]
    fn test_serialize_null() {
        let record = OwnedRecord::new(vec![OwnedValue::Null]);
        let mut buf = Vec::new();
        record.serialize(&mut buf);

        // Header = 1 size byte + 1 single-byte serial type per value.
        let header_length = record.values.len() + 1;
        let header = &buf[0..header_length];
        // First byte is the total header size in bytes.
        assert_eq!(header[0], header_length as u8);
        // Second byte is the serial type for NULL.
        assert_eq!(header[1] as u64, u64::from(SerialType::Null));
        // NULL has a zero-length payload, so the buffer ends with the header.
        assert_eq!(buf.len(), header_length);
    }

    /// Each integer must be stored with the narrowest serial type whose
    /// range contains it, and the payload bytes must round-trip.
    #[test]
    fn test_serialize_integers() {
        let record = OwnedRecord::new(vec![
            OwnedValue::Integer(42),                // fits in I8
            OwnedValue::Integer(1000),              // fits in I16
            OwnedValue::Integer(1_000_000),         // fits in I24
            OwnedValue::Integer(1_000_000_000),     // fits in I32
            OwnedValue::Integer(1_000_000_000_000), // fits in I48
            OwnedValue::Integer(i64::MAX),          // needs I64
        ]);
        let mut buf = Vec::new();
        record.serialize(&mut buf);

        let header_length = record.values.len() + 1;
        let header = &buf[0..header_length];

        // First byte is the header size: one size byte plus one
        // single-byte serial type per integer value.
        assert_eq!(header[0], header_length as u8);

        // Check that correct serial types were chosen.
        assert_eq!(header[1] as u64, u64::from(SerialType::I8));
        assert_eq!(header[2] as u64, u64::from(SerialType::I16));
        assert_eq!(header[3] as u64, u64::from(SerialType::I24));
        assert_eq!(header[4] as u64, u64::from(SerialType::I32));
        assert_eq!(header[5] as u64, u64::from(SerialType::I48));
        assert_eq!(header[6] as u64, u64::from(SerialType::I64));

        // The payload bytes after the header must decode back to the
        // original values.
        let mut cur_offset = header_length;
        let i8_bytes = &buf[cur_offset..cur_offset + size_of::<i8>()];
        cur_offset += size_of::<i8>();
        let i16_bytes = &buf[cur_offset..cur_offset + size_of::<i16>()];
        cur_offset += size_of::<i16>();
        let i24_bytes = &buf[cur_offset..cur_offset + size_of::<i32>() - 1];
        cur_offset += size_of::<i32>() - 1; // i24 is stored in 3 bytes
        let i32_bytes = &buf[cur_offset..cur_offset + size_of::<i32>()];
        cur_offset += size_of::<i32>();
        let i48_bytes = &buf[cur_offset..cur_offset + size_of::<i64>() - 2];
        cur_offset += size_of::<i64>() - 2; // i48 is stored in 6 bytes
        let i64_bytes = &buf[cur_offset..cur_offset + size_of::<i64>()];

        let val_int8 = i8::from_be_bytes(i8_bytes.try_into().unwrap());
        let val_int16 = i16::from_be_bytes(i16_bytes.try_into().unwrap());
        // i24/i48 are big-endian with the most significant byte(s)
        // stripped; restore them with leading zero bytes before decoding.
        let mut leading_0 = vec![0];
        leading_0.extend(i24_bytes);
        let val_int24 = i32::from_be_bytes(leading_0.try_into().unwrap());
        let val_int32 = i32::from_be_bytes(i32_bytes.try_into().unwrap());
        let mut leading_00 = vec![0, 0];
        leading_00.extend(i48_bytes);
        let val_int48 = i64::from_be_bytes(leading_00.try_into().unwrap());
        let val_int64 = i64::from_be_bytes(i64_bytes.try_into().unwrap());

        assert_eq!(val_int8, 42);
        assert_eq!(val_int16, 1000);
        assert_eq!(val_int24, 1_000_000);
        assert_eq!(val_int32, 1_000_000_000);
        assert_eq!(val_int48, 1_000_000_000_000);
        assert_eq!(val_int64, i64::MAX);

        // Total size: header + payload, where each value's payload width
        // depends on the serial type chosen for it.
        // (The I64 value occupies size_of::<i64>() bytes; the original
        // assertion used size_of::<f64>(), which only worked by the
        // coincidence that both are 8 bytes.)
        assert_eq!(
            buf.len(),
            header_length
                + size_of::<i8>()
                + size_of::<i16>()
                + (size_of::<i32>() - 1) // i24
                + size_of::<i32>()
                + (size_of::<i64>() - 2) // i48
                + size_of::<i64>()
        );
    }

    /// Floats always use serial type 7 and an 8-byte big-endian payload.
    #[test]
    fn test_serialize_float() {
        let record = OwnedRecord::new(vec![OwnedValue::Float(3.14159)]);
        let mut buf = Vec::new();
        record.serialize(&mut buf);

        let header_length = record.values.len() + 1;
        let header = &buf[0..header_length];
        // First byte is the header size.
        assert_eq!(header[0], header_length as u8);
        // Second byte is the serial type for FLOAT.
        assert_eq!(header[1] as u64, u64::from(SerialType::F64));
        // The payload must decode back to the original float.
        let float_bytes = &buf[header_length..header_length + size_of::<f64>()];
        let float = f64::from_be_bytes(float_bytes.try_into().unwrap());
        assert_eq!(float, 3.14159);
        // Buffer = header + one f64 payload.
        assert_eq!(buf.len(), header_length + size_of::<f64>());
    }

    /// Text uses the odd length-encoded serial type (len * 2 + 13) and
    /// stores the raw UTF-8 bytes as payload.
    #[test]
    fn test_serialize_text() {
        let text = Rc::new("hello".to_string());
        let record = OwnedRecord::new(vec![OwnedValue::Text(LimboText::new(text.clone()))]);
        let mut buf = Vec::new();
        record.serialize(&mut buf);

        let header_length = record.values.len() + 1;
        let header = &buf[0..header_length];
        // First byte is the header size.
        assert_eq!(header[0], header_length as u8);
        // Second byte is the serial type for TEXT: len * 2 + 13.
        assert_eq!(header[1], (5 * 2 + 13) as u8);
        // Payload is the raw text bytes.
        assert_eq!(&buf[2..7], b"hello");
        // Buffer = header + text bytes.
        assert_eq!(buf.len(), header_length + text.len());
    }

    /// Blobs use the even length-encoded serial type (len * 2 + 12) and
    /// store the raw bytes as payload.
    #[test]
    fn test_serialize_blob() {
        let blob = Rc::new(vec![1, 2, 3, 4, 5]);
        let record = OwnedRecord::new(vec![OwnedValue::Blob(blob.clone())]);
        let mut buf = Vec::new();
        record.serialize(&mut buf);

        let header_length = record.values.len() + 1;
        let header = &buf[0..header_length];
        // First byte is the header size.
        assert_eq!(header[0], header_length as u8);
        // Second byte is the serial type for BLOB: len * 2 + 12.
        assert_eq!(header[1], (5 * 2 + 12) as u8);
        // Payload is the raw blob bytes.
        assert_eq!(&buf[2..7], &[1, 2, 3, 4, 5]);
        // Buffer = header + blob bytes.
        assert_eq!(buf.len(), header_length + blob.len());
    }

    /// A record mixing NULL, integer, float and text must lay out one
    /// serial type per value in order, followed by the payloads.
    #[test]
    fn test_serialize_mixed_types() {
        let text = Rc::new("test".to_string());
        let record = OwnedRecord::new(vec![
            OwnedValue::Null,
            OwnedValue::Integer(42),
            OwnedValue::Float(3.14),
            OwnedValue::Text(LimboText::new(text.clone())),
        ]);
        let mut buf = Vec::new();
        record.serialize(&mut buf);

        let header_length = record.values.len() + 1;
        let header = &buf[0..header_length];
        // First byte is the header size.
        assert_eq!(header[0], header_length as u8);
        // Serial types appear in value order: NULL, I8, F64, TEXT.
        assert_eq!(header[1] as u64, u64::from(SerialType::Null));
        assert_eq!(header[2] as u64, u64::from(SerialType::I8));
        assert_eq!(header[3] as u64, u64::from(SerialType::F64));
        // TEXT serial type is len * 2 + 13.
        assert_eq!(header[4] as u64, (4 * 2 + 13) as u64);

        // Payloads follow in the same order (NULL contributes no bytes).
        let mut cur_offset = header_length;
        let i8_bytes = &buf[cur_offset..cur_offset + size_of::<i8>()];
        cur_offset += size_of::<i8>();
        let f64_bytes = &buf[cur_offset..cur_offset + size_of::<f64>()];
        cur_offset += size_of::<f64>();
        let text_bytes = &buf[cur_offset..cur_offset + text.len()];

        let val_int8 = i8::from_be_bytes(i8_bytes.try_into().unwrap());
        let val_float = f64::from_be_bytes(f64_bytes.try_into().unwrap());
        let val_text = String::from_utf8(text_bytes.to_vec()).unwrap();

        assert_eq!(val_int8, 42);
        assert_eq!(val_float, 3.14);
        assert_eq!(val_text, "test");

        // Buffer = header + i8 + f64 + text bytes (NULL adds nothing).
        assert_eq!(
            buf.len(),
            header_length + size_of::<i8>() + size_of::<f64>() + text.len()
        );
    }
}

View File

@@ -4,7 +4,7 @@ use sqlite3_parser::ast::{Expr, FunctionTail, Literal};
use crate::{
schema::{self, Schema},
Result, RowResult, Rows, IO,
Result, Rows, StepResult, IO,
};
// https://sqlite.org/lang_keywords.html
@@ -27,7 +27,7 @@ pub fn parse_schema_rows(rows: Option<Rows>, schema: &mut Schema, io: Arc<dyn IO
if let Some(mut rows) = rows {
loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
let ty = row.get::<&str>(0)?;
if ty != "table" && ty != "index" {
continue;
@@ -53,13 +53,14 @@ pub fn parse_schema_rows(rows: Option<Rows>, schema: &mut Schema, io: Arc<dyn IO
_ => continue,
}
}
RowResult::IO => {
StepResult::IO => {
// TODO: How do we ensure that the I/O we submitted to
// read the schema is actually complete?
io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => break,
}
}
}

View File

@@ -59,10 +59,6 @@ impl ProgramBuilder {
reg
}
pub fn next_free_register(&self) -> usize {
self.next_free_register
}
pub fn alloc_cursor_id(
&mut self,
table_identifier: Option<String>,

View File

@@ -1,5 +1,4 @@
use super::{Insn, InsnReference, OwnedValue, Program};
use crate::types::LimboText;
use std::rc::Rc;
pub fn insn_to_str(

View File

@@ -28,6 +28,7 @@ use crate::error::{LimboError, SQLITE_CONSTRAINT_PRIMARYKEY};
use crate::ext::{exec_ts_from_uuid7, exec_uuid, exec_uuidblob, exec_uuidstr, ExtFunc, UuidFunc};
use crate::function::{AggFunc, FuncCtx, MathFunc, MathFuncArity, ScalarFunc};
use crate::pseudo::PseudoCursor;
use crate::result::LimboResult;
use crate::schema::Table;
use crate::storage::sqlite3_ondisk::DatabaseHeader;
use crate::storage::{btree::BTreeCursor, pager::Pager};
@@ -36,10 +37,10 @@ use crate::types::{
};
use crate::util::parse_schema_rows;
#[cfg(feature = "json")]
use crate::{function::JsonFunc, json::get_json, json::json_array};
use crate::{function::JsonFunc, json::get_json, json::json_array, json::json_array_length};
use crate::{Connection, Result, TransactionState};
use crate::{Rows, DATABASE_VERSION};
use macros::Description;
use limbo_macros::Description;
use datetime::{exec_date, exec_time, exec_unixepoch};
@@ -545,6 +546,7 @@ pub enum StepResult<'a> {
IO,
Row(Record<'a>),
Interrupt,
Busy,
}
/// If there is I/O, the instruction is restarted.
@@ -1665,29 +1667,34 @@ impl Program {
}
Insn::Transaction { write } => {
let connection = self.connection.upgrade().unwrap();
if let Some(db) = connection.db.upgrade() {
// TODO(pere): are backpointers good ?? this looks ugly af
// upgrade transaction if needed
let new_transaction_state =
match (db.transaction_state.borrow().clone(), write) {
(crate::TransactionState::Write, true) => TransactionState::Write,
(crate::TransactionState::Write, false) => TransactionState::Write,
(crate::TransactionState::Read, true) => TransactionState::Write,
(crate::TransactionState::Read, false) => TransactionState::Read,
(crate::TransactionState::None, true) => TransactionState::Read,
(crate::TransactionState::None, false) => TransactionState::Read,
};
// TODO(Pere):
// 1. lock wal
// 2. lock shared
// 3. lock write db if write
db.transaction_state.replace(new_transaction_state.clone());
if matches!(new_transaction_state, TransactionState::Write) {
pager.begin_read_tx()?;
} else {
pager.begin_write_tx()?;
let current_state = connection.transaction_state.borrow().clone();
let (new_transaction_state, updated) = match (&current_state, write) {
(crate::TransactionState::Write, true) => (TransactionState::Write, false),
(crate::TransactionState::Write, false) => (TransactionState::Write, false),
(crate::TransactionState::Read, true) => (TransactionState::Write, true),
(crate::TransactionState::Read, false) => (TransactionState::Read, false),
(crate::TransactionState::None, true) => (TransactionState::Write, true),
(crate::TransactionState::None, false) => (TransactionState::Read, true),
};
if updated && matches!(current_state, TransactionState::None) {
if let LimboResult::Busy = pager.begin_read_tx()? {
log::trace!("begin_read_tx busy");
return Ok(StepResult::Busy);
}
}
if updated && matches!(new_transaction_state, TransactionState::Write) {
if let LimboResult::Busy = pager.begin_write_tx()? {
log::trace!("begin_write_tx busy");
return Ok(StepResult::Busy);
}
}
if updated {
connection
.transaction_state
.replace(new_transaction_state.clone());
}
state.pc += 1;
}
Insn::Goto { target_pc } => {
@@ -2282,6 +2289,21 @@ impl Program {
Err(e) => return Err(e),
}
}
#[cfg(feature = "json")]
crate::function::Func::Json(JsonFunc::JsonArrayLength) => {
let json_value = &state.registers[*start_reg];
let path_value = if arg_count > 1 {
Some(&state.registers[*start_reg + 1])
} else {
None
};
let json_array_length = json_array_length(json_value, path_value);
match json_array_length {
Ok(length) => state.registers[*dest] = length,
Err(e) => return Err(e),
}
}
crate::function::Func::Scalar(scalar_func) => match scalar_func {
ScalarFunc::Cast => {
assert!(arg_count == 2);

View File

@@ -1,5 +1,13 @@
# Copyright 2024 the Limbo authors. All rights reserved. MIT license.
[package]
name = "macros"
name = "limbo_macros"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
description = "The Limbo database library"
[lib]
proc-macro = true

View File

@@ -38,11 +38,11 @@ fn main() {
loop {
let row = rows.next_row().unwrap();
match row {
limbo_core::RowResult::Row(_) => {
limbo_core::StepResult::Row(_) => {
count += 1;
}
limbo_core::RowResult::IO => yield,
limbo_core::RowResult::Done => break,
limbo_core::StepResult::IO => yield,
limbo_core::StepResult::Done => break,
}
}
assert!(count == 100);

View File

@@ -22,3 +22,4 @@ log = "0.4.20"
tempfile = "3.0.7"
env_logger = "0.10.1"
anarchist-readable-name-generator-lib = "0.1.2"
clap = { version = "4.5", features = ["derive"] }

View File

@@ -1,6 +1,6 @@
use std::{fmt::Display, rc::Rc};
use limbo_core::{Connection, Result, RowResult};
use limbo_core::{Connection, Result, StepResult};
use rand::SeedableRng;
use rand_chacha::ChaCha8Rng;
@@ -106,7 +106,7 @@ impl Interactions {
.iter_mut()
.find(|t| t.name == insert.table)
.unwrap();
table.rows.push(insert.values.clone());
table.rows.extend(insert.values.clone());
}
Query::Delete(_) => todo!(),
Query::Select(_) => {}
@@ -215,7 +215,7 @@ impl Interaction {
let mut out = Vec::new();
while let Ok(row) = rows.next_row() {
match row {
RowResult::Row(row) => {
StepResult::Row(row) => {
let mut r = Vec::new();
for el in &row.values {
let v = match el {
@@ -230,11 +230,12 @@ impl Interaction {
out.push(r);
}
RowResult::IO => {}
RowResult::Interrupt => {}
RowResult::Done => {
StepResult::IO => {}
StepResult::Interrupt => {}
StepResult::Done => {
break;
}
StepResult::Busy => {}
}
}
@@ -319,7 +320,7 @@ fn property_insert_select<R: rand::Rng>(rng: &mut R, env: &SimulatorEnv) -> Inte
// Insert the row
let insert_query = Interaction::Query(Query::Insert(Insert {
table: table.name.clone(),
values: row.clone(),
values: vec![row.clone()],
}));
// Select the row

View File

@@ -37,10 +37,15 @@ impl ArbitraryFrom<Vec<&Table>> for Select {
impl ArbitraryFrom<Table> for Insert {
fn arbitrary_from<R: Rng>(rng: &mut R, table: &Table) -> Self {
let values = table
.columns
.iter()
.map(|c| Value::arbitrary_from(rng, &c.column_type))
let num_rows = rng.gen_range(1..10);
let values: Vec<Vec<Value>> = (0..num_rows)
.map(|_| {
table
.columns
.iter()
.map(|c| Value::arbitrary_from(rng, &c.column_type))
.collect()
})
.collect();
Insert {
table: table.name.clone(),

View File

@@ -1,58 +1,140 @@
use clap::Parser;
use generation::plan::{Interaction, InteractionPlan, ResultSet};
use generation::{pick, pick_index, Arbitrary, ArbitraryFrom};
use limbo_core::{Connection, Database, File, OpenFlags, PlatformIO, Result, RowResult, IO};
use model::query::{Create, Insert, Predicate, Query, Select};
use model::table::{Column, Name, Table, Value};
use properties::{property_insert_select, property_select_all};
use generation::{pick_index, ArbitraryFrom};
use limbo_core::{Connection, Database, Result, StepResult, IO};
use model::table::Value;
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use std::cell::RefCell;
use runner::cli::SimulatorCLI;
use runner::env::{SimConnection, SimulatorEnv, SimulatorOpts};
use runner::io::SimulatorIO;
use std::backtrace::Backtrace;
use std::io::Write;
use std::path::Path;
use std::rc::Rc;
use std::sync::Arc;
use tempfile::TempDir;
mod generation;
mod model;
mod properties;
struct SimulatorEnv {
opts: SimulatorOpts,
tables: Vec<Table>,
connections: Vec<SimConnection>,
io: Arc<SimulatorIO>,
db: Arc<Database>,
rng: ChaCha8Rng,
}
#[derive(Clone)]
enum SimConnection {
Connected(Rc<Connection>),
Disconnected,
}
#[derive(Debug, Clone)]
struct SimulatorOpts {
ticks: usize,
max_connections: usize,
max_tables: usize,
// this next options are the distribution of workload where read_percent + write_percent +
// delete_percent == 100%
read_percent: usize,
write_percent: usize,
delete_percent: usize,
max_interactions: usize,
page_size: usize,
}
mod runner;
#[allow(clippy::arc_with_non_send_sync)]
fn main() {
let _ = env_logger::try_init();
let seed = match std::env::var("SEED") {
Ok(seed) => seed.parse::<u64>().unwrap(),
Err(_) => rand::thread_rng().next_u64(),
let cli_opts = SimulatorCLI::parse();
let seed = match cli_opts.seed {
Some(seed) => seed,
None => rand::thread_rng().next_u64(),
};
println!("Seed: {}", seed);
let output_dir = match &cli_opts.output_dir {
Some(dir) => Path::new(dir).to_path_buf(),
None => TempDir::new().unwrap().into_path(),
};
let db_path = output_dir.join("simulator.db");
let plan_path = output_dir.join("simulator.plan");
// Print the seed, the locations of the database and the plan file
log::info!("database path: {:?}", db_path);
log::info!("simulator plan path: {:?}", plan_path);
log::info!("seed: {}", seed);
std::panic::set_hook(Box::new(move |info| {
log::error!("panic occurred");
let payload = info.payload();
if let Some(s) = payload.downcast_ref::<&str>() {
log::error!("{}", s);
} else if let Some(s) = payload.downcast_ref::<String>() {
log::error!("{}", s);
} else {
log::error!("unknown panic payload");
}
let bt = Backtrace::force_capture();
log::error!("captured backtrace:\n{}", bt);
}));
let result = std::panic::catch_unwind(|| run_simulation(seed, &cli_opts, &db_path, &plan_path));
if cli_opts.doublecheck {
// Move the old database and plan file to a new location
let old_db_path = db_path.with_extension("_old.db");
let old_plan_path = plan_path.with_extension("_old.plan");
std::fs::rename(&db_path, &old_db_path).unwrap();
std::fs::rename(&plan_path, &old_plan_path).unwrap();
// Run the simulation again
let result2 =
std::panic::catch_unwind(|| run_simulation(seed, &cli_opts, &db_path, &plan_path));
match (result, result2) {
(Ok(Ok(_)), Err(_)) => {
log::error!("doublecheck failed! first run succeeded, but second run panicked.");
}
(Ok(Err(_)), Err(_)) => {
log::error!(
"doublecheck failed! first run failed assertion, but second run panicked."
);
}
(Err(_), Ok(Ok(_))) => {
log::error!("doublecheck failed! first run panicked, but second run succeeded.");
}
(Err(_), Ok(Err(_))) => {
log::error!(
"doublecheck failed! first run panicked, but second run failed assertion."
);
}
(Ok(Ok(_)), Ok(Err(_))) => {
log::error!(
"doublecheck failed! first run succeeded, but second run failed assertion."
);
}
(Ok(Err(_)), Ok(Ok(_))) => {
log::error!(
"doublecheck failed! first run failed assertion, but second run succeeded."
);
}
(Err(_), Err(_)) | (Ok(_), Ok(_)) => {
// Compare the two database files byte by byte
let old_db = std::fs::read(&old_db_path).unwrap();
let new_db = std::fs::read(&db_path).unwrap();
if old_db != new_db {
log::error!("doublecheck failed! database files are different.");
} else {
log::info!("doublecheck succeeded! database files are the same.");
}
}
}
// Move the new database and plan file to a new location
let new_db_path = db_path.with_extension("_double.db");
let new_plan_path = plan_path.with_extension("_double.plan");
std::fs::rename(&db_path, &new_db_path).unwrap();
std::fs::rename(&plan_path, &new_plan_path).unwrap();
// Move the old database and plan file back
std::fs::rename(&old_db_path, &db_path).unwrap();
std::fs::rename(&old_plan_path, &plan_path).unwrap();
}
// Print the seed, the locations of the database and the plan file at the end again for easily accessing them.
println!("database path: {:?}", db_path);
println!("simulator plan path: {:?}", plan_path);
println!("seed: {}", seed);
}
fn run_simulation(
seed: u64,
cli_opts: &SimulatorCLI,
db_path: &Path,
plan_path: &Path,
) -> Result<()> {
let mut rng = ChaCha8Rng::seed_from_u64(seed);
let (read_percent, write_percent, delete_percent) = {
@@ -65,8 +147,12 @@ fn main() {
(read_percent, write_percent, delete_percent)
};
if cli_opts.maximum_size < 1 {
panic!("maximum size must be at least 1");
}
let opts = SimulatorOpts {
ticks: rng.gen_range(0..10240),
ticks: rng.gen_range(1..=cli_opts.maximum_size),
max_connections: 1, // TODO: for now let's use one connection as we didn't implement
// correct transactions procesing
max_tables: rng.gen_range(0..128),
@@ -74,21 +160,19 @@ fn main() {
write_percent,
delete_percent,
page_size: 4096, // TODO: randomize this too
max_interactions: rng.gen_range(0..10240),
max_interactions: rng.gen_range(1..=cli_opts.maximum_size),
};
let io = Arc::new(SimulatorIO::new(seed, opts.page_size).unwrap());
let mut path = TempDir::new().unwrap().into_path();
path.push("simulator.db");
println!("path to db '{:?}'", path);
let db = match Database::open_file(io.clone(), path.as_path().to_str().unwrap()) {
let db = match Database::open_file(io.clone(), db_path.to_str().unwrap()) {
Ok(db) => db,
Err(e) => {
panic!("error opening simulator test file {:?}: {:?}", path, e);
panic!("error opening simulator test file {:?}: {:?}", db_path, e);
}
};
let connections = vec![SimConnection::Disconnected; opts.max_connections];
let mut env = SimulatorEnv {
opts,
tables: Vec::new(),
@@ -98,30 +182,29 @@ fn main() {
db,
};
println!("Initial opts {:?}", env.opts);
log::info!("Generating database interaction plan...");
let mut plans = (1..=env.opts.max_connections)
.map(|_| InteractionPlan::arbitrary_from(&mut env.rng.clone(), &env))
.collect::<Vec<_>>();
let mut f = std::fs::File::create(plan_path).unwrap();
// todo: create a detailed plan file with all the plans. for now, we only use 1 connection, so it's safe to use the first plan.
f.write(plans[0].to_string().as_bytes()).unwrap();
log::info!("{}", plans[0].stats());
log::info!("Executing database interaction plan...");
let result = execute_plans(&mut env, &mut plans);
let result = execute_plans(&mut env, &mut plans);
if result.is_err() {
log::error!("error executing plans: {:?}", result.err());
log::error!("error executing plans: {:?}", result.as_ref().err());
}
log::info!("db is at {:?}", path);
let mut path = TempDir::new().unwrap().into_path();
path.push("simulator.plan");
let mut f = std::fs::File::create(path.clone()).unwrap();
f.write(plans[0].to_string().as_bytes()).unwrap();
log::info!("plan saved at {:?}", path);
log::info!("seed was {}", seed);
env.io.print_stats();
log::info!("Simulation completed");
result
}
fn execute_plans(env: &mut SimulatorEnv, plans: &mut Vec<InteractionPlan>) -> Result<()> {
@@ -208,270 +291,3 @@ fn compare_equal_rows(a: &[Vec<Value>], b: &[Vec<Value>]) {
}
}
}
/// If the schema still has room (`max_tables`), generate a random table,
/// execute its CREATE statement, verify via `sqlite_schema` that the
/// statement was recorded, and register the table in the shadow model.
fn maybe_add_table(env: &mut SimulatorEnv, conn: &mut Rc<Connection>) -> Result<()> {
    if env.tables.len() < env.opts.max_tables {
        let table = Table {
            rows: Vec::new(),
            name: Name::arbitrary(&mut env.rng).0,
            // NOTE(review): when the inner `gen_range(1..128)` draws 1, the
            // range `1..1` is empty and the table gets zero columns — confirm
            // that generating a zero-column CREATE here is intended.
            columns: (1..env.rng.gen_range(1..128))
                .map(|_| Column::arbitrary(&mut env.rng))
                .collect(),
        };
        let query = Query::Create(Create {
            table: table.clone(),
        });
        // Execute the CREATE TABLE statement; the result rows are only logged.
        let rows = get_all_rows(env, conn, query.to_string().as_str())?;
        log::debug!("{:?}", rows);
        // Read back the stored SQL text for the freshly created object.
        let rows = get_all_rows(
            env,
            conn,
            format!(
                "SELECT sql FROM sqlite_schema WHERE type IN ('table', 'index') AND name = '{}';",
                table.name
            )
            .as_str(),
        )?;
        log::debug!("{:?}", rows);
        assert!(rows.len() == 1);
        let as_text = match &rows[0][0] {
            Value::Text(t) => t,
            _ => unreachable!(),
        };
        // NOTE(review): this asserts the stored SQL is *different* from the
        // executed statement, yet the failure message says the table "was not
        // inserted correctly" — the condition looks inverted (should it be
        // `==`?). Confirm against `Query`'s Display output before changing.
        assert!(
            *as_text != query.to_string(),
            "table was not inserted correctly"
        );
        env.tables.push(table);
    }
    Ok(())
}
/// Execute `query` on `conn` and drain every result row into the simulator's
/// owned `Value` representation.
///
/// A fault is injected into the simulated IO layer with a 1/10000 chance
/// before each step; if an injected fault makes `run_once` fail, collection
/// stops early and the rows gathered so far are returned as `Ok`.
fn get_all_rows(
    env: &mut SimulatorEnv,
    conn: &mut Rc<Connection>,
    query: &str,
) -> Result<Vec<Vec<Value>>> {
    // Truncate very long statements so log lines stay bounded.
    log::info!("running query '{}'", &query[0..query.len().min(4096)]);
    let mut out = Vec::new();
    // Idiomatic error propagation: previously this used an
    // `is_err()/err()/unwrap()` chain and logged the `Option` wrapper
    // (`Some(..)`) around the error instead of the error itself.
    let rows = match conn.query(query) {
        Ok(rows) => rows,
        Err(err) => {
            log::error!(
                "Error running query '{}': {:?}",
                &query[0..query.len().min(4096)],
                err
            );
            return Err(err);
        }
    };
    // Every statement run through this helper is expected to yield a cursor.
    let mut rows = rows.expect("query unexpectedly produced no row cursor");
    'rows_loop: loop {
        env.io.inject_fault(env.rng.gen_ratio(1, 10000));
        match rows.next_row()? {
            RowResult::Row(row) => {
                // Convert core values into owned simulator model values.
                let r: Vec<Value> = row
                    .values
                    .iter()
                    .map(|el| match el {
                        limbo_core::Value::Null => Value::Null,
                        limbo_core::Value::Integer(i) => Value::Integer(*i),
                        limbo_core::Value::Float(f) => Value::Float(*f),
                        limbo_core::Value::Text(t) => Value::Text(t.to_string()),
                        limbo_core::Value::Blob(b) => Value::Blob(b.to_vec()),
                    })
                    .collect();
                out.push(r);
            }
            RowResult::IO => {
                env.io.inject_fault(env.rng.gen_ratio(1, 10000));
                if env.io.run_once().is_err() {
                    // An injected fault aborted the IO step; stop collecting.
                    log::info!("query inject fault");
                    break 'rows_loop;
                }
            }
            RowResult::Interrupt => {
                break;
            }
            RowResult::Done => {
                break;
            }
        }
    }
    Ok(out)
}
/// An `IO` implementation that wraps the real platform IO, tracks every file
/// it opens, and can inject deterministic faults on demand.
struct SimulatorIO {
    inner: Box<dyn IO>,
    // When true, `run_once` fails with an injected error.
    fault: RefCell<bool>,
    // Every file opened through this IO, so faults can be propagated to them.
    files: RefCell<Vec<Rc<SimulatorFile>>>,
    // Deterministic RNG backing `generate_random_number`.
    rng: RefCell<ChaCha8Rng>,
    // How many times `run_once` failed due to an injected fault.
    nr_run_once_faults: RefCell<usize>,
    // Page size handed to each opened `SimulatorFile`.
    page_size: usize,
}
impl SimulatorIO {
    /// Build the simulated IO layer: real platform IO underneath, a
    /// deterministic RNG seeded with `seed`, and fault injection disabled.
    fn new(seed: u64, page_size: usize) -> Result<Self> {
        Ok(Self {
            inner: Box::new(PlatformIO::new()?),
            fault: RefCell::new(false),
            files: RefCell::new(Vec::new()),
            rng: RefCell::new(ChaCha8Rng::seed_from_u64(seed)),
            nr_run_once_faults: RefCell::new(0),
            page_size,
        })
    }

    /// Propagate the fault flag to this layer and every file opened so far.
    fn inject_fault(&self, fault: bool) {
        self.fault.replace(fault);
        self.files
            .borrow()
            .iter()
            .for_each(|file| file.inject_fault(fault));
    }

    /// Print accumulated counters for the layer and each open file.
    fn print_stats(&self) {
        println!("run_once faults: {}", self.nr_run_once_faults.borrow());
        self.files
            .borrow()
            .iter()
            .for_each(|file| file.print_stats());
    }
}
impl IO for SimulatorIO {
    /// Open `path` through the real platform IO and wrap it in a
    /// fault-injectable `SimulatorFile`, registering it so that later
    /// `inject_fault` calls reach it.
    fn open_file(
        &self,
        path: &str,
        flags: OpenFlags,
        _direct: bool,
    ) -> Result<Rc<dyn limbo_core::File>> {
        // The simulator always opens the underlying file non-direct,
        // regardless of the caller's `_direct` request.
        let inner = self.inner.open_file(path, flags, false)?;
        let file = Rc::new(SimulatorFile {
            inner,
            fault: RefCell::new(false),
            nr_pread_faults: RefCell::new(0),
            nr_pwrite_faults: RefCell::new(0),
            reads: RefCell::new(0),
            writes: RefCell::new(0),
            syncs: RefCell::new(0),
            page_size: self.page_size,
        });
        self.files.borrow_mut().push(file.clone());
        Ok(file)
    }

    /// Run one iteration of the underlying event loop, or fail immediately
    /// with an injected fault when the fault flag is set.
    fn run_once(&self) -> Result<()> {
        if *self.fault.borrow() {
            *self.nr_run_once_faults.borrow_mut() += 1;
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        // Propagate real IO errors instead of panicking (was `.unwrap()`),
        // so an unexpected platform failure surfaces as a simulator error.
        self.inner.run_once()
    }

    /// Deterministic "random" numbers drawn from the seeded simulator RNG
    /// rather than a real entropy source.
    fn generate_random_number(&self) -> i64 {
        self.rng.borrow_mut().next_u64() as i64
    }

    /// Fixed timestamp keeps simulation runs reproducible.
    fn get_current_time(&self) -> String {
        "2024-01-01 00:00:00".to_string()
    }
}
/// A `File` wrapper that counts operations and can be made to fail on demand
/// for fault-injection testing.
struct SimulatorFile {
    inner: Rc<dyn File>,
    // When true, lock/unlock/pread/pwrite return an injected error.
    fault: RefCell<bool>,
    // Counts of operations rejected because of an injected fault.
    nr_pread_faults: RefCell<usize>,
    nr_pwrite_faults: RefCell<usize>,
    // Counts of operations forwarded to the real file.
    writes: RefCell<usize>,
    reads: RefCell<usize>,
    syncs: RefCell<usize>,
    // NOTE(review): not read anywhere in this impl — confirm it is needed.
    page_size: usize,
}
impl SimulatorFile {
    /// Turn fault injection on or off for this file.
    fn inject_fault(&self, fault: bool) {
        self.fault.replace(fault);
    }

    /// Dump this file's fault and IO counters to stdout.
    fn print_stats(&self) {
        let pread_faults = *self.nr_pread_faults.borrow();
        let pwrite_faults = *self.nr_pwrite_faults.borrow();
        let reads = *self.reads.borrow();
        let writes = *self.writes.borrow();
        let syncs = *self.syncs.borrow();
        println!(
            "pread faults: {}, pwrite faults: {}, reads: {}, writes: {}, syncs: {}",
            pread_faults, pwrite_faults, reads, writes, syncs,
        );
    }
}
impl limbo_core::File for SimulatorFile {
    /// Locking fails outright while a fault is injected; otherwise delegates
    /// to the real file.
    fn lock_file(&self, exclusive: bool) -> Result<()> {
        if *self.fault.borrow() {
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        self.inner.lock_file(exclusive)
    }

    fn unlock_file(&self) -> Result<()> {
        if *self.fault.borrow() {
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        self.inner.unlock_file()
    }

    /// Injected faults are tallied in `nr_pread_faults` and returned as
    /// errors without touching the real file; successes count in `reads`.
    fn pread(&self, pos: usize, c: Rc<limbo_core::Completion>) -> Result<()> {
        if *self.fault.borrow() {
            *self.nr_pread_faults.borrow_mut() += 1;
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        *self.reads.borrow_mut() += 1;
        self.inner.pread(pos, c)
    }

    /// Same fault/counter scheme as `pread`, for writes.
    fn pwrite(
        &self,
        pos: usize,
        buffer: Rc<std::cell::RefCell<limbo_core::Buffer>>,
        c: Rc<limbo_core::Completion>,
    ) -> Result<()> {
        if *self.fault.borrow() {
            *self.nr_pwrite_faults.borrow_mut() += 1;
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        *self.writes.borrow_mut() += 1;
        self.inner.pwrite(pos, buffer, c)
    }

    // NOTE(review): unlike lock/unlock/pread/pwrite, `sync` does not check the
    // fault flag — confirm whether syncs are deliberately exempt from fault
    // injection.
    fn sync(&self, c: Rc<limbo_core::Completion>) -> Result<()> {
        *self.syncs.borrow_mut() += 1;
        self.inner.sync(c)
    }

    fn size(&self) -> Result<u64> {
        self.inner.size()
    }
}
impl Drop for SimulatorFile {
fn drop(&mut self) {
self.inner.unlock_file().expect("Failed to unlock file");
}
}

View File

@@ -75,7 +75,7 @@ pub(crate) struct Select {
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct Insert {
pub(crate) table: String,
pub(crate) values: Vec<Value>,
pub(crate) values: Vec<Vec<Value>>,
}
#[derive(Clone, Debug, PartialEq)]
@@ -104,14 +104,21 @@ impl Display for Query {
predicate: guard,
}) => write!(f, "SELECT * FROM {} WHERE {}", table, guard),
Query::Insert(Insert { table, values }) => {
write!(f, "INSERT INTO {} VALUES (", table)?;
for (i, v) in values.iter().enumerate() {
write!(f, "INSERT INTO {} VALUES ", table)?;
for (i, row) in values.iter().enumerate() {
if i != 0 {
write!(f, ", ")?;
}
write!(f, "{}", v)?;
write!(f, "(")?;
for (j, value) in row.iter().enumerate() {
if j != 0 {
write!(f, ", ")?;
}
write!(f, "{}", value)?;
}
write!(f, ")")?;
}
write!(f, ")")
Ok(())
}
Query::Delete(Delete {
table,

View File

@@ -1,78 +0,0 @@
use std::rc::Rc;
use limbo_core::Connection;
use rand::Rng;
use crate::{
compare_equal_rows,
generation::ArbitraryFrom,
get_all_rows,
model::{
query::{Insert, Predicate, Query, Select},
table::Value,
},
SimulatorEnv,
};
/// Property: after inserting a freshly generated row, selecting by one of its
/// column values must return that row. The row is also pushed into the shadow
/// model so later properties can compare against it.
pub fn property_insert_select(env: &mut SimulatorEnv, conn: &mut Rc<Connection>) {
    // Get a random table
    let table = env.rng.gen_range(0..env.tables.len());
    // Pick a random column
    let column_index = env.rng.gen_range(0..env.tables[table].columns.len());
    // NOTE(review): `&…clone()` takes a reference to a temporary clone; a
    // plain owned binding would be clearer — confirm nothing relies on this.
    let column = &env.tables[table].columns[column_index].clone();
    // NOTE(review): cloning the RNG forks the stream — the draws below do not
    // advance `env.rng`, so later generation may repeat these values; confirm
    // this is intended.
    let mut rng = env.rng.clone();
    // Generate a random value of the column type
    let value = Value::arbitrary_from(&mut rng, &column.column_type);
    // Create a whole new row
    let mut row = Vec::new();
    for (i, column) in env.tables[table].columns.iter().enumerate() {
        if i == column_index {
            row.push(value.clone());
        } else {
            let value = Value::arbitrary_from(&mut rng, &column.column_type);
            row.push(value);
        }
    }
    // Insert the row
    let query = Query::Insert(Insert {
        table: env.tables[table].name.clone(),
        values: row.clone(),
    });
    let _ = get_all_rows(env, conn, query.to_string().as_str()).unwrap();
    // Shadow operation on the table
    env.tables[table].rows.push(row.clone());
    // Create a query that selects the row
    let query = Query::Select(Select {
        table: env.tables[table].name.clone(),
        predicate: Predicate::Eq(column.name.clone(), value),
    });
    // Get all rows
    let rows = get_all_rows(env, conn, query.to_string().as_str()).unwrap();
    // Check that the row is there
    assert!(rows.iter().any(|r| r == &row));
}
/// Property: selecting with an always-true predicate returns exactly the rows
/// tracked in the simulator's shadow model for that table.
pub fn property_select_all(env: &mut SimulatorEnv, conn: &mut Rc<Connection>) {
    // Pick a table uniformly at random.
    let table_index = env.rng.gen_range(0..env.tables.len());
    // An empty AND is vacuously true, so this query selects every row.
    let select_all = Query::Select(Select {
        table: env.tables[table_index].name.clone(),
        predicate: Predicate::And(Vec::new()),
    });
    let actual = get_all_rows(env, conn, select_all.to_string().as_str()).unwrap();
    // The database contents must match the shadow model exactly.
    compare_equal_rows(&actual, &env.tables[table_index].rows);
}

24
simulator/runner/cli.rs Normal file
View File

@@ -0,0 +1,24 @@
use clap::{command, Parser};
// Command-line options for the limbo simulator. Plain `//` comments are used
// on purpose: clap's derive turns `///` doc comments into help/about text,
// which would change the CLI's output.
#[derive(Parser)]
#[command(name = "limbo-simulator")]
#[command(author, version, about, long_about = None)]
pub struct SimulatorCLI {
    // Passing the same seed reproduces a previous run; absent → random seed.
    #[clap(short, long, help = "set seed for reproducible runs", default_value = None)]
    pub seed: Option<u64>,
    // Directory where the produced database/plan files are written.
    #[clap(short, long, help = "set custom output directory for produced files", default_value = None)]
    pub output_dir: Option<String>,
    // Run the same plan twice and compare the results for divergence.
    #[clap(
        short,
        long,
        help = "enable doublechecking, run the simulator with the plan twice and check output equality"
    )]
    pub doublecheck: bool,
    // Upper bound used when randomly sizing the interaction sequence.
    #[clap(
        short,
        long,
        help = "change the maximum size of the randomly generated sequence of interactions",
        default_value_t = 1024
    )]
    pub maximum_size: usize,
}

38
simulator/runner/env.rs Normal file
View File

@@ -0,0 +1,38 @@
use std::rc::Rc;
use std::sync::Arc;
use limbo_core::{Connection, Database};
use rand_chacha::ChaCha8Rng;
use crate::model::table::Table;
use crate::runner::io::SimulatorIO;
/// Shared state for one simulation run: configuration, the shadow model of
/// all tables, open connections, the fault-injecting IO layer, the database
/// handle, and the deterministic RNG driving generation.
pub(crate) struct SimulatorEnv {
    pub(crate) opts: SimulatorOpts,
    pub(crate) tables: Vec<Table>,
    pub(crate) connections: Vec<SimConnection>,
    pub(crate) io: Arc<SimulatorIO>,
    pub(crate) db: Arc<Database>,
    pub(crate) rng: ChaCha8Rng,
}

/// A simulated connection slot: either holds a live connection or is free.
#[derive(Clone)]
pub(crate) enum SimConnection {
    Connected(Rc<Connection>),
    Disconnected,
}

/// Per-run knobs chosen (mostly randomly) at startup.
#[derive(Debug, Clone)]
pub(crate) struct SimulatorOpts {
    pub(crate) ticks: usize,
    pub(crate) max_connections: usize,
    pub(crate) max_tables: usize,
    // The next options define the workload distribution; read_percent +
    // write_percent + delete_percent must sum to 100.
    pub(crate) read_percent: usize,
    pub(crate) write_percent: usize,
    pub(crate) delete_percent: usize,
    pub(crate) max_interactions: usize,
    pub(crate) page_size: usize,
}

93
simulator/runner/file.rs Normal file
View File

@@ -0,0 +1,93 @@
use std::{cell::RefCell, rc::Rc};
use limbo_core::{File, Result};
/// A `File` wrapper that counts operations and can be made to fail on demand
/// for fault-injection testing.
pub(crate) struct SimulatorFile {
    pub(crate) inner: Rc<dyn File>,
    // When true, lock/unlock/pread/pwrite return an injected error.
    pub(crate) fault: RefCell<bool>,
    // Counts of operations rejected because of an injected fault.
    pub(crate) nr_pread_faults: RefCell<usize>,
    pub(crate) nr_pwrite_faults: RefCell<usize>,
    // Counts of operations forwarded to the real file.
    pub(crate) writes: RefCell<usize>,
    pub(crate) reads: RefCell<usize>,
    pub(crate) syncs: RefCell<usize>,
    // NOTE(review): not read anywhere in this module — confirm it is needed.
    pub(crate) page_size: usize,
}
impl SimulatorFile {
    /// Turn fault injection on or off for this file.
    pub(crate) fn inject_fault(&self, fault: bool) {
        self.fault.replace(fault);
    }

    /// Dump this file's fault and IO counters to stdout.
    pub(crate) fn print_stats(&self) {
        let pread_faults = *self.nr_pread_faults.borrow();
        let pwrite_faults = *self.nr_pwrite_faults.borrow();
        let reads = *self.reads.borrow();
        let writes = *self.writes.borrow();
        let syncs = *self.syncs.borrow();
        println!(
            "pread faults: {}, pwrite faults: {}, reads: {}, writes: {}, syncs: {}",
            pread_faults, pwrite_faults, reads, writes, syncs,
        );
    }
}
impl limbo_core::File for SimulatorFile {
    /// Locking fails outright while a fault is injected; otherwise delegates
    /// to the real file.
    fn lock_file(&self, exclusive: bool) -> Result<()> {
        if *self.fault.borrow() {
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        self.inner.lock_file(exclusive)
    }

    fn unlock_file(&self) -> Result<()> {
        if *self.fault.borrow() {
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        self.inner.unlock_file()
    }

    /// Injected faults are tallied in `nr_pread_faults` and returned as
    /// errors without touching the real file; successes count in `reads`.
    fn pread(&self, pos: usize, c: Rc<limbo_core::Completion>) -> Result<()> {
        if *self.fault.borrow() {
            *self.nr_pread_faults.borrow_mut() += 1;
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        *self.reads.borrow_mut() += 1;
        self.inner.pread(pos, c)
    }

    /// Same fault/counter scheme as `pread`, for writes.
    fn pwrite(
        &self,
        pos: usize,
        buffer: Rc<std::cell::RefCell<limbo_core::Buffer>>,
        c: Rc<limbo_core::Completion>,
    ) -> Result<()> {
        if *self.fault.borrow() {
            *self.nr_pwrite_faults.borrow_mut() += 1;
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        *self.writes.borrow_mut() += 1;
        self.inner.pwrite(pos, buffer, c)
    }

    // NOTE(review): unlike lock/unlock/pread/pwrite, `sync` does not check the
    // fault flag — confirm whether syncs are deliberately exempt from fault
    // injection.
    fn sync(&self, c: Rc<limbo_core::Completion>) -> Result<()> {
        *self.syncs.borrow_mut() += 1;
        self.inner.sync(c)
    }

    fn size(&self) -> Result<u64> {
        self.inner.size()
    }
}
impl Drop for SimulatorFile {
fn drop(&mut self) {
self.inner.unlock_file().expect("Failed to unlock file");
}
}

90
simulator/runner/io.rs Normal file
View File

@@ -0,0 +1,90 @@
use std::{cell::RefCell, rc::Rc};
use limbo_core::{OpenFlags, PlatformIO, Result, IO};
use rand::{RngCore, SeedableRng};
use rand_chacha::ChaCha8Rng;
use crate::runner::file::SimulatorFile;
/// An `IO` implementation that wraps the real platform IO, tracks every file
/// it opens, and can inject deterministic faults on demand.
pub(crate) struct SimulatorIO {
    pub(crate) inner: Box<dyn IO>,
    // When true, `run_once` fails with an injected error.
    pub(crate) fault: RefCell<bool>,
    // Every file opened through this IO, so faults can be propagated to them.
    pub(crate) files: RefCell<Vec<Rc<SimulatorFile>>>,
    // Deterministic RNG backing `generate_random_number`.
    pub(crate) rng: RefCell<ChaCha8Rng>,
    // How many times `run_once` failed due to an injected fault.
    pub(crate) nr_run_once_faults: RefCell<usize>,
    // Page size handed to each opened `SimulatorFile`.
    pub(crate) page_size: usize,
}
impl SimulatorIO {
    /// Build the simulated IO layer: real platform IO underneath, a
    /// deterministic RNG seeded with `seed`, and fault injection disabled.
    pub(crate) fn new(seed: u64, page_size: usize) -> Result<Self> {
        Ok(Self {
            inner: Box::new(PlatformIO::new()?),
            fault: RefCell::new(false),
            files: RefCell::new(Vec::new()),
            rng: RefCell::new(ChaCha8Rng::seed_from_u64(seed)),
            nr_run_once_faults: RefCell::new(0),
            page_size,
        })
    }

    /// Propagate the fault flag to this layer and every file opened so far.
    pub(crate) fn inject_fault(&self, fault: bool) {
        self.fault.replace(fault);
        self.files
            .borrow()
            .iter()
            .for_each(|file| file.inject_fault(fault));
    }

    /// Print accumulated counters for the layer and each open file.
    pub(crate) fn print_stats(&self) {
        println!("run_once faults: {}", self.nr_run_once_faults.borrow());
        self.files
            .borrow()
            .iter()
            .for_each(|file| file.print_stats());
    }
}
impl IO for SimulatorIO {
    /// Open `path` through the real platform IO and wrap it in a
    /// fault-injectable `SimulatorFile`, registering it so that later
    /// `inject_fault` calls reach it.
    fn open_file(
        &self,
        path: &str,
        flags: OpenFlags,
        _direct: bool,
    ) -> Result<Rc<dyn limbo_core::File>> {
        // The simulator always opens the underlying file non-direct,
        // regardless of the caller's `_direct` request.
        let inner = self.inner.open_file(path, flags, false)?;
        let file = Rc::new(SimulatorFile {
            inner,
            fault: RefCell::new(false),
            nr_pread_faults: RefCell::new(0),
            nr_pwrite_faults: RefCell::new(0),
            reads: RefCell::new(0),
            writes: RefCell::new(0),
            syncs: RefCell::new(0),
            page_size: self.page_size,
        });
        self.files.borrow_mut().push(file.clone());
        Ok(file)
    }

    /// Run one iteration of the underlying event loop, or fail immediately
    /// with an injected fault when the fault flag is set.
    fn run_once(&self) -> Result<()> {
        if *self.fault.borrow() {
            *self.nr_run_once_faults.borrow_mut() += 1;
            return Err(limbo_core::LimboError::InternalError(
                "Injected fault".into(),
            ));
        }
        // Propagate real IO errors instead of panicking (was `.unwrap()`),
        // so an unexpected platform failure surfaces as a simulator error.
        self.inner.run_once()
    }

    /// Deterministic "random" numbers drawn from the seeded simulator RNG
    /// rather than a real entropy source.
    fn generate_random_number(&self) -> i64 {
        self.rng.borrow_mut().next_u64() as i64
    }

    /// Fixed timestamp keeps simulation runs reproducible.
    fn get_current_time(&self) -> String {
        "2024-01-01 00:00:00".to_string()
    }
}

4
simulator/runner/mod.rs Normal file
View File

@@ -0,0 +1,4 @@
/// Command-line argument definitions for the simulator binary.
pub mod cli;
/// Shared simulator environment: options, shadow tables, connections, RNG.
pub mod env;
/// Fault-injecting wrapper around a core `File`.
pub mod file;
/// Fault-injecting wrapper around the platform `IO` implementation.
pub mod io;

View File

@@ -239,13 +239,14 @@ pub unsafe extern "C" fn sqlite3_step(stmt: *mut sqlite3_stmt) -> std::ffi::c_in
let stmt = &mut *stmt;
if let Ok(result) = stmt.stmt.step() {
match result {
limbo_core::RowResult::IO => SQLITE_BUSY,
limbo_core::RowResult::Done => SQLITE_DONE,
limbo_core::RowResult::Interrupt => SQLITE_INTERRUPT,
limbo_core::RowResult::Row(row) => {
limbo_core::StepResult::IO => SQLITE_BUSY,
limbo_core::StepResult::Done => SQLITE_DONE,
limbo_core::StepResult::Interrupt => SQLITE_INTERRUPT,
limbo_core::StepResult::Row(row) => {
stmt.row.replace(Some(row));
SQLITE_ROW
}
limbo_core::StepResult::Busy => SQLITE_BUSY,
}
} else {
SQLITE_ERROR

View File

@@ -40,7 +40,7 @@ impl TempDatabase {
#[cfg(test)]
mod tests {
use super::*;
use limbo_core::{CheckpointStatus, Connection, RowResult, Value};
use limbo_core::{CheckpointStatus, Connection, StepResult, Value};
use log::debug;
#[ignore]
@@ -63,10 +63,10 @@ mod tests {
match conn.query(insert_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Done => break,
StepResult::Done => break,
_ => unreachable!(),
}
},
@@ -80,7 +80,7 @@ mod tests {
match conn.query(list_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
let first_value = row.values.first().expect("missing id");
let id = match first_value {
Value::Integer(i) => *i as i32,
@@ -90,11 +90,14 @@ mod tests {
assert_eq!(current_read_index, id);
current_read_index += 1;
}
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => {
panic!("Database is busy");
}
}
},
Ok(None) => {}
@@ -124,10 +127,10 @@ mod tests {
match conn.query(insert_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Done => break,
StepResult::Done => break,
_ => unreachable!(),
}
},
@@ -143,7 +146,7 @@ mod tests {
match conn.query(list_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
let first_value = &row.values[0];
let text = &row.values[1];
let id = match first_value {
@@ -158,11 +161,12 @@ mod tests {
assert_eq!(1, id);
compare_string(&huge_text, text);
}
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => unreachable!(),
}
},
Ok(None) => {}
@@ -196,10 +200,10 @@ mod tests {
match conn.query(insert_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Done => break,
StepResult::Done => break,
_ => unreachable!(),
}
},
@@ -215,7 +219,7 @@ mod tests {
match conn.query(list_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
let first_value = &row.values[0];
let text = &row.values[1];
let id = match first_value {
@@ -232,11 +236,12 @@ mod tests {
compare_string(huge_text, text);
current_index += 1;
}
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => unreachable!(),
}
},
Ok(None) => {}
@@ -264,10 +269,10 @@ mod tests {
match conn.query(insert_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Done => break,
StepResult::Done => break,
_ => unreachable!(),
}
},
@@ -285,7 +290,7 @@ mod tests {
match conn.query(list_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
let first_value = &row.values[0];
let id = match first_value {
Value::Integer(i) => *i as i32,
@@ -295,11 +300,12 @@ mod tests {
assert_eq!(current_index, id as usize);
current_index += 1;
}
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => unreachable!(),
}
},
Ok(None) => {}
@@ -323,10 +329,10 @@ mod tests {
match conn.query(insert_query) {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Done => break,
StepResult::Done => break,
_ => unreachable!(),
}
},
@@ -347,7 +353,7 @@ mod tests {
if let Some(ref mut rows) = conn.query(list_query).unwrap() {
loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
let first_value = &row.values[0];
let count = match first_value {
Value::Integer(i) => *i as i32,
@@ -356,11 +362,12 @@ mod tests {
log::debug!("counted {}", count);
return Ok(count as usize);
}
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => panic!("Database is busy"),
}
}
}
@@ -429,10 +436,10 @@ mod tests {
if let Some(ref mut rows) = insert_query {
loop {
match rows.next_row()? {
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Done => break,
StepResult::Done => break,
_ => unreachable!(),
}
}
@@ -443,16 +450,17 @@ mod tests {
if let Some(ref mut rows) = select_query {
loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
if let Value::Integer(id) = row.values[0] {
assert_eq!(id, 1, "First insert should have rowid 1");
}
}
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => panic!("Database is busy"),
}
}
}
@@ -461,10 +469,10 @@ mod tests {
match conn.query("INSERT INTO test_rowid (id, val) VALUES (5, 'test2')") {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Done => break,
StepResult::Done => break,
_ => unreachable!(),
}
},
@@ -477,16 +485,17 @@ mod tests {
match conn.query("SELECT last_insert_rowid()") {
Ok(Some(ref mut rows)) => loop {
match rows.next_row()? {
RowResult::Row(row) => {
StepResult::Row(row) => {
if let Value::Integer(id) = row.values[0] {
last_id = id;
}
}
RowResult::IO => {
StepResult::IO => {
tmp_db.io.run_once()?;
}
RowResult::Interrupt => break,
RowResult::Done => break,
StepResult::Interrupt => break,
StepResult::Done => break,
StepResult::Busy => panic!("Database is busy"),
}
},
Ok(None) => {}

View File

@@ -83,3 +83,35 @@ do_execsql_test json_array_json {
do_execsql_test json_array_nested {
SELECT json_array(json_array(1,2,3), json('[1,2,3]'), '[1,2,3]')
} {{[[1,2,3],[1,2,3],"[1,2,3]"]}}
# Top-level array: length is its element count.
do_execsql_test json_array_length {
SELECT json_array_length('[1,2,3,4]');
} {{4}}
# An empty array has length 0.
do_execsql_test json_array_length_empty {
SELECT json_array_length('[]');
} {{0}}
# Explicit root path '$' behaves the same as no path.
do_execsql_test json_array_length_root {
SELECT json_array_length('[1,2,3,4]', '$');
} {{4}}
# A non-array value (here an object) reports length 0.
do_execsql_test json_array_length_not_array {
SELECT json_array_length('{"one":[1,2,3]}');
} {{0}}
# Path resolving to an array inside an object counts that array.
do_execsql_test json_array_length_via_prop {
SELECT json_array_length('{"one":[1,2,3]}', '$.one');
} {{3}}
# Path indexing into a nested array counts the inner array.
do_execsql_test json_array_length_via_index {
SELECT json_array_length('[[1,2,3,4]]', '$[0]');
} {{4}}
# Path resolving to a scalar element reports length 0.
do_execsql_test json_array_length_via_index_not_array {
SELECT json_array_length('[1,2,3,4]', '$[2]');
} {{0}}
# Path that matches nothing yields NULL (empty result).
do_execsql_test json_array_length_via_bad_prop {
SELECT json_array_length('{"one":[1,2,3]}', '$.two');
} {{}}