From bf37fd33140bbdb6f2aa7fb206963fb1ace72e1e Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 27 Mar 2025 13:24:34 +0100 Subject: [PATCH 01/22] wip --- bindings/go/rs_src/types.rs | 16 +- bindings/java/rs_src/limbo_statement.rs | 10 +- bindings/javascript/src/lib.rs | 14 +- bindings/python/src/lib.rs | 12 +- bindings/rust/src/lib.rs | 2 +- bindings/wasm/lib.rs | 12 +- cli/app.rs | 16 +- core/lib.rs | 3 +- core/pseudo.rs | 8 +- core/storage/btree.rs | 158 ++++++----- core/types.rs | 255 ++++++++++++++++-- core/vdbe/mod.rs | 41 ++- core/vdbe/sorter.rs | 12 +- simulator/generation/plan.rs | 10 +- sqlite3/src/lib.rs | 2 +- testing/testing.db | Bin 1212416 -> 1216512 bytes .../functions/test_function_rowid.rs | 6 +- tests/integration/fuzz/mod.rs | 14 +- .../query_processing/test_read_path.rs | 12 +- .../query_processing/test_write_path.rs | 26 +- tests/integration/wal/test_wal.rs | 6 +- 21 files changed, 438 insertions(+), 197 deletions(-) diff --git a/bindings/go/rs_src/types.rs b/bindings/go/rs_src/types.rs index c8508fea7..7e3e592b9 100644 --- a/bindings/go/rs_src/types.rs +++ b/bindings/go/rs_src/types.rs @@ -143,23 +143,21 @@ impl LimboValue { Box::into_raw(Box::new(self)) as *const c_void } - pub fn from_value(value: &limbo_core::OwnedValue) -> Self { + pub fn from_value(value: &limbo_core::RefValue) -> Self { match value { - limbo_core::OwnedValue::Integer(i) => { + limbo_core::RefValue::Integer(i) => { LimboValue::new(ValueType::Integer, ValueUnion::from_int(*i)) } - limbo_core::OwnedValue::Float(r) => { + limbo_core::RefValue::Float(r) => { LimboValue::new(ValueType::Real, ValueUnion::from_real(*r)) } - limbo_core::OwnedValue::Text(s) => { + limbo_core::RefValue::Text(s) => { LimboValue::new(ValueType::Text, ValueUnion::from_str(s.as_str())) } - limbo_core::OwnedValue::Blob(b) => { - LimboValue::new(ValueType::Blob, ValueUnion::from_bytes(b)) - } - limbo_core::OwnedValue::Null => { - LimboValue::new(ValueType::Null, ValueUnion::from_null()) + limbo_core::RefValue::Blob(b) => { + LimboValue::new(ValueType::Blob, ValueUnion::from_bytes(b.to_slice())) } + limbo_core::RefValue::Null => LimboValue::new(ValueType::Null, ValueUnion::from_null()), } } diff --git a/bindings/java/rs_src/limbo_statement.rs b/bindings/java/rs_src/limbo_statement.rs index 0a6cf8c3b..8f9395e23 100644 --- a/bindings/java/rs_src/limbo_statement.rs +++ b/bindings/java/rs_src/limbo_statement.rs @@ -107,15 +107,15 @@ fn row_to_obj_array<'local>( for (i, value) in row.get_values().iter().enumerate() { let obj = match value { - limbo_core::OwnedValue::Null => JObject::null(), - limbo_core::OwnedValue::Integer(i) => { + limbo_core::RefValue::Null => JObject::null(), + limbo_core::RefValue::Integer(i) => { env.new_object("java/lang/Long", "(J)V", &[JValue::Long(*i)])? } - limbo_core::OwnedValue::Float(f) => { + limbo_core::RefValue::Float(f) => { env.new_object("java/lang/Double", "(D)V", &[JValue::Double(*f)])? } - limbo_core::OwnedValue::Text(s) => env.new_string(s.as_str())?.into(), - limbo_core::OwnedValue::Blob(b) => env.byte_array_from_slice(&b)?.into(), + limbo_core::RefValue::Text(s) => env.new_string(s.as_str())?.into(), + limbo_core::RefValue::Blob(b) => env.byte_array_from_slice(&b.to_slice())?.into(), }; if let Err(e) = env.set_object_array_element(&obj_array, i as i32, obj) { eprintln!("Error on parsing row: {:?}", e); diff --git a/bindings/javascript/src/lib.rs b/bindings/javascript/src/lib.rs index 51ef0db9d..2db1fec48 100644 --- a/bindings/javascript/src/lib.rs +++ b/bindings/javascript/src/lib.rs @@ -92,14 +92,14 @@ impl Statement { } } -fn to_js_value(env: &napi::Env, value: &limbo_core::OwnedValue) -> JsUnknown { +fn to_js_value(env: &napi::Env, value: &limbo_core::RefValue) -> JsUnknown { match value { - limbo_core::OwnedValue::Null => env.get_null().unwrap().into_unknown(), - limbo_core::OwnedValue::Integer(i) => env.create_int64(*i).unwrap().into_unknown(), - limbo_core::OwnedValue::Float(f) => env.create_double(*f).unwrap().into_unknown(), - limbo_core::OwnedValue::Text(s) => env.create_string(s.as_str()).unwrap().into_unknown(), - limbo_core::OwnedValue::Blob(b) => { - env.create_buffer_copy(b.as_ref()).unwrap().into_unknown() + limbo_core::RefValue::Null => env.get_null().unwrap().into_unknown(), + limbo_core::RefValue::Integer(i) => env.create_int64(*i).unwrap().into_unknown(), + limbo_core::RefValue::Float(f) => env.create_double(*f).unwrap().into_unknown(), + limbo_core::RefValue::Text(s) => env.create_string(s.as_str()).unwrap().into_unknown(), + limbo_core::RefValue::Blob(b) => { + env.create_buffer_copy(b.to_slice()).unwrap().into_unknown() } } } diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index 6167e069e..2f25a205d 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -326,13 +326,11 @@ fn row_to_py(py: Python, row: &limbo_core::Row) -> Result { let mut py_values = Vec::new(); for value in row.get_values() { match value { - limbo_core::OwnedValue::Null => py_values.push(py.None()), - limbo_core::OwnedValue::Integer(i) => py_values.push(i.into_pyobject(py)?.into()), - limbo_core::OwnedValue::Float(f) => py_values.push(f.into_pyobject(py)?.into()), - limbo_core::OwnedValue::Text(s) => py_values.push(s.as_str().into_pyobject(py)?.into()), - limbo_core::OwnedValue::Blob(b) => { - py_values.push(PyBytes::new(py, b.as_slice()).into()) - } + limbo_core::RefValue::Null => py_values.push(py.None()), + limbo_core::RefValue::Integer(i) => py_values.push(i.into_pyobject(py)?.into()), + limbo_core::RefValue::Float(f) => py_values.push(f.into_pyobject(py)?.into()), + limbo_core::RefValue::Text(s) => py_values.push(s.as_str().into_pyobject(py)?.into()), + limbo_core::RefValue::Blob(b) => py_values.push(PyBytes::new(py, b.to_slice()).into()), } } Ok(PyTuple::new(py, &py_values) diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index aefc9ab8f..781a40c94 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -212,7 +212,7 @@ impl Rows { Ok(limbo_core::StepResult::Row) => { let row = stmt.row().unwrap(); Ok(Some(Row { - values: row.get_values().to_vec(), + values: row.get_values().iter().map(|v| v.to_owned()).collect(), })) } _ => Ok(None), diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index 3a5819efc..330d4dd68 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -177,10 +177,10 @@ impl Statement { } } -fn to_js_value(value: &limbo_core::OwnedValue) -> JsValue { +fn to_js_value(value: &limbo_core::RefValue) -> JsValue { match value { - limbo_core::OwnedValue::Null => JsValue::null(), - limbo_core::OwnedValue::Integer(i) => { + limbo_core::RefValue::Null => JsValue::null(), + limbo_core::RefValue::Integer(i) => { let i = *i; if i >= i32::MIN as i64 && i <= i32::MAX as i64 { JsValue::from(i as i32) @@ -188,9 +188,9 @@ fn to_js_value(value: &limbo_core::OwnedValue) -> JsValue { JsValue::from(i) } } - limbo_core::OwnedValue::Float(f) => JsValue::from(*f), - limbo_core::OwnedValue::Text(t) => JsValue::from_str(t.as_str()), - limbo_core::OwnedValue::Blob(b) => js_sys::Uint8Array::from(b.as_slice()).into(), + limbo_core::RefValue::Float(f) => JsValue::from(*f), + limbo_core::RefValue::Text(t) => JsValue::from_str(t.as_str()), + limbo_core::RefValue::Blob(b) => js_sys::Uint8Array::from(b.to_slice()).into(), } } diff --git a/cli/app.rs b/cli/app.rs index 1e12ae42f..cca35d4d8 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -5,7 +5,7 @@ use crate::{ opcodes_dictionary::OPCODE_DESCRIPTIONS, }; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table}; -use limbo_core::{Database, LimboError, OwnedValue, Statement, StepResult}; +use limbo_core::{Database, LimboError, OwnedValue, RefValue, Statement, StepResult}; use clap::{Parser, ValueEnum}; use rustyline::{history::DefaultHistory, Editor}; @@ -769,19 +769,19 @@ impl<'a> Limbo<'a> { row.max_height(1); for (idx, value) in record.get_values().iter().enumerate() { let (content, alignment) = match value { - OwnedValue::Null => { + RefValue::Null => { (self.opts.null_value.clone(), CellAlignment::Left) } - OwnedValue::Integer(_) => { + RefValue::Integer(_) => { (format!("{}", value), CellAlignment::Right) } - OwnedValue::Float(_) => { + RefValue::Float(_) => { (format!("{}", value), CellAlignment::Right) } - OwnedValue::Text(_) => { + RefValue::Text(_) => { (format!("{}", value), CellAlignment::Left) } - OwnedValue::Blob(_) => { + RefValue::Blob(_) => { (format!("{}", value), CellAlignment::Left) } }; @@ -849,7 +849,7 @@ impl<'a> Limbo<'a> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - if let Some(OwnedValue::Text(schema)) = row.get_values().first() { + if let Some(RefValue::Text(schema)) = row.get_values().first() { let _ = self.write_fmt(format_args!("{};", schema.as_str())); found = true; } @@ -907,7 +907,7 @@ impl<'a> Limbo<'a> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - if let Some(OwnedValue::Text(table)) = row.get_values().first() { + if let Some(RefValue::Text(table)) = row.get_values().first() { tables.push_str(table.as_str()); tables.push(' '); } diff --git a/core/lib.rs b/core/lib.rs index c9f0342db..0314d9b81 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -63,6 +63,7 @@ use storage::{ }; use translate::select::prepare_select_plan; pub use types::OwnedValue; +pub use types::RefValue; use util::{columns_from_create_table_body, parse_schema_rows}; use vdbe::{builder::QueryMode, VTabOpaqueCursor}; @@ -596,7 +597,7 @@ impl Statement { } } -pub type Row = types::Record; +pub type Row = types::ImmutableRecord; pub type StepResult = vdbe::StepResult; diff --git a/core/pseudo.rs b/core/pseudo.rs index bcc6c91f0..c4bb1ee40 100644 --- a/core/pseudo.rs +++ b/core/pseudo.rs @@ -1,7 +1,7 @@ -use crate::types::Record; +use crate::types::ImmutableRecord; pub struct PseudoCursor { - current: Option, + current: Option, } impl PseudoCursor { @@ -9,11 +9,11 @@ impl PseudoCursor { Self { current: None } } - pub fn record(&self) -> Option<&Record> { + pub fn record(&self) -> Option<&ImmutableRecord> { self.current.as_ref() } - pub fn insert(&mut self, record: Record) { + pub fn insert(&mut self, record: ImmutableRecord) { self.current = Some(record); } } diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 393079029..458b9d26f 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -7,8 +7,8 @@ use crate::storage::sqlite3_ondisk::{ use crate::MvCursor; use crate::types::{ - compare_immutable_to_record, compare_record_to_immutable, CursorResult, ImmutableRecord, - OwnedValue, Record, RefValue, SeekKey, SeekOp, + compare_immutable, compare_immutable_to_record, CursorResult, ImmutableRecord, OwnedValue, + RefValue, SeekKey, SeekOp, }; use crate::{return_corrupt, LimboError, Result}; @@ -610,8 +610,7 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = - compare_immutable_to_record(&record.get_values(), &index_key.get_values()); + let order = compare_immutable(&record.get_values(), index_key.get_values()); let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), @@ -654,8 +653,7 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = - compare_immutable_to_record(&record.get_values(), &index_key.get_values()); + let order = compare_immutable(&record.get_values(), index_key.get_values()); let found = match op { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), @@ -753,7 +751,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record(payload)? }; - let order = compare_immutable_to_record( + let order = compare_immutable( &record.get_values().as_slice()[..record.len() - 1], &index_key.get_values().as_slice()[..], ); @@ -939,10 +937,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record(payload)? }; - let order = compare_record_to_immutable( - &index_key.get_values(), - &record.get_values(), - ); + let order = compare_immutable(index_key.get_values(), record.get_values()); let target_leaf_page_is_in_the_left_subtree = match cmp { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), @@ -984,7 +979,11 @@ impl BTreeCursor { /// Insert a record into the btree. /// If the insert operation overflows the page, it will be split and the btree will be balanced. - fn insert_into_page(&mut self, key: &OwnedValue, record: &Record) -> Result> { + fn insert_into_page( + &mut self, + key: &OwnedValue, + record: &ImmutableRecord, + ) -> Result> { if let CursorState::None = &self.state { self.state = CursorState::Write(WriteInfo::new()); } @@ -1943,7 +1942,7 @@ impl BTreeCursor { pub fn insert( &mut self, key: &OwnedValue, - record: &Record, + record: &ImmutableRecord, moved_before: bool, /* Indicate whether it's necessary to traverse to find the leaf page */ ) -> Result> { let int_key = match key { @@ -1954,8 +1953,7 @@ impl BTreeCursor { Some(mv_cursor) => { let row_id = crate::mvcc::database::RowID::new(self.table_id() as u64, *int_key as u64); - let mut record_buf = Vec::new(); - record.serialize(&mut record_buf); + let record_buf = record.payload.to_vec(); let row = crate::mvcc::database::Row::new(row_id, record_buf); mv_cursor.borrow_mut().insert(row).unwrap(); } @@ -2594,7 +2592,7 @@ impl BTreeCursor { &mut self, page_ref: PageRef, cell_idx: usize, - record: &Record, + record: &ImmutableRecord, ) -> Result> { // build the new payload let page_type = page_ref.get().contents.as_ref().unwrap().page_type(); @@ -3354,7 +3352,7 @@ fn fill_cell_payload( page_type: PageType, int_key: Option, cell_payload: &mut Vec, - record: &Record, + record: &ImmutableRecord, usable_space: u16, pager: Rc, ) { @@ -3363,8 +3361,7 @@ fn fill_cell_payload( PageType::TableLeaf | PageType::IndexLeaf )); // TODO: make record raw from start, having to serialize is not good - let mut record_buf = Vec::new(); - record.serialize(&mut record_buf); + let record_buf = record.payload.to_vec(); // fill in header if matches!(page_type, PageType::TableLeaf) { @@ -3537,6 +3534,7 @@ mod tests { use crate::storage::sqlite3_ondisk; use crate::storage::sqlite3_ondisk::DatabaseHeader; use crate::types::Text; + use crate::vdbe::Register; use crate::Connection; use crate::{BufferPool, DatabaseStorage, WalFile, WalFileShared, WriteCompletion}; use std::cell::RefCell; @@ -3558,7 +3556,7 @@ mod tests { pager::PageRef, sqlite3_ondisk::{BTreeCell, PageContent, PageType}, }, - types::{OwnedValue, Record}, + types::OwnedValue, Database, Page, Pager, PlatformIO, }; @@ -3616,7 +3614,7 @@ mod tests { id: usize, pos: usize, page: &mut PageContent, - record: Record, + record: ImmutableRecord, conn: &Rc, ) -> Vec { let mut payload: Vec = Vec::new(); @@ -3639,7 +3637,8 @@ mod tests { let page = get_page(2); let page = page.get_contents(); let header_size = 8; - let record = Record::new([OwnedValue::Integer(1)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(1))]); let payload = add_record(1, 0, page, record, &conn); assert_eq!(page.cell_count(), 1); let free = compute_free_space(page, 4096); @@ -3667,7 +3666,9 @@ mod tests { let mut cells = Vec::new(); let usable_space = 4096; for i in 0..3 { - let record = Record::new([OwnedValue::Integer(i as i64)].to_vec()); + let record = ImmutableRecord::from_registers(&[Register::OwnedValue( + OwnedValue::Integer(i as i64), + )]); let payload = add_record(i, i, page, record, &conn); assert_eq!(page.cell_count(), i + 1); let free = compute_free_space(page, usable_space); @@ -3892,7 +3893,9 @@ mod tests { ) .unwrap(); let key = OwnedValue::Integer(*key); - let value = Record::new(vec![OwnedValue::Blob(Rc::new(vec![0; *size]))]); + let value = ImmutableRecord::from_registers(&[Register::OwnedValue( + OwnedValue::Blob(Rc::new(vec![0; *size])), + )]); tracing::info!("insert key:{}", key); run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); tracing::info!( @@ -3957,7 +3960,9 @@ mod tests { .unwrap(); let key = OwnedValue::Integer(key); - let value = Record::new(vec![OwnedValue::Blob(Rc::new(vec![0; size]))]); + let value = ImmutableRecord::from_registers(&[Register::OwnedValue( + OwnedValue::Blob(Rc::new(vec![0; size])), + )]); run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); } tracing::info!( @@ -3994,7 +3999,9 @@ mod tests { let usable_space = 4096; let total_cells = 10; for i in 0..total_cells { - let record = Record::new([OwnedValue::Integer(i as i64)].to_vec()); + let record = ImmutableRecord::from_registers(&[Register::OwnedValue( + OwnedValue::Integer(i as i64), + )]); let payload = add_record(i, i, page, record, &conn); assert_eq!(page.cell_count(), i + 1); let free = compute_free_space(page, usable_space); @@ -4352,7 +4359,9 @@ mod tests { let mut cells = Vec::new(); let usable_space = 4096; for i in 0..3 { - let record = Record::new([OwnedValue::Integer(i as i64)].to_vec()); + let record = ImmutableRecord::from_registers(&[Register::OwnedValue( + OwnedValue::Integer(i as i64), + )]); let payload = add_record(i, i, page, record, &conn); assert_eq!(page.cell_count(), i + 1); let free = compute_free_space(page, usable_space); @@ -4392,7 +4401,9 @@ mod tests { let usable_space = 4096; let total_cells = 10; for i in 0..total_cells { - let record = Record::new([OwnedValue::Integer(i as i64)].to_vec()); + let record = ImmutableRecord::from_registers(&[Register::OwnedValue( + OwnedValue::Integer(i as i64), + )]); let payload = add_record(i, i, page, record, &conn); assert_eq!(page.cell_count(), i + 1); let free = compute_free_space(page, usable_space); @@ -4448,7 +4459,9 @@ mod tests { // allow appends with extra place to insert let cell_idx = rng.next_u64() as usize % (page.cell_count() + 1); let free = compute_free_space(page, usable_space); - let record = Record::new([OwnedValue::Integer(i as i64)].to_vec()); + let record = ImmutableRecord::from_registers(&[Register::OwnedValue( + OwnedValue::Integer(i as i64), + )]); let mut payload: Vec = Vec::new(); fill_cell_payload( page.page_type(), @@ -4517,7 +4530,9 @@ mod tests { // allow appends with extra place to insert let cell_idx = rng.next_u64() as usize % (page.cell_count() + 1); let free = compute_free_space(page, usable_space); - let record = Record::new([OwnedValue::Integer(i as i64)].to_vec()); + let record = ImmutableRecord::from_registers(&[Register::OwnedValue( + OwnedValue::Integer(i as i64), + )]); let mut payload: Vec = Vec::new(); fill_cell_payload( page.page_type(), @@ -4571,7 +4586,8 @@ mod tests { let header_size = 8; let usable_space = 4096; - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let payload = add_record(0, 0, page, record, &conn); let free = compute_free_space(page, usable_space); assert_eq!(free, 4096 - payload.len() as u16 - 2 - header_size); @@ -4586,7 +4602,8 @@ mod tests { let page = page.get_contents(); let usable_space = 4096; - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let payload = add_record(0, 0, page, record, &conn); assert_eq!(page.cell_count(), 1); @@ -4611,20 +4628,18 @@ mod tests { let page = page.get_contents(); let usable_space = 4096; - let record = Record::new( - [ - OwnedValue::Integer(0), - OwnedValue::Text(Text::new("aaaaaaaa")), - ] - .to_vec(), - ); + let record = ImmutableRecord::from_registers(&[ + Register::OwnedValue(OwnedValue::Integer(0)), + Register::OwnedValue(OwnedValue::Text(Text::new("aaaaaaaa"))), + ]); let _ = add_record(0, 0, page, record, &conn); assert_eq!(page.cell_count(), 1); drop_cell(page, 0, usable_space).unwrap(); assert_eq!(page.cell_count(), 0); - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let payload = add_record(0, 0, page, record, &conn); assert_eq!(page.cell_count(), 1); @@ -4647,13 +4662,10 @@ mod tests { let page = page.get_contents(); let usable_space = 4096; - let record = Record::new( - [ - OwnedValue::Integer(0), - OwnedValue::Text(Text::new("aaaaaaaa")), - ] - .to_vec(), - ); + let record = ImmutableRecord::from_registers(&[ + Register::OwnedValue(OwnedValue::Integer(0)), + Register::OwnedValue(OwnedValue::Text(Text::new("aaaaaaaa"))), + ]); let _ = add_record(0, 0, page, record, &conn); for _ in 0..100 { @@ -4661,7 +4673,8 @@ mod tests { drop_cell(page, 0, usable_space).unwrap(); assert_eq!(page.cell_count(), 0); - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let payload = add_record(0, 0, page, record, &conn); assert_eq!(page.cell_count(), 1); @@ -4685,11 +4698,14 @@ mod tests { let page = page.get_contents(); let usable_space = 4096; - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let payload = add_record(0, 0, page, record, &conn); - let record = Record::new([OwnedValue::Integer(1)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(1))]); let _ = add_record(1, 1, page, record, &conn); - let record = Record::new([OwnedValue::Integer(2)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(2))]); let _ = add_record(2, 2, page, record, &conn); drop_cell(page, 1, usable_space).unwrap(); @@ -4707,21 +4723,25 @@ mod tests { let page = page.get_contents(); let usable_space = 4096; - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let _ = add_record(0, 0, page, record, &conn); - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let _ = add_record(0, 0, page, record, &conn); drop_cell(page, 0, usable_space).unwrap(); defragment_page(page, usable_space); - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let _ = add_record(0, 1, page, record, &conn); drop_cell(page, 0, usable_space).unwrap(); - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let _ = add_record(0, 1, page, record, &conn); } @@ -4733,7 +4753,8 @@ mod tests { let page = get_page(2); let usable_space = 4096; let insert = |pos, page| { - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let _ = add_record(0, pos, page, record, &conn); }; let drop = |pos, page| { @@ -4772,7 +4793,8 @@ mod tests { let page = get_page(2); let usable_space = 4096; let insert = |pos, page| { - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let _ = add_record(0, pos, page, record, &conn); }; let drop = |pos, page| { @@ -4781,7 +4803,8 @@ mod tests { let defragment = |page| { defragment_page(page, usable_space); }; - let record = Record::new([OwnedValue::Integer(0)].to_vec()); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(0))]); let mut payload: Vec = Vec::new(); fill_cell_payload( page.get_contents().page_type(), @@ -4815,7 +4838,8 @@ mod tests { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); let key = OwnedValue::Integer(i); - let value = Record::new(vec![OwnedValue::Integer(i)]); + let value = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Integer(i))]); tracing::trace!("before insert {}", i); run_until_done( || { @@ -4850,7 +4874,9 @@ mod tests { let page = get_page(2); let usable_space = 4096; - let record = Record::new([OwnedValue::Blob(Rc::new(vec![0; 3600]))].to_vec()); + let record = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Blob( + Rc::new(vec![0; 3600]), + ))]); let mut payload: Vec = Vec::new(); fill_cell_payload( page.get_contents().page_type(), @@ -4886,7 +4912,9 @@ mod tests { for i in 1..=10000 { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); let key = OwnedValue::Integer(i); - let value = Record::new(vec![OwnedValue::Text(Text::new("hello world"))]); + let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text( + Text::new("hello world"), + ))]); run_until_done( || { @@ -4957,13 +4985,11 @@ mod tests { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); tracing::info!("INSERT INTO t VALUES ({});", i,); let key = OwnedValue::Integer(i as i64); - let value = Record::new( - [OwnedValue::Text(Text { + let value = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text(Text { value: Rc::new(huge_texts[i].as_bytes().to_vec()), subtype: crate::types::TextSubtype::Text, - })] - .to_vec(), - ); + }))]); tracing::trace!("before insert {}", i); tracing::debug!( "=========== btree before ===========\n{}\n\n", diff --git a/core/types.rs b/core/types.rs index 3636c36c2..9dbdebb5c 100644 --- a/core/types.rs +++ b/core/types.rs @@ -4,9 +4,10 @@ use crate::error::LimboError; use crate::ext::{ExtValue, ExtValueType}; use crate::pseudo::PseudoCursor; use crate::storage::btree::BTreeCursor; +use crate::storage::buffer_pool; use crate::storage::sqlite3_ondisk::write_varint; use crate::vdbe::sorter::Sorter; -use crate::vdbe::VTabOpaqueCursor; +use crate::vdbe::{Register, VTabOpaqueCursor}; use crate::Result; use std::cmp::Ordering; use std::fmt::Display; @@ -71,6 +72,16 @@ impl Text { } } +impl TextRef { + pub fn as_str(&self) -> &str { + unsafe { std::str::from_utf8_unchecked(self.value.to_slice()) } + } + + pub fn to_string(&self) -> String { + self.as_str().to_string() + } +} + #[derive(Debug, Clone)] pub enum OwnedValue { Null, @@ -132,6 +143,26 @@ impl OwnedValue { OwnedValue::Blob(_) => OwnedValueType::Blob, } } + pub fn serialize_serial(&self, out: &mut Vec) { + match self { + OwnedValue::Null => {} + OwnedValue::Integer(i) => { + let serial_type = SerialType::from(self); + match serial_type { + SerialType::I8 => out.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialType::I16 => out.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialType::I24 => out.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte + SerialType::I32 => out.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialType::I48 => out.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialType::I64 => out.extend_from_slice(&i.to_be_bytes()), + _ => unreachable!(), + } + } + OwnedValue::Float(f) => out.extend_from_slice(&f.to_be_bytes()), + OwnedValue::Text(t) => out.extend_from_slice(&t.value), + OwnedValue::Blob(b) => out.extend_from_slice(b), + }; + } } #[derive(Debug, Clone, PartialEq)] @@ -562,33 +593,33 @@ impl std::ops::DivAssign for OwnedValue { } pub trait FromValue<'a> { - fn from_value(value: &'a OwnedValue) -> Result + fn from_value(value: &'a RefValue) -> Result where Self: Sized + 'a; } impl<'a> FromValue<'a> for i64 { - fn from_value(value: &'a OwnedValue) -> Result { + fn from_value(value: &'a RefValue) -> Result { match value { - OwnedValue::Integer(i) => Ok(*i), + RefValue::Integer(i) => Ok(*i), _ => Err(LimboError::ConversionError("Expected integer value".into())), } } } impl<'a> FromValue<'a> for String { - fn from_value(value: &'a OwnedValue) -> Result { + fn from_value(value: &'a RefValue) -> Result { match value { - OwnedValue::Text(s) => Ok(s.as_str().to_string()), + RefValue::Text(s) => Ok(s.as_str().to_string()), _ => Err(LimboError::ConversionError("Expected text value".into())), } } } impl<'a> FromValue<'a> for &'a str { - fn from_value(value: &'a OwnedValue) -> Result { + fn from_value(value: &'a RefValue) -> Result { match value { - OwnedValue::Text(s) => Ok(s.as_str()), + RefValue::Text(s) => Ok(s.as_str()), _ => Err(LimboError::ConversionError("Expected text value".into())), } } @@ -609,10 +640,10 @@ pub struct Record { } impl Record { - pub fn get<'a, T: FromValue<'a> + 'a>(&'a self, idx: usize) -> Result { - let value = &self.values[idx]; - T::from_value(value) - } + // pub fn get<'a, T: FromValue<'a> + 'a>(&'a self, idx: usize) -> Result { + // let value = &self.values[idx]; + // T::from_value(value) + // } pub fn count(&self) -> usize { self.values.len() @@ -636,10 +667,13 @@ impl Record { } impl ImmutableRecord { - // pub fn get<'a, T: FromValue<'a> + 'a>(&'a self, idx: usize) -> Result { - // let value = &self.values[idx]; - // T::from_value(value) - // } + pub fn get<'a, T: FromValue<'a> + 'a>(&'a self, idx: usize) -> Result { + let value = self + .values + .get(idx) + .ok_or(LimboError::InternalError("Index out of bounds".into()))?; + T::from_value(value) + } pub fn count(&self) -> usize { self.values.len() @@ -660,9 +694,166 @@ impl ImmutableRecord { pub fn len(&self) -> usize { self.values.len() } + + pub fn from_registers(registers: &[Register]) -> Self { + let mut values = Vec::with_capacity(registers.len()); + let mut size_header = 0; + let mut size_values = 0; + + let mut serial_type_buf = [0; 9]; + // write serial types + for value in registers { + let value = value.get_owned_value(); + let serial_type = SerialType::from(value); + let n = write_varint(&mut serial_type_buf[0..], serial_type.into()); + + let value_size = match serial_type { + SerialType::Null => 0, + SerialType::I8 => 8, + SerialType::I16 => 16, + SerialType::I24 => 24, + SerialType::I32 => 32, + SerialType::I48 => 48, + SerialType::I64 => 64, + SerialType::F64 => 64, + SerialType::Text { content_size } => content_size, + SerialType::Blob { content_size } => content_size, + }; + + size_header += n; + size_values += value_size; + } + let mut header_size = size_header; + let mut header_bytes_buf: Vec = Vec::new(); + if header_size <= 126 { + // common case + header_size += 1; + } else { + todo!("calculate big header size extra bytes"); + // get header varint len + // header_size += n; + // if( nVarint {} + OwnedValue::Integer(i) => { + values.push(RefValue::Integer(*i)); + let serial_type = SerialType::from(value); + match serial_type { + SerialType::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialType::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialType::I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte + SerialType::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialType::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialType::I64 => buf.extend_from_slice(&i.to_be_bytes()), + _ => unreachable!(), + } + } + OwnedValue::Float(f) => { + values.push(RefValue::Float(*f)); + buf.extend_from_slice(&f.to_be_bytes()) + } + OwnedValue::Text(t) => { + buf.extend_from_slice(&t.value); + let end_offset = buf.len(); + let len = end_offset - start_offset; + let ptr = unsafe { buf.as_ptr().add(start_offset) }; + let value = RefValue::Text(TextRef { + value: RawSlice::new(ptr, len), + subtype: t.subtype.clone(), + }); + values.push(value); + } + OwnedValue::Blob(b) => { + buf.extend_from_slice(b); + let end_offset = buf.len(); + let len = end_offset - start_offset; + let ptr = unsafe { buf.as_ptr().add(start_offset) }; + values.push(RefValue::Blob(RawSlice::new(ptr, len))); + } + }; + } + + Self { + payload: Pin::new(buf), + values, + } + } +} + +impl Clone for ImmutableRecord { + fn clone(&self) -> Self { + let mut new_values = Vec::new(); + let new_payload = self.payload.clone(); + for value in &self.values { + let value = match value { + RefValue::Null => RefValue::Null, + RefValue::Integer(i) => RefValue::Integer(*i), + RefValue::Float(f) => RefValue::Float(*f), + RefValue::Text(text_ref) => { + // let's update pointer + let ptr_start = self.payload.as_ptr() as usize; + let ptr_end = text_ref.value.data as usize; + let len = ptr_end - ptr_start; + let new_ptr = unsafe { new_payload.as_ptr().add(len) }; + RefValue::Text(TextRef { + value: RawSlice::new(new_ptr, text_ref.value.len), + subtype: text_ref.subtype.clone(), + }) + } + RefValue::Blob(raw_slice) => { + let ptr_start = self.payload.as_ptr() as usize; + let ptr_end = raw_slice.data as usize; + let len = ptr_end - ptr_start; + let new_ptr = unsafe { new_payload.as_ptr().add(len) }; + RefValue::Blob(RawSlice::new(new_ptr, raw_slice.len)) + } + }; + new_values.push(value); + } + Self { + payload: new_payload, + values: new_values, + } + } } impl RefValue { + pub fn to_ffi(&self) -> ExtValue { + match self { + Self::Null => ExtValue::null(), + Self::Integer(i) => ExtValue::from_integer(*i), + Self::Float(fl) => ExtValue::from_float(*fl), + Self::Text(text) => ExtValue::from_text( + std::str::from_utf8(text.value.to_slice()) + .unwrap() + .to_string(), + ), + Self::Blob(blob) => ExtValue::from_blob(blob.to_slice().to_vec()), + } + } + pub fn to_owned(&self) -> OwnedValue { match self { RefValue::Null => OwnedValue::Null, @@ -675,6 +866,24 @@ impl RefValue { RefValue::Blob(b) => OwnedValue::Blob(Rc::new(b.to_slice().to_vec())), } } + pub fn to_blob(&self) -> Option<&[u8]> { + match self { + Self::Blob(blob) => Some(blob.to_slice()), + _ => None, + } + } +} + +impl Display for RefValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => write!(f, "NULL"), + Self::Integer(i) => write!(f, "{}", i), + Self::Float(fl) => write!(f, "{:?}", fl), + Self::Text(s) => write!(f, "{}", s.as_str()), + Self::Blob(b) => write!(f, "{}", String::from_utf8_lossy(b.to_slice())), + } + } } impl Eq for RefValue {} @@ -873,6 +1082,18 @@ pub fn compare_immutable_to_record( Ordering::Equal } +pub fn compare_immutable(l: &[RefValue], r: &[RefValue]) -> std::cmp::Ordering { + for (a, b) in l.iter().zip(r.iter()) { + match a.partial_cmp(b).unwrap() { + Ordering::Equal => {} + order => { + return order; + } + } + } + Ordering::Equal +} + const I8_LOW: i64 = -128; const I8_HIGH: i64 = 127; const I16_LOW: i64 = -32768; @@ -1063,7 +1284,7 @@ pub enum SeekOp { #[derive(Clone, PartialEq, Debug)] pub enum SeekKey<'a> { TableRowId(u64), - IndexKey(&'a Record), + IndexKey(&'a ImmutableRecord), } impl RawSlice { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index f51effe47..4ff6856e1 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -40,7 +40,8 @@ use crate::storage::wal::CheckpointResult; use crate::storage::{btree::BTreeCursor, pager::Pager}; use crate::translate::plan::{ResultSetColumn, TableReference}; use crate::types::{ - AggContext, Cursor, CursorResult, ExternalAggState, OwnedValue, Record, SeekKey, SeekOp, + AggContext, Cursor, CursorResult, ExternalAggState, ImmutableRecord, OwnedValue, SeekKey, + SeekOp, }; use crate::util::{ cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real, @@ -235,7 +236,7 @@ enum HaltState { pub enum Register { OwnedValue(OwnedValue), Aggregate(AggContext), - Record(Record), + Record(ImmutableRecord), } /// The program state describes the environment in which the program executes. @@ -243,7 +244,7 @@ pub struct ProgramState { pub pc: InsnReference, cursors: RefCell>>, registers: Vec, - pub(crate) result_row: Option, + pub(crate) result_row: Option, last_compare: Option, deferred_seek: Option<(CursorID, CursorID)>, ended_coroutine: Bitfield<4>, // flag to indicate that a coroutine has ended (key is the yield register. currently we assume that the yield register is always between 0-255, YOLO) @@ -1199,7 +1200,7 @@ impl Program { }; if let Some(record) = record { state.registers[*dest] = - Register::OwnedValue(record.get_value(*column).clone()); + Register::OwnedValue(record.get_value(*column).to_owned()); } else { state.registers[*dest] = Register::OwnedValue(OwnedValue::Null); } @@ -1209,7 +1210,7 @@ impl Program { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_pseudo_mut(); if let Some(record) = cursor.record() { - record.get_value(*column).clone() + record.get_value(*column).to_owned() } else { OwnedValue::Null } @@ -1230,12 +1231,12 @@ impl Program { count, dest_reg, } => { - let record = make_owned_record(&state.registers, start_reg, count); + let record = make_record(&state.registers, start_reg, count); state.registers[*dest_reg] = Register::Record(record); state.pc += 1; } Insn::ResultRow { start_reg, count } => { - let record = make_owned_record(&state.registers, start_reg, count); + let record = make_record(&state.registers, start_reg, count); state.result_row = Some(record); state.pc += 1; return Ok(StepResult::Row); @@ -1547,7 +1548,7 @@ impl Program { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); let record_from_regs = - make_owned_record(&state.registers, start_reg, num_regs); + make_record(&state.registers, start_reg, num_regs); let found = return_if_io!( cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GE) ); @@ -1605,8 +1606,8 @@ impl Program { let found = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); - let record_from_regs: Record = - make_owned_record(&state.registers, start_reg, num_regs); + let record_from_regs = + make_record(&state.registers, start_reg, num_regs); let found = return_if_io!( cursor.seek(SeekKey::IndexKey(&record_from_regs), SeekOp::GT) ); @@ -1663,8 +1664,7 @@ impl Program { let pc = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); - let record_from_regs: Record = - make_owned_record(&state.registers, start_reg, num_regs); + let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values if idx_record.get_values()[..record_from_regs.len()] @@ -1693,8 +1693,7 @@ impl Program { let pc = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); - let record_from_regs: Record = - make_owned_record(&state.registers, start_reg, num_regs); + let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values if idx_record.get_values()[..record_from_regs.len()] @@ -1723,8 +1722,7 @@ impl Program { let pc = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); - let record_from_regs: Record = - make_owned_record(&state.registers, start_reg, num_regs); + let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values if idx_record.get_values()[..record_from_regs.len()] @@ -1753,8 +1751,7 @@ impl Program { let pc = { let mut cursor = state.get_cursor(*cursor_id); let cursor = cursor.as_btree_mut(); - let record_from_regs: Record = - make_owned_record(&state.registers, start_reg, num_regs); + let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values if idx_record.get_values()[..record_from_regs.len()] @@ -3606,12 +3603,8 @@ fn get_new_rowid(cursor: &mut BTreeCursor, mut rng: R) -> Result Record { - let mut values = Vec::with_capacity(*count); - for r in registers.iter().skip(*start_reg).take(*count) { - values.push(r.get_owned_value().clone()) - } - Record::new(values) +fn make_record(registers: &[Register], start_reg: &usize, count: &usize) -> ImmutableRecord { + ImmutableRecord::from_registers(®isters[*start_reg..*start_reg + *count]) } fn trace_insn(program: &Program, addr: InsnReference, insn: &Insn) { diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 60f839b1c..7b87a5387 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,9 +1,9 @@ -use crate::types::Record; +use crate::types::{ImmutableRecord, Record}; use std::cmp::Ordering; pub struct Sorter { - records: Vec, - current: Option, + records: Vec, + current: Option, order: Vec, } @@ -51,11 +51,11 @@ impl Sorter { pub fn next(&mut self) { self.current = self.records.pop(); } - pub fn record(&self) -> Option<&Record> { + pub fn record(&self) -> Option<&ImmutableRecord> { self.current.as_ref() } - pub fn insert(&mut self, record: &Record) { - self.records.push(Record::new(record.get_values().to_vec())); + pub fn insert(&mut self, record: &ImmutableRecord) { + self.records.push(record.clone()); } } diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index ecad92344..2799976a2 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -494,13 +494,13 @@ impl Interaction { let mut r = Vec::new(); for v in row.get_values() { let v = match v { - limbo_core::OwnedValue::Null => Value::Null, - limbo_core::OwnedValue::Integer(i) => Value::Integer(*i), - limbo_core::OwnedValue::Float(f) => Value::Float(*f), - limbo_core::OwnedValue::Text(t) => { + limbo_core::RefValue::Null => Value::Null, + limbo_core::RefValue::Integer(i) => Value::Integer(*i), + limbo_core::RefValue::Float(f) => Value::Float(*f), + limbo_core::RefValue::Text(t) => { Value::Text(t.as_str().to_string()) } - limbo_core::OwnedValue::Blob(b) => Value::Blob(b.to_vec()), + limbo_core::RefValue::Blob(b) => Value::Blob(b.to_slice().to_vec()), }; r.push(v); } diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 299ae7310..85799c364 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -637,7 +637,7 @@ pub unsafe extern "C" fn sqlite3_column_text( None => return std::ptr::null(), }; match row.get_values().get(idx as usize) { - Some(limbo_core::OwnedValue::Text(text)) => text.as_str().as_ptr(), + Some(limbo_core::RefValue::Text(text)) => text.as_str().as_ptr(), _ => std::ptr::null(), } } diff --git a/testing/testing.db b/testing/testing.db index e508d589dfd51183fb8bf04a1e1f46a212be79b5..984b71fecd5bd6a1b53304575976b5123264f629 100644 GIT binary patch delta 192 zcmZo@@M>7#H9=aCm4Sg#lK}x(Ch8cAvNGu9?cxOrvhe<5;CsrK#HYghYqO$48?TlI z3!6AUKa+h)Vp2|OMFpd#bC9cJh^s<~qmz%TLWM?!LS|k`YI1UC+ZjrvoPpY74QNDnfX*0_@445@u_T9RM^DZtiso> z!p8{2OhC*G#4JF}3dC$c%nrmHK+FlmTtLhX#5_RE3&bFO{6H)K#Dd#Z_=N5h0stE} B87Tk& diff --git a/tests/integration/functions/test_function_rowid.rs b/tests/integration/functions/test_function_rowid.rs index 78a77ce9f..6afdae0ca 100644 --- a/tests/integration/functions/test_function_rowid.rs +++ b/tests/integration/functions/test_function_rowid.rs @@ -30,7 +30,7 @@ fn test_last_insert_rowid_basic() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - if let limbo_core::OwnedValue::Integer(id) = row.get_value(0) { + if let limbo_core::RefValue::Integer(id) = row.get_value(0) { assert_eq!(*id, 1, "First insert should have rowid 1"); } } @@ -66,7 +66,7 @@ fn test_last_insert_rowid_basic() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - if let limbo_core::OwnedValue::Integer(id) = row.get_value(0) { + if let limbo_core::RefValue::Integer(id) = row.get_value(0) { last_id = *id; } } @@ -112,7 +112,7 @@ fn test_integer_primary_key() -> anyhow::Result<()> { match select_query.step()? { StepResult::Row => { let row = select_query.row().unwrap(); - if let limbo_core::OwnedValue::Integer(id) = row.get_value(0) { + if let limbo_core::RefValue::Integer(id) = row.get_value(0) { rowids.push(*id); } } diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 92f10ebab..881ab4e54 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -74,11 +74,15 @@ mod tests { .get_values() .iter() .map(|x| match x { - limbo_core::OwnedValue::Null => rusqlite::types::Value::Null, - limbo_core::OwnedValue::Integer(x) => rusqlite::types::Value::Integer(*x), - limbo_core::OwnedValue::Float(x) => rusqlite::types::Value::Real(*x), - limbo_core::OwnedValue::Text(x) => rusqlite::types::Value::Text(x.to_string()), - limbo_core::OwnedValue::Blob(x) => rusqlite::types::Value::Blob(x.to_vec()), + limbo_core::RefValue::Null => rusqlite::types::Value::Null, + limbo_core::RefValue::Integer(x) => rusqlite::types::Value::Integer(*x), + limbo_core::RefValue::Float(x) => rusqlite::types::Value::Real(*x), + limbo_core::RefValue::Text(x) => { + rusqlite::types::Value::Text(x.as_str().to_string()) + } + limbo_core::RefValue::Blob(x) => { + rusqlite::types::Value::Blob(x.to_slice().to_vec()) + } }) .collect(); rows.push(row); diff --git a/tests/integration/query_processing/test_read_path.rs b/tests/integration/query_processing/test_read_path.rs index b90d8f9d4..072db306f 100644 --- a/tests/integration/query_processing/test_read_path.rs +++ b/tests/integration/query_processing/test_read_path.rs @@ -63,23 +63,23 @@ fn test_statement_bind() -> anyhow::Result<()> { match stmt.step()? { StepResult::Row => { let row = stmt.row().unwrap(); - if let limbo_core::OwnedValue::Text(s) = row.get_value(0) { + if let limbo_core::RefValue::Text(s) = row.get_value(0) { assert_eq!(s.as_str(), "hello") } - if let limbo_core::OwnedValue::Text(s) = row.get_value(1) { + if let limbo_core::RefValue::Text(s) = row.get_value(1) { assert_eq!(s.as_str(), "hello") } - if let limbo_core::OwnedValue::Integer(i) = row.get_value(2) { + if let limbo_core::RefValue::Integer(i) = row.get_value(2) { assert_eq!(*i, 42) } - if let limbo_core::OwnedValue::Blob(v) = row.get_value(3) { - assert_eq!(v.as_ref(), &vec![0x1 as u8, 0x2, 0x3]) + if let limbo_core::RefValue::Blob(v) = row.get_value(3) { + assert_eq!(v.to_slice(), &vec![0x1 as u8, 0x2, 0x3]) } - if let limbo_core::OwnedValue::Float(f) = row.get_value(4) { + if let limbo_core::RefValue::Float(f) = row.get_value(4) { assert_eq!(*f, 0.5) } } diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs index 2e9941045..0dd438f2e 100644 --- a/tests/integration/query_processing/test_write_path.rs +++ b/tests/integration/query_processing/test_write_path.rs @@ -1,6 +1,6 @@ use crate::common::{self, maybe_setup_tracing}; use crate::common::{compare_string, do_flush, TempDatabase}; -use limbo_core::{Connection, StepResult}; +use limbo_core::{Connection, RefValue, StepResult}; use log::debug; use std::rc::Rc; @@ -47,12 +47,12 @@ fn test_simple_overflow_page() -> anyhow::Result<()> { let first_value = row.get_value(0); let text = row.get_value(1); let id = match first_value { - limbo_core::OwnedValue::Integer(i) => *i as i32, - limbo_core::OwnedValue::Float(f) => *f as i32, + limbo_core::RefValue::Integer(i) => *i as i32, + limbo_core::RefValue::Float(f) => *f as i32, _ => unreachable!(), }; let text = match text { - limbo_core::OwnedValue::Text(t) => t.as_str(), + limbo_core::RefValue::Text(t) => t.as_str(), _ => unreachable!(), }; assert_eq!(1, id); @@ -123,12 +123,12 @@ fn test_sequential_overflow_page() -> anyhow::Result<()> { let first_value = row.get_value(0); let text = row.get_value(1); let id = match first_value { - limbo_core::OwnedValue::Integer(i) => *i as i32, - limbo_core::OwnedValue::Float(f) => *f as i32, + limbo_core::RefValue::Integer(i) => *i as i32, + limbo_core::RefValue::Float(f) => *f as i32, _ => unreachable!(), }; let text = match text { - limbo_core::OwnedValue::Text(t) => t.as_str(), + limbo_core::RefValue::Text(t) => t.as_str(), _ => unreachable!(), }; let huge_text = &huge_texts[current_index]; @@ -194,8 +194,8 @@ fn test_sequential_write() -> anyhow::Result<()> { let row = rows.row().unwrap(); let first_value = row.get_values().first().expect("missing id"); let id = match first_value { - limbo_core::OwnedValue::Integer(i) => *i as i32, - limbo_core::OwnedValue::Float(f) => *f as i32, + limbo_core::RefValue::Integer(i) => *i as i32, + limbo_core::RefValue::Float(f) => *f as i32, _ => unreachable!(), }; assert_eq!(current_read_index, id); @@ -260,7 +260,7 @@ fn test_regression_multi_row_insert() -> anyhow::Result<()> { let row = rows.row().unwrap(); let first_value = row.get_values().first().expect("missing id"); let id = match first_value { - limbo_core::OwnedValue::Float(f) => *f as i32, + RefValue::Float(f) => *f as i32, _ => panic!("expected float"), }; actual_ids.push(id); @@ -370,8 +370,8 @@ fn test_wal_checkpoint() -> anyhow::Result<()> { let row = rows.row().unwrap(); let first_value = row.get_value(0); let id = match first_value { - limbo_core::OwnedValue::Integer(i) => *i as i32, - limbo_core::OwnedValue::Float(f) => *f as i32, + RefValue::Integer(i) => *i as i32, + RefValue::Float(f) => *f as i32, _ => unreachable!(), }; assert_eq!(current_index, id as usize); @@ -434,7 +434,7 @@ fn test_wal_restart() -> anyhow::Result<()> { let row = rows.row().unwrap(); let first_value = row.get_value(0); let count = match first_value { - limbo_core::OwnedValue::Integer(i) => i, + RefValue::Integer(i) => i, _ => unreachable!(), }; debug!("counted {}", count); diff --git a/tests/integration/wal/test_wal.rs b/tests/integration/wal/test_wal.rs index b76ac1773..4cf829919 100644 --- a/tests/integration/wal/test_wal.rs +++ b/tests/integration/wal/test_wal.rs @@ -1,5 +1,5 @@ use crate::common::{do_flush, maybe_setup_tracing, TempDatabase}; -use limbo_core::{Connection, LimboError, Result, StepResult}; +use limbo_core::{Connection, LimboError, RefValue, Result, StepResult}; use std::cell::RefCell; use std::ops::Deref; use std::rc::Rc; @@ -83,7 +83,7 @@ fn test_wal_1_writer_1_reader() -> Result<()> { let row = rows.row().unwrap(); let first_value = row.get_value(0); let id = match first_value { - limbo_core::OwnedValue::Integer(i) => *i as i32, + RefValue::Integer(i) => *i as i32, _ => unreachable!(), }; assert_eq!(id, i); @@ -158,7 +158,7 @@ pub(crate) fn execute_and_get_ints( let row = stmt.row().unwrap(); for value in row.get_values() { let out = match value { - limbo_core::OwnedValue::Integer(i) => i, + limbo_core::RefValue::Integer(i) => i, _ => { return Err(LimboError::ConversionError(format!( "cannot convert {value} to int" From 78e9f1c09a16be07c9d8cd568f07630d3e74d5f2 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 27 Mar 2025 14:03:50 +0100 Subject: [PATCH 02/22] append writer --- core/types.rs | 65 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/core/types.rs b/core/types.rs index 9dbdebb5c..005236acf 100644 --- a/core/types.rs +++ b/core/types.rs @@ -665,6 +665,22 @@ impl Record { self.values.len() } } +struct AppendWriter<'a> { + buf: &'a mut Vec, + pos: usize, +} + +impl<'a> AppendWriter<'a> { + pub fn new(buf: &'a mut Vec, pos: usize) -> Self { + Self { buf, pos } + } + + #[inline] + pub fn extend_from_slice(&mut self, slice: &[u8]) { + self.buf[self.pos..self.pos + slice.len()].copy_from_slice(slice); + self.pos += slice.len(); + } +} impl ImmutableRecord { pub fn get<'a, T: FromValue<'a> + 'a>(&'a self, idx: usize) -> Result { @@ -697,6 +713,7 @@ impl ImmutableRecord { pub fn from_registers(registers: &[Register]) -> Self { let mut values = Vec::with_capacity(registers.len()); + let mut serials = Vec::with_capacity(registers.len()); let mut size_header = 0; let mut size_values = 0; @@ -706,6 +723,7 @@ impl ImmutableRecord { let value = value.get_owned_value(); let serial_type = SerialType::from(value); let n = write_varint(&mut serial_type_buf[0..], serial_type.into()); + serials.push(serial_type_buf.clone()); let value_size = match serial_type { SerialType::Null => 0, @@ -735,50 +753,51 @@ impl ImmutableRecord { // if( nVarint {} OwnedValue::Integer(i) => { values.push(RefValue::Integer(*i)); let serial_type = SerialType::from(value); match serial_type { - SerialType::I8 => buf.extend_from_slice(&(*i as i8).to_be_bytes()), - SerialType::I16 => buf.extend_from_slice(&(*i as i16).to_be_bytes()), - SerialType::I24 => buf.extend_from_slice(&(*i as i32).to_be_bytes()[1..]), // remove most significant byte - SerialType::I32 => buf.extend_from_slice(&(*i as i32).to_be_bytes()), - SerialType::I48 => buf.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes - SerialType::I64 => buf.extend_from_slice(&i.to_be_bytes()), + SerialType::I8 => writer.extend_from_slice(&(*i as i8).to_be_bytes()), + SerialType::I16 => writer.extend_from_slice(&(*i as i16).to_be_bytes()), + SerialType::I24 => { + writer.extend_from_slice(&(*i as i32).to_be_bytes()[1..]) + } // remove most significant byte + SerialType::I32 => writer.extend_from_slice(&(*i as i32).to_be_bytes()), + SerialType::I48 => writer.extend_from_slice(&i.to_be_bytes()[2..]), // remove 2 most significant bytes + SerialType::I64 => writer.extend_from_slice(&i.to_be_bytes()), _ => unreachable!(), } } OwnedValue::Float(f) => { values.push(RefValue::Float(*f)); - buf.extend_from_slice(&f.to_be_bytes()) + writer.extend_from_slice(&f.to_be_bytes()) } OwnedValue::Text(t) => { - buf.extend_from_slice(&t.value); - let end_offset = buf.len(); + writer.extend_from_slice(&t.value); + let end_offset = writer.pos; let len = end_offset - start_offset; - let ptr = unsafe { buf.as_ptr().add(start_offset) }; + let ptr = unsafe { writer.buf.as_ptr().add(start_offset) }; let value = RefValue::Text(TextRef { value: RawSlice::new(ptr, len), subtype: t.subtype.clone(), @@ -786,10 +805,10 @@ impl ImmutableRecord { values.push(value); } OwnedValue::Blob(b) => { - buf.extend_from_slice(b); - let end_offset = buf.len(); + writer.extend_from_slice(b); + let end_offset = writer.pos; let len = end_offset - start_offset; - let ptr = unsafe { buf.as_ptr().add(start_offset) }; + let ptr = unsafe { writer.buf.as_ptr().add(start_offset) }; values.push(RefValue::Blob(RawSlice::new(ptr, len))); } }; From 5b7fcd27bdb3da6302a5dee8274493d8af89bf25 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 27 Mar 2025 14:33:50 +0100 Subject: [PATCH 03/22] make column reuse blob/text fields --- bindings/go/rs_src/types.rs | 3 +- bindings/java/rs_src/limbo_statement.rs | 3 +- bindings/python/src/lib.rs | 2 +- cli/app.rs | 2 +- core/json/json_operations.rs | 4 +- core/json/mod.rs | 16 ++-- core/storage/btree.rs | 12 +-- core/types.rs | 21 +++-- core/vdbe/explain.rs | 2 +- core/vdbe/mod.rs | 102 ++++++++++++++---------- 10 files changed, 93 insertions(+), 74 deletions(-) diff --git a/bindings/go/rs_src/types.rs b/bindings/go/rs_src/types.rs index 7e3e592b9..7bded37ae 100644 --- a/bindings/go/rs_src/types.rs +++ b/bindings/go/rs_src/types.rs @@ -1,5 +1,4 @@ use std::ffi::{c_char, c_void}; -use std::rc::Rc; #[allow(dead_code)] #[repr(C)] @@ -196,7 +195,7 @@ impl LimboValue { return limbo_core::OwnedValue::Null; } let bytes = self.value.to_bytes(); - limbo_core::OwnedValue::Blob(Rc::new(bytes.to_vec())) + limbo_core::OwnedValue::Blob(bytes.to_vec()) } ValueType::Null => limbo_core::OwnedValue::Null, } diff --git a/bindings/java/rs_src/limbo_statement.rs b/bindings/java/rs_src/limbo_statement.rs index 8f9395e23..fbb33fa79 100644 --- a/bindings/java/rs_src/limbo_statement.rs +++ b/bindings/java/rs_src/limbo_statement.rs @@ -7,7 +7,6 @@ use jni::sys::{jdouble, jint, jlong}; use jni::JNIEnv; use limbo_core::{OwnedValue, Statement, StepResult}; use std::num::NonZero; -use std::rc::Rc; pub const STEP_RESULT_ID_ROW: i32 = 10; #[allow(dead_code)] @@ -264,7 +263,7 @@ pub extern "system" fn Java_tech_turso_core_LimboStatement_bindBlob<'local>( stmt.stmt.bind_at( NonZero::new(position as usize).unwrap(), - OwnedValue::Blob(Rc::new(blob)), + OwnedValue::Blob(blob), ); SQLITE_OK } diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index 2f25a205d..ff963f1db 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -350,7 +350,7 @@ fn py_to_owned_value(obj: &Bound) -> Result { } else if let Ok(string) = obj.extract::() { return Ok(OwnedValue::Text(Text::from_str(string))); } else if let Ok(bytes) = obj.downcast::() { - return Ok(OwnedValue::Blob(Rc::new(bytes.as_bytes().to_vec()))); + return Ok(OwnedValue::Blob(bytes.as_bytes().to_vec())); } else { return Err(PyErr::new::(format!( "Unsupported Python type: {}", diff --git a/cli/app.rs b/cli/app.rs index cca35d4d8..c85f20d61 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -5,7 +5,7 @@ use crate::{ opcodes_dictionary::OPCODE_DESCRIPTIONS, }; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table}; -use limbo_core::{Database, LimboError, OwnedValue, RefValue, Statement, StepResult}; +use limbo_core::{Database, LimboError, RefValue, Statement, StepResult}; use clap::{Parser, ValueEnum}; use rustyline::{history::DefaultHistory, Editor}; diff --git a/core/json/json_operations.rs b/core/json/json_operations.rs index 3c83c3ba3..17369cc3c 100644 --- a/core/json/json_operations.rs +++ b/core/json/json_operations.rs @@ -185,7 +185,7 @@ pub fn jsonb_remove(args: &[Register], json_cache: &JsonCacheCell) -> crate::Res } } - Ok(OwnedValue::Blob(Rc::new(json.data()))) + Ok(OwnedValue::Blob(json.data())) } pub fn json_replace(args: &[Register], json_cache: &JsonCacheCell) -> crate::Result { @@ -554,7 +554,7 @@ mod tests { #[test] #[should_panic(expected = "blob is not supported!")] fn test_blob_not_supported() { - let target = OwnedValue::Blob(Rc::new(vec![1, 2, 3])); + let target = OwnedValue::Blob(vec![1, 2, 3]); let patch = create_text("{}"); json_patch(&target, &patch).unwrap(); } diff --git a/core/json/mod.rs b/core/json/mod.rs index 039440d1d..07d93b06d 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -73,7 +73,7 @@ pub fn get_json(json_value: &OwnedValue, indent: Option<&str>) -> crate::Result< let jsonbin = Jsonb::new(b.len(), Some(b)); jsonbin.is_valid()?; Ok(OwnedValue::Text(Text { - value: Rc::new(jsonbin.to_string()?.into_bytes()), + value: jsonbin.to_string()?.into_bytes(), subtype: TextSubtype::Json, })) } @@ -95,7 +95,7 @@ pub fn jsonb(json_value: &OwnedValue, cache: &JsonCacheCell) -> crate::Result Ok(OwnedValue::Blob(Rc::new(jsonbin.data()))), + Ok(jsonbin) => Ok(OwnedValue::Blob(jsonbin.data())), Err(_) => { bail_parse_error!("malformed JSON") } @@ -405,7 +405,7 @@ fn json_string_to_db_type( ) -> crate::Result { let mut json_string = json.to_string()?; if matches!(flag, OutputVariant::Binary) { - return Ok(OwnedValue::Blob(Rc::new(json.data()))); + return Ok(OwnedValue::Blob(json.data())); } match element_type { ElementType::ARRAY | ElementType::OBJECT => Ok(OwnedValue::Text(Text::json(json_string))), @@ -414,12 +414,12 @@ fn json_string_to_db_type( json_string.remove(json_string.len() - 1); json_string.remove(0); Ok(OwnedValue::Text(Text { - value: Rc::new(json_string.into_bytes()), + value: json_string.into_bytes(), subtype: TextSubtype::Json, })) } else { Ok(OwnedValue::Text(Text { - value: Rc::new(json_string.into_bytes()), + value: json_string.into_bytes(), subtype: TextSubtype::Text, })) } @@ -764,7 +764,7 @@ mod tests { #[test] fn test_get_json_blob_valid_jsonb() { let binary_json = vec![124, 55, 104, 101, 121, 39, 121, 111]; - let input = OwnedValue::Blob(Rc::new(binary_json)); + let input = OwnedValue::Blob(binary_json); let result = get_json(&input, None).unwrap(); if let OwnedValue::Text(result_str) = result { assert!(result_str.as_str().contains(r#"{"hey":"yo"}"#)); @@ -777,7 +777,7 @@ mod tests { #[test] fn test_get_json_blob_invalid_jsonb() { let binary_json: Vec = vec![0xA2, 0x62, 0x6B, 0x31, 0x62, 0x76]; // Incomplete binary JSON - let input = OwnedValue::Blob(Rc::new(binary_json)); + let input = OwnedValue::Blob(binary_json); let result = get_json(&input, None); println!("{:?}", result); match result { @@ -832,7 +832,7 @@ mod tests { #[test] fn test_json_array_blob_invalid() { - let blob = Register::OwnedValue(OwnedValue::Blob(Rc::new("1".as_bytes().to_vec()))); + let blob = Register::OwnedValue(OwnedValue::Blob("1".as_bytes().to_vec())); let input = vec![blob]; diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 458b9d26f..27340583c 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -3894,7 +3894,7 @@ mod tests { .unwrap(); let key = OwnedValue::Integer(*key); let value = ImmutableRecord::from_registers(&[Register::OwnedValue( - OwnedValue::Blob(Rc::new(vec![0; *size])), + OwnedValue::Blob(vec![0; *size]), )]); tracing::info!("insert key:{}", key); run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); @@ -3961,7 +3961,7 @@ mod tests { let key = OwnedValue::Integer(key); let value = ImmutableRecord::from_registers(&[Register::OwnedValue( - OwnedValue::Blob(Rc::new(vec![0; size])), + OwnedValue::Blob(vec![0; size]), )]); run_until_done(|| cursor.insert(&key, &value, true), pager.deref()).unwrap(); } @@ -4874,9 +4874,11 @@ mod tests { let page = get_page(2); let usable_space = 4096; - let record = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Blob( - Rc::new(vec![0; 3600]), - ))]); + let record = + ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Blob(vec![ + 0; + 3600 + ]))]); let mut payload: Vec = Vec::new(); fill_cell_payload( page.get_contents().page_type(), diff --git a/core/types.rs b/core/types.rs index 005236acf..ba5641b0d 100644 --- a/core/types.rs +++ b/core/types.rs @@ -34,7 +34,7 @@ pub enum TextSubtype { #[derive(Debug, Clone, PartialEq)] pub struct Text { - pub value: Rc>, + pub value: Vec, pub subtype: TextSubtype, } @@ -51,14 +51,14 @@ impl Text { pub fn new(value: &str) -> Self { Self { - value: Rc::new(value.as_bytes().to_vec()), + value: value.as_bytes().to_vec(), subtype: TextSubtype::Text, } } pub fn json(value: String) -> Self { Self { - value: Rc::new(value.into_bytes()), + value: value.into_bytes(), subtype: TextSubtype::Json, } } @@ -88,7 +88,7 @@ pub enum OwnedValue { Integer(i64), Float(f64), Text(Text), - Blob(Rc>), + Blob(Vec), } #[derive(Debug, Clone, PartialEq)] @@ -97,7 +97,7 @@ pub struct RawSlice { len: usize, } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone)] pub enum RefValue { Null, Integer(i64), @@ -120,7 +120,7 @@ impl OwnedValue { } pub fn from_blob(data: Vec) -> Self { - OwnedValue::Blob(std::rc::Rc::new(data)) + OwnedValue::Blob(data) } pub fn to_text(&self) -> Option<&str> { @@ -341,7 +341,7 @@ impl OwnedValue { let Some(blob) = v.to_blob() else { return Ok(OwnedValue::Null); }; - Ok(OwnedValue::Blob(Rc::new(blob))) + Ok(OwnedValue::Blob(blob)) } ExtValueType::Error => { let Some(err) = v.to_error_details() else { @@ -742,7 +742,6 @@ impl ImmutableRecord { size_values += value_size; } let mut header_size = size_header; - let mut header_bytes_buf: Vec = Vec::new(); if header_size <= 126 { // common case header_size += 1; @@ -879,10 +878,10 @@ impl RefValue { RefValue::Integer(i) => OwnedValue::Integer(*i), RefValue::Float(f) => OwnedValue::Float(*f), RefValue::Text(text_ref) => OwnedValue::Text(Text { - value: Rc::new(text_ref.value.to_slice().to_vec()), + value: text_ref.value.to_slice().to_vec(), subtype: text_ref.subtype.clone(), }), - RefValue::Blob(b) => OwnedValue::Blob(Rc::new(b.to_slice().to_vec())), + RefValue::Blob(b) => OwnedValue::Blob(b.to_slice().to_vec()), } } pub fn to_blob(&self) -> Option<&[u8]> { @@ -1457,7 +1456,7 @@ mod tests { #[test] fn test_serialize_blob() { - let blob = Rc::new(vec![1, 2, 3, 4, 5]); + let blob = vec![1, 2, 3, 4, 5]; let record = Record::new(vec![OwnedValue::Blob(blob.clone())]); let mut buf = Vec::new(); record.serialize(&mut buf); diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs index 613cd66ea..67333c334 100644 --- a/core/vdbe/explain.rs +++ b/core/vdbe/explain.rs @@ -672,7 +672,7 @@ pub fn insn_to_str( 0, *dest as i32, 0, - OwnedValue::Blob(Rc::new(value.clone())), + OwnedValue::Blob(value.clone()), 0, format!( "r[{}]={} (len={})", diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 4ff6856e1..53d3095c1 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -50,7 +50,7 @@ use crate::util::{ use crate::vdbe::builder::CursorType; use crate::vdbe::insn::Insn; use crate::vector::{vector32, vector64, vector_distance_cos, vector_extract}; -use crate::{bail_constraint_error, info, CheckpointStatus}; +use crate::{bail_constraint_error, info, CheckpointStatus, RefValue}; #[cfg(feature = "json")] use crate::{ function::JsonFunc, json::get_json, json::is_json_valid, json::json_array, @@ -1180,17 +1180,38 @@ impl Program { ); let cursor = cursor.as_btree_mut(); let record = cursor.record(); - if let Some(record) = record.as_ref() { + let value = if let Some(record) = record.as_ref() { if cursor.get_null_flag() { - OwnedValue::Null + RefValue::Null } else { - record.get_value(*column).to_owned() + record.get_value(*column).clone() } } else { - OwnedValue::Null - } + RefValue::Null + }; + value }; - state.registers[*dest] = Register::OwnedValue(value); + // If we are copying a text/blob, let's try to simply update size of text if we need to allocate more and reuse. + match (&value, &mut state.registers[*dest]) { + ( + RefValue::Text(text_ref), + Register::OwnedValue(OwnedValue::Text(text_reg)), + ) => { + text_reg.value.clear(); + text_reg.value.extend_from_slice(text_ref.value.to_slice()); + } + ( + RefValue::Blob(raw_slice), + Register::OwnedValue(OwnedValue::Blob(blob_reg)), + ) => { + blob_reg.clear(); + blob_reg.extend_from_slice(raw_slice.to_slice()); + } + _ => { + let reg = &mut state.registers[*dest]; + *reg = Register::OwnedValue(value.to_owned()); + } + } } CursorType::Sorter => { let record = { @@ -1436,8 +1457,7 @@ impl Program { state.pc += 1; } Insn::Blob { value, dest } => { - state.registers[*dest] = - Register::OwnedValue(OwnedValue::Blob(Rc::new(value.clone()))); + state.registers[*dest] = Register::OwnedValue(OwnedValue::Blob(value.clone())); state.pc += 1; } Insn::RowId { cursor_id, dest } => { @@ -3908,7 +3928,7 @@ fn exec_randomblob(reg: &OwnedValue) -> OwnedValue { let mut blob: Vec = vec![0; length]; getrandom::getrandom(&mut blob).expect("Failed to generate random blob"); - OwnedValue::Blob(Rc::new(blob)) + OwnedValue::Blob(blob) } fn exec_quote(value: &OwnedValue) -> OwnedValue { @@ -4060,7 +4080,7 @@ fn exec_instr(reg: &OwnedValue, pattern: &OwnedValue) -> OwnedValue { if let (OwnedValue::Blob(reg), OwnedValue::Blob(pattern)) = (reg, pattern) { let result = reg .windows(pattern.len()) - .position(|window| window == **pattern) + .position(|window| window == *pattern) .map_or(0, |i| i + 1); return OwnedValue::Integer(result as i64); } @@ -4117,7 +4137,7 @@ fn exec_unhex(reg: &OwnedValue, ignored_chars: Option<&OwnedValue>) -> OwnedValu OwnedValue::Null => OwnedValue::Null, _ => match ignored_chars { None => match hex::decode(reg.to_string()) { - Ok(bytes) => OwnedValue::Blob(Rc::new(bytes)), + Ok(bytes) => OwnedValue::Blob(bytes), Err(_) => OwnedValue::Null, }, Some(ignore) => match ignore { @@ -4129,7 +4149,7 @@ fn exec_unhex(reg: &OwnedValue, ignored_chars: Option<&OwnedValue>) -> OwnedValu .trim_end_matches(|x| pat.contains(x)) .to_string(); match hex::decode(trimmed) { - Ok(bytes) => OwnedValue::Blob(Rc::new(bytes)), + Ok(bytes) => OwnedValue::Blob(bytes), Err(_) => OwnedValue::Null, } } @@ -4240,7 +4260,7 @@ fn exec_zeroblob(req: &OwnedValue) -> OwnedValue { OwnedValue::Text(s) => s.as_str().parse().unwrap_or(0), _ => 0, }; - OwnedValue::Blob(Rc::new(vec![0; length.max(0) as usize])) + OwnedValue::Blob(vec![0; length.max(0) as usize]) } // exec_if returns whether you should jump @@ -4264,7 +4284,7 @@ fn exec_cast(value: &OwnedValue, datatype: &str) -> OwnedValue { // Convert to TEXT first, then interpret as BLOB // TODO: handle encoding let text = value.to_string(); - OwnedValue::Blob(Rc::new(text.into_bytes())) + OwnedValue::Blob(text.into_bytes()) } // TEXT To cast a BLOB value to TEXT, the sequence of bytes that make up the BLOB is interpreted as text encoded using the database encoding. // Casting an INTEGER or REAL value into TEXT renders the value as if via sqlite3_snprintf() except that the resulting TEXT uses the encoding of the database connection. @@ -4493,7 +4513,7 @@ mod tests { let expected_len = OwnedValue::Integer(7); assert_eq!(exec_length(&input_float), expected_len); - let expected_blob = OwnedValue::Blob(Rc::new("example".as_bytes().to_vec())); + let expected_blob = OwnedValue::Blob("example".as_bytes().to_vec()); let expected_len = OwnedValue::Integer(7); assert_eq!(exec_length(&expected_blob), expected_len); } @@ -4531,7 +4551,7 @@ mod tests { let expected: OwnedValue = OwnedValue::build_text("text"); assert_eq!(exec_typeof(&input), expected); - let input = OwnedValue::Blob(Rc::new("limbo".as_bytes().to_vec())); + let input = OwnedValue::Blob("limbo".as_bytes().to_vec()); let expected: OwnedValue = OwnedValue::build_text("blob"); assert_eq!(exec_typeof(&input), expected); } @@ -4565,7 +4585,7 @@ mod tests { ); assert_eq!(exec_unicode(&OwnedValue::Null), OwnedValue::Null); assert_eq!( - exec_unicode(&OwnedValue::Blob(Rc::new("example".as_bytes().to_vec()))), + exec_unicode(&OwnedValue::Blob("example".as_bytes().to_vec())), OwnedValue::Integer(101) ); } @@ -4725,11 +4745,11 @@ mod tests { #[test] fn test_unhex() { let input = OwnedValue::build_text("6f"); - let expected = OwnedValue::Blob(Rc::new(vec![0x6f])); + let expected = OwnedValue::Blob(vec![0x6f]); assert_eq!(exec_unhex(&input, None), expected); let input = OwnedValue::build_text("6f"); - let expected = OwnedValue::Blob(Rc::new(vec![0x6f])); + let expected = OwnedValue::Blob(vec![0x6f]); assert_eq!(exec_unhex(&input, None), expected); let input = OwnedValue::build_text("611"); @@ -4737,7 +4757,7 @@ mod tests { assert_eq!(exec_unhex(&input, None), expected); let input = OwnedValue::build_text(""); - let expected = OwnedValue::Blob(Rc::new(vec![])); + let expected = OwnedValue::Blob(vec![]); assert_eq!(exec_unhex(&input, None), expected); let input = OwnedValue::build_text("61x"); @@ -5124,23 +5144,23 @@ mod tests { let expected = OwnedValue::Integer(3); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Blob(Rc::new(vec![1, 2, 3, 4, 5])); - let pattern = OwnedValue::Blob(Rc::new(vec![3, 4])); + let input = OwnedValue::Blob(vec![1, 2, 3, 4, 5]); + let pattern = OwnedValue::Blob(vec![3, 4]); let expected = OwnedValue::Integer(3); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Blob(Rc::new(vec![1, 2, 3, 4, 5])); - let pattern = OwnedValue::Blob(Rc::new(vec![3, 2])); + let input = OwnedValue::Blob(vec![1, 2, 3, 4, 5]); + let pattern = OwnedValue::Blob(vec![3, 2]); let expected = OwnedValue::Integer(0); assert_eq!(exec_instr(&input, &pattern), expected); - let input = OwnedValue::Blob(Rc::new(vec![0x61, 0x62, 0x63, 0x64, 0x65])); + let input = OwnedValue::Blob(vec![0x61, 0x62, 0x63, 0x64, 0x65]); let pattern = OwnedValue::build_text("cd"); let expected = OwnedValue::Integer(3); assert_eq!(exec_instr(&input, &pattern), expected); let input = OwnedValue::build_text("abcde"); - let pattern = OwnedValue::Blob(Rc::new(vec![0x63, 0x64])); + let pattern = OwnedValue::Blob(vec![0x63, 0x64]); let expected = OwnedValue::Integer(3); assert_eq!(exec_instr(&input, &pattern), expected); } @@ -5191,19 +5211,19 @@ mod tests { let expected = Some(OwnedValue::Integer(0)); assert_eq!(exec_sign(&input), expected); - let input = OwnedValue::Blob(Rc::new(b"abc".to_vec())); + let input = OwnedValue::Blob(b"abc".to_vec()); let expected = Some(OwnedValue::Null); assert_eq!(exec_sign(&input), expected); - let input = OwnedValue::Blob(Rc::new(b"42".to_vec())); + let input = OwnedValue::Blob(b"42".to_vec()); let expected = Some(OwnedValue::Integer(1)); assert_eq!(exec_sign(&input), expected); - let input = OwnedValue::Blob(Rc::new(b"-42".to_vec())); + let input = OwnedValue::Blob(b"-42".to_vec()); let expected = Some(OwnedValue::Integer(-1)); assert_eq!(exec_sign(&input), expected); - let input = OwnedValue::Blob(Rc::new(b"0".to_vec())); + let input = OwnedValue::Blob(b"0".to_vec()); let expected = Some(OwnedValue::Integer(0)); assert_eq!(exec_sign(&input), expected); @@ -5215,39 +5235,39 @@ mod tests { #[test] fn test_exec_zeroblob() { let input = OwnedValue::Integer(0); - let expected = OwnedValue::Blob(Rc::new(vec![])); + let expected = OwnedValue::Blob(vec![]); assert_eq!(exec_zeroblob(&input), expected); let input = OwnedValue::Null; - let expected = OwnedValue::Blob(Rc::new(vec![])); + let expected = OwnedValue::Blob(vec![]); assert_eq!(exec_zeroblob(&input), expected); let input = OwnedValue::Integer(4); - let expected = OwnedValue::Blob(Rc::new(vec![0; 4])); + let expected = OwnedValue::Blob(vec![0; 4]); assert_eq!(exec_zeroblob(&input), expected); let input = OwnedValue::Integer(-1); - let expected = OwnedValue::Blob(Rc::new(vec![])); + let expected = OwnedValue::Blob(vec![]); assert_eq!(exec_zeroblob(&input), expected); let input = OwnedValue::build_text("5"); - let expected = OwnedValue::Blob(Rc::new(vec![0; 5])); + let expected = OwnedValue::Blob(vec![0; 5]); assert_eq!(exec_zeroblob(&input), expected); let input = OwnedValue::build_text("-5"); - let expected = OwnedValue::Blob(Rc::new(vec![])); + let expected = OwnedValue::Blob(vec![]); assert_eq!(exec_zeroblob(&input), expected); let input = OwnedValue::build_text("text"); - let expected = OwnedValue::Blob(Rc::new(vec![])); + let expected = OwnedValue::Blob(vec![]); assert_eq!(exec_zeroblob(&input), expected); let input = OwnedValue::Float(2.6); - let expected = OwnedValue::Blob(Rc::new(vec![0; 2])); + let expected = OwnedValue::Blob(vec![0; 2]); assert_eq!(exec_zeroblob(&input), expected); - let input = OwnedValue::Blob(Rc::new(vec![1])); - let expected = OwnedValue::Blob(Rc::new(vec![])); + let input = OwnedValue::Blob(vec![1]); + let expected = OwnedValue::Blob(vec![]); assert_eq!(exec_zeroblob(&input), expected); } From ee55116ca676ce2db39bb396e36c270fee1e4532 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 27 Mar 2025 15:53:03 +0100 Subject: [PATCH 04/22] return row as reference to registers --- bindings/go/rs_src/rows.rs | 6 +- bindings/go/rs_src/types.rs | 19 +++++ bindings/java/rs_src/limbo_statement.rs | 12 +-- bindings/javascript/src/lib.rs | 16 ++-- bindings/python/src/lib.rs | 12 +-- bindings/rust/src/lib.rs | 2 +- bindings/wasm/lib.rs | 12 +-- cli/app.rs | 20 ++--- core/lib.rs | 2 +- core/vdbe/mod.rs | 80 ++++++++++++++++++- simulator/generation/plan.rs | 10 +-- sqlite3/src/lib.rs | 5 +- tests/integration/fuzz/mod.rs | 13 ++- .../query_processing/test_write_path.rs | 22 +++-- tests/integration/wal/test_wal.rs | 2 +- 15 files changed, 166 insertions(+), 67 deletions(-) diff --git a/bindings/go/rs_src/rows.rs b/bindings/go/rs_src/rows.rs index a1e6382d9..2de0bf8a2 100644 --- a/bindings/go/rs_src/rows.rs +++ b/bindings/go/rs_src/rows.rs @@ -2,7 +2,7 @@ use crate::{ types::{LimboValue, ResultCode}, LimboConn, }; -use limbo_core::{LimboError, Statement, StepResult}; +use limbo_core::{LimboError, OwnedValue, Statement, StepResult}; use std::ffi::{c_char, c_void}; pub struct LimboRows<'conn> { @@ -75,8 +75,8 @@ pub extern "C" fn rows_get_value(ctx: *mut c_void, col_idx: usize) -> *const c_v let ctx = LimboRows::from_ptr(ctx); if let Some(row) = ctx.stmt.row() { - if let Some(value) = row.get_values().get(col_idx) { - return LimboValue::from_value(value).to_ptr(); + if let Ok(value) = row.get::<&OwnedValue>(col_idx) { + return LimboValue::from_owned_value(value).to_ptr(); } } std::ptr::null() diff --git a/bindings/go/rs_src/types.rs b/bindings/go/rs_src/types.rs index 7bded37ae..712306a34 100644 --- a/bindings/go/rs_src/types.rs +++ b/bindings/go/rs_src/types.rs @@ -142,6 +142,25 @@ impl LimboValue { Box::into_raw(Box::new(self)) as *const c_void } + pub fn from_owned_value(value: &limbo_core::OwnedValue) -> Self { + match value { + limbo_core::OwnedValue::Integer(i) => { + LimboValue::new(ValueType::Integer, ValueUnion::from_int(*i)) + } + limbo_core::OwnedValue::Float(r) => { + LimboValue::new(ValueType::Real, ValueUnion::from_real(*r)) + } + limbo_core::OwnedValue::Text(s) => { + LimboValue::new(ValueType::Text, ValueUnion::from_str(s.as_str())) + } + limbo_core::OwnedValue::Blob(b) => { + LimboValue::new(ValueType::Blob, ValueUnion::from_bytes(b.as_slice())) + } + limbo_core::OwnedValue::Null => { + LimboValue::new(ValueType::Null, ValueUnion::from_null()) + } + } + } pub fn from_value(value: &limbo_core::RefValue) -> Self { match value { limbo_core::RefValue::Integer(i) => { diff --git a/bindings/java/rs_src/limbo_statement.rs b/bindings/java/rs_src/limbo_statement.rs index fbb33fa79..b28ff55b1 100644 --- a/bindings/java/rs_src/limbo_statement.rs +++ b/bindings/java/rs_src/limbo_statement.rs @@ -104,17 +104,17 @@ fn row_to_obj_array<'local>( ) -> Result> { let obj_array = env.new_object_array(row.len() as i32, "java/lang/Object", JObject::null())?; - for (i, value) in row.get_values().iter().enumerate() { + for (i, value) in row.get_values().enumerate() { let obj = match value { - limbo_core::RefValue::Null => JObject::null(), - limbo_core::RefValue::Integer(i) => { + limbo_core::OwnedValue::Null => JObject::null(), + limbo_core::OwnedValue::Integer(i) => { env.new_object("java/lang/Long", "(J)V", &[JValue::Long(*i)])? } - limbo_core::RefValue::Float(f) => { + limbo_core::OwnedValue::Float(f) => { env.new_object("java/lang/Double", "(D)V", &[JValue::Double(*f)])? } - limbo_core::RefValue::Text(s) => env.new_string(s.as_str())?.into(), - limbo_core::RefValue::Blob(b) => env.byte_array_from_slice(&b.to_slice())?.into(), + limbo_core::OwnedValue::Text(s) => env.new_string(s.as_str())?.into(), + limbo_core::OwnedValue::Blob(b) => env.byte_array_from_slice(&b.as_slice())?.into(), }; if let Err(e) = env.set_object_array_element(&obj_array, i as i32, obj) { eprintln!("Error on parsing row: {:?}", e); diff --git a/bindings/javascript/src/lib.rs b/bindings/javascript/src/lib.rs index 2db1fec48..614b17677 100644 --- a/bindings/javascript/src/lib.rs +++ b/bindings/javascript/src/lib.rs @@ -76,7 +76,7 @@ impl Statement { Ok(limbo_core::StepResult::Row) => { let row = stmt.row().unwrap(); let mut obj = env.create_object()?; - for (idx, value) in row.get_values().iter().enumerate() { + for (idx, value) in row.get_values().enumerate() { let key = stmt.get_column_name(idx); let js_value = to_js_value(&env, value); obj.set_named_property(&key, js_value)?; @@ -92,14 +92,14 @@ impl Statement { } } -fn to_js_value(env: &napi::Env, value: &limbo_core::RefValue) -> JsUnknown { +fn to_js_value(env: &napi::Env, value: &limbo_core::OwnedValue) -> JsUnknown { match value { - limbo_core::RefValue::Null => env.get_null().unwrap().into_unknown(), - limbo_core::RefValue::Integer(i) => env.create_int64(*i).unwrap().into_unknown(), - limbo_core::RefValue::Float(f) => env.create_double(*f).unwrap().into_unknown(), - limbo_core::RefValue::Text(s) => env.create_string(s.as_str()).unwrap().into_unknown(), - limbo_core::RefValue::Blob(b) => { - env.create_buffer_copy(b.to_slice()).unwrap().into_unknown() + limbo_core::OwnedValue::Null => env.get_null().unwrap().into_unknown(), + limbo_core::OwnedValue::Integer(i) => env.create_int64(*i).unwrap().into_unknown(), + limbo_core::OwnedValue::Float(f) => env.create_double(*f).unwrap().into_unknown(), + limbo_core::OwnedValue::Text(s) => env.create_string(s.as_str()).unwrap().into_unknown(), + limbo_core::OwnedValue::Blob(b) => { + env.create_buffer_copy(b.as_slice()).unwrap().into_unknown() } } } diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index ff963f1db..851136b50 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -326,11 +326,13 @@ fn row_to_py(py: Python, row: &limbo_core::Row) -> Result { let mut py_values = Vec::new(); for value in row.get_values() { match value { - limbo_core::RefValue::Null => py_values.push(py.None()), - limbo_core::RefValue::Integer(i) => py_values.push(i.into_pyobject(py)?.into()), - limbo_core::RefValue::Float(f) => py_values.push(f.into_pyobject(py)?.into()), - limbo_core::RefValue::Text(s) => py_values.push(s.as_str().into_pyobject(py)?.into()), - limbo_core::RefValue::Blob(b) => py_values.push(PyBytes::new(py, b.to_slice()).into()), + limbo_core::OwnedValue::Null => py_values.push(py.None()), + limbo_core::OwnedValue::Integer(i) => py_values.push(i.into_pyobject(py)?.into()), + limbo_core::OwnedValue::Float(f) => py_values.push(f.into_pyobject(py)?.into()), + limbo_core::OwnedValue::Text(s) => py_values.push(s.as_str().into_pyobject(py)?.into()), + limbo_core::OwnedValue::Blob(b) => { + py_values.push(PyBytes::new(py, b.as_slice()).into()) + } } } Ok(PyTuple::new(py, &py_values) diff --git a/bindings/rust/src/lib.rs b/bindings/rust/src/lib.rs index 781a40c94..fe653fdb5 100644 --- a/bindings/rust/src/lib.rs +++ b/bindings/rust/src/lib.rs @@ -212,7 +212,7 @@ impl Rows { Ok(limbo_core::StepResult::Row) => { let row = stmt.row().unwrap(); Ok(Some(Row { - values: row.get_values().iter().map(|v| v.to_owned()).collect(), + values: row.get_values().map(|v| v.to_owned()).collect(), })) } _ => Ok(None), diff --git a/bindings/wasm/lib.rs b/bindings/wasm/lib.rs index 330d4dd68..3a5819efc 100644 --- a/bindings/wasm/lib.rs +++ b/bindings/wasm/lib.rs @@ -177,10 +177,10 @@ impl Statement { } } -fn to_js_value(value: &limbo_core::RefValue) -> JsValue { +fn to_js_value(value: &limbo_core::OwnedValue) -> JsValue { match value { - limbo_core::RefValue::Null => JsValue::null(), - limbo_core::RefValue::Integer(i) => { + limbo_core::OwnedValue::Null => JsValue::null(), + limbo_core::OwnedValue::Integer(i) => { let i = *i; if i >= i32::MIN as i64 && i <= i32::MAX as i64 { JsValue::from(i as i32) @@ -188,9 +188,9 @@ fn to_js_value(value: &limbo_core::RefValue) -> JsValue { JsValue::from(i) } } - limbo_core::RefValue::Float(f) => JsValue::from(*f), - limbo_core::RefValue::Text(t) => JsValue::from_str(t.as_str()), - limbo_core::RefValue::Blob(b) => js_sys::Uint8Array::from(b.to_slice()).into(), + limbo_core::OwnedValue::Float(f) => JsValue::from(*f), + limbo_core::OwnedValue::Text(t) => JsValue::from_str(t.as_str()), + limbo_core::OwnedValue::Blob(b) => js_sys::Uint8Array::from(b.as_slice()).into(), } } diff --git a/cli/app.rs b/cli/app.rs index c85f20d61..f6d53ee19 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -5,7 +5,7 @@ use crate::{ opcodes_dictionary::OPCODE_DESCRIPTIONS, }; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table}; -use limbo_core::{Database, LimboError, RefValue, Statement, StepResult}; +use limbo_core::{Database, LimboError, OwnedValue, RefValue, Statement, StepResult}; use clap::{Parser, ValueEnum}; use rustyline::{history::DefaultHistory, Editor}; @@ -711,7 +711,7 @@ impl<'a> Limbo<'a> { match rows.step() { Ok(StepResult::Row) => { let row = rows.row().unwrap(); - for (i, value) in row.get_values().iter().enumerate() { + for (i, value) in row.get_values().enumerate() { if i > 0 { let _ = self.writer.write(b"|"); } @@ -767,21 +767,21 @@ impl<'a> Limbo<'a> { let record = rows.row().unwrap(); let mut row = Row::new(); row.max_height(1); - for (idx, value) in record.get_values().iter().enumerate() { + for (idx, value) in record.get_values().enumerate() { let (content, alignment) = match value { - RefValue::Null => { + OwnedValue::Null => { (self.opts.null_value.clone(), CellAlignment::Left) } - RefValue::Integer(_) => { + OwnedValue::Integer(_) => { (format!("{}", value), CellAlignment::Right) } - RefValue::Float(_) => { + OwnedValue::Float(_) => { (format!("{}", value), CellAlignment::Right) } - RefValue::Text(_) => { + OwnedValue::Text(_) => { (format!("{}", value), CellAlignment::Left) } - RefValue::Blob(_) => { + OwnedValue::Blob(_) => { (format!("{}", value), CellAlignment::Left) } }; @@ -849,7 +849,7 @@ impl<'a> Limbo<'a> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - if let Some(RefValue::Text(schema)) = row.get_values().first() { + if let Ok(OwnedValue::Text(schema)) = row.get::<&OwnedValue>(0) { let _ = self.write_fmt(format_args!("{};", schema.as_str())); found = true; } @@ -907,7 +907,7 @@ impl<'a> Limbo<'a> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - if let Some(RefValue::Text(table)) = row.get_values().first() { + if let Ok(OwnedValue::Text(table)) = row.get::<&OwnedValue>(0) { tables.push_str(table.as_str()); tables.push(' '); } diff --git a/core/lib.rs b/core/lib.rs index 0314d9b81..f1a1b37a0 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -597,7 +597,7 @@ impl Statement { } } -pub type Row = types::ImmutableRecord; +pub type Row = vdbe::Row; pub type StepResult = vdbe::StepResult; diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 53d3095c1..f530a33fa 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -239,12 +239,19 @@ pub enum Register { Record(ImmutableRecord), } +/// A row is a the list of registers that hold the values for a filtered row. This row is a pointer, therefore +/// after stepping again, row will be invalidated to be sure it doesn't point to somewhere unexpected. +pub struct Row { + values: *const Register, + count: usize, +} + /// The program state describes the environment in which the program executes. pub struct ProgramState { pub pc: InsnReference, cursors: RefCell>>, registers: Vec, - pub(crate) result_row: Option, + pub(crate) result_row: Option, last_compare: Option, deferred_seek: Option<(CursorID, CursorID)>, ended_coroutine: Bitfield<4>, // flag to indicate that a coroutine has ended (key is the yield register. currently we assume that the yield register is always between 0-255, YOLO) @@ -403,6 +410,8 @@ impl Program { if state.is_interrupted() { return Ok(StepResult::Interrupt); } + // invalidate row + let _ = state.result_row.take(); let insn = &self.insns[state.pc as usize]; trace_insn(self, state.pc as InsnReference, insn); match insn { @@ -1257,8 +1266,12 @@ impl Program { state.pc += 1; } Insn::ResultRow { start_reg, count } => { - let record = make_record(&state.registers, start_reg, count); - state.result_row = Some(record); + let row = Row { + values: &state.registers[*start_reg] as *const Register, + count: *count, + }; + + state.result_row = Some(row); state.pc += 1; return Ok(StepResult::Row); } @@ -4486,6 +4499,67 @@ fn exec_math_log(arg: &OwnedValue, base: Option<&OwnedValue>) -> OwnedValue { OwnedValue::Float(result) } +pub trait FromValueRow<'a> { + fn from_value(value: &'a OwnedValue) -> Result + where + Self: Sized + 'a; +} + +impl<'a> FromValueRow<'a> for i64 { + fn from_value(value: &'a OwnedValue) -> Result { + match value { + OwnedValue::Integer(i) => Ok(*i), + _ => Err(LimboError::ConversionError("Expected integer value".into())), + } + } +} + +impl<'a> FromValueRow<'a> for String { + fn from_value(value: &'a OwnedValue) -> Result { + match value { + OwnedValue::Text(s) => Ok(s.as_str().to_string()), + _ => Err(LimboError::ConversionError("Expected text value".into())), + } + } +} + +impl<'a> FromValueRow<'a> for &'a str { + fn from_value(value: &'a OwnedValue) -> Result { + match value { + OwnedValue::Text(s) => Ok(s.as_str()), + _ => Err(LimboError::ConversionError("Expected text value".into())), + } + } +} + +impl<'a> FromValueRow<'a> for &'a OwnedValue { + fn from_value(value: &'a OwnedValue) -> Result { + Ok(value) + } +} + +impl Row { + pub fn get<'a, T: FromValueRow<'a> + 'a>(&'a self, idx: usize) -> Result { + let value = unsafe { self.values.add(idx).as_ref().unwrap() }; + let value = match value { + Register::OwnedValue(owned_value) => owned_value, + _ => unreachable!("a row should be formed of values only"), + }; + T::from_value(value) + } + + pub fn get_values(&self) -> impl Iterator { + let values = unsafe { std::slice::from_raw_parts(self.values, self.count) }; + dbg!(&values); + // This should be ownedvalues + // TODO: add check for this + values.iter().map(|v| v.get_owned_value()) + } + + pub fn len(&self) -> usize { + self.count + } +} #[cfg(test)] mod tests { use crate::vdbe::{exec_replace, Register}; diff --git a/simulator/generation/plan.rs b/simulator/generation/plan.rs index 2799976a2..ecad92344 100644 --- a/simulator/generation/plan.rs +++ b/simulator/generation/plan.rs @@ -494,13 +494,13 @@ impl Interaction { let mut r = Vec::new(); for v in row.get_values() { let v = match v { - limbo_core::RefValue::Null => Value::Null, - limbo_core::RefValue::Integer(i) => Value::Integer(*i), - limbo_core::RefValue::Float(f) => Value::Float(*f), - limbo_core::RefValue::Text(t) => { + limbo_core::OwnedValue::Null => Value::Null, + limbo_core::OwnedValue::Integer(i) => Value::Integer(*i), + limbo_core::OwnedValue::Float(f) => Value::Float(*f), + limbo_core::OwnedValue::Text(t) => { Value::Text(t.as_str().to_string()) } - limbo_core::RefValue::Blob(b) => Value::Blob(b.to_slice().to_vec()), + limbo_core::OwnedValue::Blob(b) => Value::Blob(b.to_vec()), }; r.push(v); } diff --git a/sqlite3/src/lib.rs b/sqlite3/src/lib.rs index 85799c364..f54e6bdb2 100644 --- a/sqlite3/src/lib.rs +++ b/sqlite3/src/lib.rs @@ -1,6 +1,7 @@ #![allow(clippy::missing_safety_doc)] #![allow(non_camel_case_types)] +use limbo_core::OwnedValue; use log::trace; use std::ffi::{self, CStr, CString}; @@ -636,8 +637,8 @@ pub unsafe extern "C" fn sqlite3_column_text( Some(row) => row, None => return std::ptr::null(), }; - match row.get_values().get(idx as usize) { - Some(limbo_core::RefValue::Text(text)) => text.as_str().as_ptr(), + match row.get::<&OwnedValue>(idx as usize) { + Ok(limbo_core::OwnedValue::Text(text)) => text.as_str().as_ptr(), _ => std::ptr::null(), } } diff --git a/tests/integration/fuzz/mod.rs b/tests/integration/fuzz/mod.rs index 881ab4e54..f776fc9a7 100644 --- a/tests/integration/fuzz/mod.rs +++ b/tests/integration/fuzz/mod.rs @@ -72,17 +72,14 @@ mod tests { }; let row = row .get_values() - .iter() .map(|x| match x { - limbo_core::RefValue::Null => rusqlite::types::Value::Null, - limbo_core::RefValue::Integer(x) => rusqlite::types::Value::Integer(*x), - limbo_core::RefValue::Float(x) => rusqlite::types::Value::Real(*x), - limbo_core::RefValue::Text(x) => { + limbo_core::OwnedValue::Null => rusqlite::types::Value::Null, + limbo_core::OwnedValue::Integer(x) => rusqlite::types::Value::Integer(*x), + limbo_core::OwnedValue::Float(x) => rusqlite::types::Value::Real(*x), + limbo_core::OwnedValue::Text(x) => { rusqlite::types::Value::Text(x.as_str().to_string()) } - limbo_core::RefValue::Blob(x) => { - rusqlite::types::Value::Blob(x.to_slice().to_vec()) - } + limbo_core::OwnedValue::Blob(x) => rusqlite::types::Value::Blob(x.to_vec()), }) .collect(); rows.push(row); diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs index 0dd438f2e..543e0678a 100644 --- a/tests/integration/query_processing/test_write_path.rs +++ b/tests/integration/query_processing/test_write_path.rs @@ -1,6 +1,6 @@ use crate::common::{self, maybe_setup_tracing}; use crate::common::{compare_string, do_flush, TempDatabase}; -use limbo_core::{Connection, RefValue, StepResult}; +use limbo_core::{Connection, OwnedValue, RefValue, StepResult}; use log::debug; use std::rc::Rc; @@ -192,10 +192,10 @@ fn test_sequential_write() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - let first_value = row.get_values().first().expect("missing id"); + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); let id = match first_value { - limbo_core::RefValue::Integer(i) => *i as i32, - limbo_core::RefValue::Float(f) => *f as i32, + limbo_core::OwnedValue::Integer(i) => *i as i32, + limbo_core::OwnedValue::Float(f) => *f as i32, _ => unreachable!(), }; assert_eq!(current_read_index, id); @@ -258,9 +258,9 @@ fn test_regression_multi_row_insert() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - let first_value = row.get_values().first().expect("missing id"); + let first_value = row.get::<&OwnedValue>(0).expect("missing id"); let id = match first_value { - RefValue::Float(f) => *f as i32, + OwnedValue::Float(f) => *f as i32, _ => panic!("expected float"), }; actual_ids.push(id); @@ -304,7 +304,10 @@ fn test_statement_reset() -> anyhow::Result<()> { match stmt.step()? { StepResult::Row => { let row = stmt.row().unwrap(); - assert_eq!(*row.get_value(0), limbo_core::OwnedValue::Integer(1)); + assert_eq!( + *row.get::<&OwnedValue>(0), + limbo_core::OwnedValue::Integer(1) + ); break; } StepResult::IO => tmp_db.io.run_once()?, @@ -318,7 +321,10 @@ fn test_statement_reset() -> anyhow::Result<()> { match stmt.step()? { StepResult::Row => { let row = stmt.row().unwrap(); - assert_eq!(*row.get_value(0), limbo_core::OwnedValue::Integer(1)); + assert_eq!( + *row.get::<&OwnedValue>(0), + limbo_core::OwnedValue::Integer(1) + ); break; } StepResult::IO => tmp_db.io.run_once()?, diff --git a/tests/integration/wal/test_wal.rs b/tests/integration/wal/test_wal.rs index 4cf829919..53790a7df 100644 --- a/tests/integration/wal/test_wal.rs +++ b/tests/integration/wal/test_wal.rs @@ -158,7 +158,7 @@ pub(crate) fn execute_and_get_ints( let row = stmt.row().unwrap(); for value in row.get_values() { let out = match value { - limbo_core::RefValue::Integer(i) => i, + limbo_core::OwnedValue::Integer(i) => i, _ => { return Err(LimboError::ConversionError(format!( "cannot convert {value} to int" From 3317195a53d07c446ed93dbf0325dd851ae248b9 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 27 Mar 2025 17:37:43 +0100 Subject: [PATCH 05/22] Reusable ImmutableRecord -> allocation reduction Improve allocation usage from ImmutableRecords by reusing them. ImmutableRecord is basically a contigous piece of memory that holds the current record. If we move to some other record we usually deallocate the previous one and allocate a new one -- obviously this is wasteful. With this commit we will reuse the ImmutableRecord to allow payload to be extended if needed or reused if we can, making it faster to iterate records basically. --- cli/app.rs | 1 - core/storage/btree.rs | 125 ++++++++++++++++++++------------- core/storage/sqlite3_ondisk.rs | 2 +- core/types.rs | 8 ++- 4 files changed, 83 insertions(+), 53 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index f6d53ee19..4e3167200 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -319,7 +319,6 @@ impl<'a> Limbo<'a> { |row: &limbo_core::Row| -> Result<(), LimboError> { let values = row .get_values() - .iter() .zip(value_types.iter()) .map(|(value, value_type)| { // If the type affinity is TEXT, replace each single diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 27340583c..a36f5cd5a 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -246,7 +246,6 @@ pub struct BTreeCursor { root_page: usize, /// Rowid and record are stored before being consumed. rowid: Cell>, - record: RefCell>, null_flag: bool, /// Index internal pages are consumed on the way up, so we store going upwards flag in case /// we just moved to a parent page and the parent page is an internal index page which requires @@ -260,6 +259,9 @@ pub struct BTreeCursor { /// Page stack used to traverse the btree. /// Each cursor has a stack because each cursor traverses the btree independently. stack: PageStack, + /// Reusable immutable record, used to allow better allocation strategy. + reusable_immutable_record: RefCell>, + empty_record: RefCell, } /// Stack of pages representing the tree traversal order. @@ -297,7 +299,6 @@ impl BTreeCursor { pager, root_page, rowid: Cell::new(None), - record: RefCell::new(None), null_flag: false, going_upwards: false, state: CursorState::None, @@ -307,6 +308,8 @@ impl BTreeCursor { cell_indices: RefCell::new([0; BTCURSOR_MAX_DEPTH + 1]), stack: RefCell::new([const { None }; BTCURSOR_MAX_DEPTH + 1]), }, + reusable_immutable_record: RefCell::new(None), + empty_record: RefCell::new(true), } } @@ -326,7 +329,7 @@ impl BTreeCursor { /// Move the cursor to the previous record and return it. /// Used in backwards iteration. - fn get_prev_record(&mut self) -> Result, Option)>> { + fn get_prev_record(&mut self) -> Result)>> { loop { let page = self.stack.top(); let cell_idx = self.stack.current_cell_index(); @@ -343,7 +346,7 @@ impl BTreeCursor { self.stack.pop(); } else { // moved to begin of btree - return Ok(CursorResult::Ok((None, None))); + return Ok(CursorResult::Ok((None))); } } // continue to next loop to get record from the new page @@ -401,7 +404,7 @@ impl BTreeCursor { crate::storage::sqlite3_ondisk::read_record(_payload)? }; self.stack.retreat(); - return Ok(CursorResult::Ok((Some(_rowid), Some(record)))); + return Ok(CursorResult::Ok(Some(_rowid))); } BTreeCell::IndexInteriorCell(_) => todo!(), BTreeCell::IndexLeafCell(_) => todo!(), @@ -474,18 +477,18 @@ impl BTreeCursor { fn get_next_record( &mut self, predicate: Option<(SeekKey<'_>, SeekOp)>, - ) -> Result, Option)>> { + ) -> Result>> { if let Some(mv_cursor) = &self.mv_cursor { let mut mv_cursor = mv_cursor.borrow_mut(); let rowid = mv_cursor.current_row_id(); match rowid { Some(rowid) => { let record = mv_cursor.current_row().unwrap().unwrap(); - let record = crate::storage::sqlite3_ondisk::read_record(&record.data)?; + crate::storage::sqlite3_ondisk::read_record(&record.data)?; mv_cursor.forward(); - return Ok(CursorResult::Ok((Some(rowid.row_id), Some(record)))); + return Ok(CursorResult::Ok(Some(rowid.row_id))); } - None => return Ok(CursorResult::Ok((None, None))), + None => return Ok(CursorResult::Ok(None)), } } loop { @@ -519,7 +522,7 @@ impl BTreeCursor { self.stack.pop(); continue; } else { - return Ok(CursorResult::Ok((None, None))); + return Ok(CursorResult::Ok(None)); } } } @@ -534,7 +537,7 @@ impl BTreeCursor { self.stack.pop(); continue; } else { - return Ok(CursorResult::Ok((None, None))); + return Ok(CursorResult::Ok(None)); } } assert!(cell_idx < contents.cell_count()); @@ -573,7 +576,7 @@ impl BTreeCursor { crate::storage::sqlite3_ondisk::read_record(_payload)? }; self.stack.advance(); - return Ok(CursorResult::Ok((Some(*_rowid), Some(record)))); + return Ok(CursorResult::Ok(Some(*_rowid))); } BTreeCell::IndexInteriorCell(IndexInteriorCell { payload, @@ -599,29 +602,34 @@ impl BTreeCursor { self.going_upwards = false; self.stack.advance(); if predicate.is_none() { - let rowid = match record.last_value() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; - return Ok(CursorResult::Ok((Some(rowid), Some(record)))); + return Ok(CursorResult::Ok(Some(rowid))); } let (key, op) = predicate.as_ref().unwrap(); let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = compare_immutable(&record.get_values(), index_key.get_values()); + let order = compare_immutable( + &self.get_immutable_record().as_ref().unwrap().get_values(), + index_key.get_values(), + ); let found = match op { SeekOp::GT => order.is_gt(), SeekOp::GE => order.is_ge(), SeekOp::EQ => order.is_eq(), }; if found { - let rowid = match record.last_value() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; - return Ok(CursorResult::Ok((Some(rowid), Some(record)))); + return Ok(CursorResult::Ok(Some(rowid))); } else { continue; } @@ -643,28 +651,33 @@ impl BTreeCursor { self.stack.advance(); if predicate.is_none() { - let rowid = match record.last_value() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; - return Ok(CursorResult::Ok((Some(rowid), Some(record)))); + return Ok(CursorResult::Ok(Some(rowid))); } let (key, op) = predicate.as_ref().unwrap(); let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let order = compare_immutable(&record.get_values(), index_key.get_values()); + let order = compare_immutable( + &self.get_immutable_record().as_ref().unwrap().get_values(), + index_key.get_values(), + ); let found = match op { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), SeekOp::EQ => order.is_le(), }; if found { - let rowid = match record.last_value() { + let rowid = match self.get_immutable_record().as_ref().unwrap().last_value() + { Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; - return Ok(CursorResult::Ok((Some(rowid), Some(record)))); + return Ok(CursorResult::Ok(Some(rowid))); } else { continue; } @@ -677,11 +690,7 @@ impl BTreeCursor { /// This may be used to seek to a specific record in a point query (e.g. SELECT * FROM table WHERE col = 10) /// or e.g. find the first record greater than the seek key in a range query (e.g. SELECT * FROM table WHERE col > 10). /// We don't include the rowid in the comparison and that's why the last value from the record is not included. - fn do_seek( - &mut self, - key: SeekKey<'_>, - op: SeekOp, - ) -> Result, Option)>> { + fn do_seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result>> { return_if_io!(self.move_to(key.clone(), op.clone())); { @@ -729,7 +738,7 @@ impl BTreeCursor { crate::storage::sqlite3_ondisk::read_record(payload)? }; self.stack.advance(); - return Ok(CursorResult::Ok((Some(*cell_rowid), Some(record)))); + return Ok(CursorResult::Ok(Some(*cell_rowid))); } else { self.stack.advance(); } @@ -751,6 +760,8 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record(payload)? }; + let record = self.get_immutable_record(); + let record = record.as_ref().unwrap(); let order = compare_immutable( &record.get_values().as_slice()[..record.len() - 1], &index_key.get_values().as_slice()[..], @@ -766,7 +777,7 @@ impl BTreeCursor { Some(RefValue::Integer(rowid)) => *rowid as u64, _ => unreachable!("index cells should have an integer rowid"), }; - return Ok(CursorResult::Ok((Some(rowid), Some(record)))); + return Ok(CursorResult::Ok(Some(rowid))); } } cell_type => { @@ -796,7 +807,7 @@ impl BTreeCursor { return self.get_next_record(Some((key, op))); } - Ok(CursorResult::Ok((None, None))) + Ok(CursorResult::Ok(None)) } /// Move the cursor to the root page of the btree. @@ -928,7 +939,7 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let record = if let Some(next_page) = first_overflow_page { + if let Some(next_page) = first_overflow_page { return_if_io!(self.process_overflow_read( payload, *next_page, @@ -937,7 +948,10 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record(payload)? }; - let order = compare_immutable(index_key.get_values(), record.get_values()); + let order = compare_immutable( + index_key.get_values(), + self.get_immutable_record().as_ref().unwrap().get_values(), + ); let target_leaf_page_is_in_the_left_subtree = match cmp { SeekOp::GT => order.is_lt(), SeekOp::GE => order.is_le(), @@ -1853,19 +1867,18 @@ impl BTreeCursor { pub fn seek_to_last(&mut self) -> Result> { return_if_io!(self.move_to_rightmost()); - let (rowid, record) = return_if_io!(self.get_next_record(None)); + let rowid = return_if_io!(self.get_next_record(None)); if rowid.is_none() { let is_empty = return_if_io!(self.is_empty_table()); assert!(is_empty); return Ok(CursorResult::Ok(())); } self.rowid.replace(rowid); - self.record.replace(record); Ok(CursorResult::Ok(())) } pub fn is_empty(&self) -> bool { - self.record.borrow().is_none() + *self.empty_record.borrow() } pub fn root_page(&self) -> usize { @@ -1874,15 +1887,15 @@ impl BTreeCursor { pub fn rewind(&mut self) -> Result> { if self.mv_cursor.is_some() { - let (rowid, record) = return_if_io!(self.get_next_record(None)); + let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); - self.record.replace(record); + self.empty_record.replace(rowid.is_none()); } else { self.move_to_root(); - let (rowid, record) = return_if_io!(self.get_next_record(None)); + let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); - self.record.replace(record); + self.empty_record.replace(rowid.is_none()); } Ok(CursorResult::Ok(())) } @@ -1896,18 +1909,18 @@ impl BTreeCursor { } pub fn next(&mut self) -> Result> { - let (rowid, record) = return_if_io!(self.get_next_record(None)); + let rowid = return_if_io!(self.get_next_record(None)); self.rowid.replace(rowid); - self.record.replace(record); + self.empty_record.replace(rowid.is_none()); Ok(CursorResult::Ok(())) } pub fn prev(&mut self) -> Result> { assert!(self.mv_cursor.is_none()); match self.get_prev_record()? { - CursorResult::Ok((rowid, record)) => { + CursorResult::Ok(rowid) => { self.rowid.replace(rowid); - self.record.replace(record); + self.empty_record.replace(rowid.is_none()); Ok(CursorResult::Ok(())) } CursorResult::IO => Ok(CursorResult::IO), @@ -1929,14 +1942,14 @@ impl BTreeCursor { pub fn seek(&mut self, key: SeekKey<'_>, op: SeekOp) -> Result> { assert!(self.mv_cursor.is_none()); - let (rowid, record) = return_if_io!(self.do_seek(key, op)); + let rowid = return_if_io!(self.do_seek(key, op)); self.rowid.replace(rowid); - self.record.replace(record); + self.empty_record.replace(rowid.is_none()); Ok(CursorResult::Ok(rowid.is_some())) } pub fn record(&self) -> Ref> { - self.record.borrow() + self.reusable_immutable_record.borrow() } pub fn insert( @@ -2689,6 +2702,21 @@ impl BTreeCursor { } Ok(CursorResult::Ok(())) } + + fn get_lazy_immutable_record(&self) -> std::cell::RefMut<'_, Option> { + if self.reusable_immutable_record.borrow().is_none() { + let record = ImmutableRecord { + payload: Vec::with_capacity(4096), + values: Vec::with_capacity(10), + }; + self.reusable_immutable_record.replace(Some(record)); + } + self.reusable_immutable_record.borrow_mut() + } + + fn get_immutable_record(&self) -> std::cell::RefMut<'_, Option> { + self.reusable_immutable_record.borrow_mut() + } } impl PageStack { @@ -4989,7 +5017,7 @@ mod tests { let key = OwnedValue::Integer(i as i64); let value = ImmutableRecord::from_registers(&[Register::OwnedValue(OwnedValue::Text(Text { - value: Rc::new(huge_texts[i].as_bytes().to_vec()), + value: huge_texts[i].as_bytes().to_vec(), subtype: crate::types::TextSubtype::Text, }))]); tracing::trace!("before insert {}", i); @@ -5014,8 +5042,7 @@ mod tests { let mut cursor = BTreeCursor::new(None, pager.clone(), root_page); cursor.move_to_root(); for i in 0..iterations { - let (rowid, _) = - run_until_done(|| cursor.get_next_record(None), pager.deref()).unwrap(); + let rowid = run_until_done(|| cursor.get_next_record(None), pager.deref()).unwrap(); assert_eq!(rowid.unwrap(), i as u64, "got!=expected"); } } diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 02c5b63d2..d3dca321a 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -1079,7 +1079,7 @@ pub fn read_record(payload: &[u8]) -> Result { } Ok(ImmutableRecord { - payload: std::pin::Pin::new(payload), + payload: payload, values, }) } diff --git a/core/types.rs b/core/types.rs index ba5641b0d..d0501f671 100644 --- a/core/types.rs +++ b/core/types.rs @@ -628,9 +628,13 @@ impl<'a> FromValue<'a> for &'a str { /// This struct serves the purpose of not allocating multiple vectors of bytes if not needed. /// A value in a record that has already been serialized can stay serialized and what this struct offsers /// is easy acces to each value which point to the payload. +/// The name might be contradictory as it is immutable in the sense that you cannot modify the values without modifying the payload. #[derive(Debug, Eq, Ord, PartialEq, PartialOrd)] pub struct ImmutableRecord { - pub payload: Pin>, // << point to this + // We have to be super careful with this buffer since we make values point to the payload we need to take care reallocations + // happen in a controlled manner. If we realocate with values that should be correct, they will now point to undefined data. + // We don't use pin here because it would make it imposible to reuse the buffer if we need to push a new record in the same struct. + pub payload: Vec, pub values: Vec, } @@ -814,7 +818,7 @@ impl ImmutableRecord { } Self { - payload: Pin::new(buf), + payload: buf, values, } } From 105b4212747d56f4f489d7ff9fda88141f458315 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Thu, 27 Mar 2025 17:40:09 +0100 Subject: [PATCH 06/22] make read_record, read_varint and read_value faster We make read_record faster by not allocating Vec if not needed. This is why I introduced a simple `SmallVec` that will have a stack allocated list for the simplest workloads, and a heap allocated if we were to require more stuff. Both read_varint and read_value, at least in my mac m4, were not inlined. Since these functions are called so many times it made sense to inline them to avoid call overhead. With this I saw something like 20% improvement over previous commit in my m4. --- core/storage/sqlite3_ondisk.rs | 71 ++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 12 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index d3dca321a..5e4692ffd 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -51,6 +51,7 @@ use crate::types::{ImmutableRecord, RawSlice, RefValue, TextRef, TextSubtype}; use crate::{File, Result}; use parking_lot::RwLock; use std::cell::RefCell; +use std::mem::MaybeUninit; use std::pin::Pin; use std::rc::Rc; use std::sync::Arc; @@ -1053,17 +1054,56 @@ pub fn validate_serial_type(value: u64) -> Result { } } -pub fn read_record(payload: &[u8]) -> Result { +struct SmallVec { + pub data: [std::mem::MaybeUninit; 64], + pub len: usize, + pub extra_data: Option>, +} + +impl SmallVec { + pub fn new() -> Self { + Self { + data: unsafe { std::mem::MaybeUninit::uninit().assume_init() }, + len: 0, + extra_data: None, + } + } + + pub fn push(&mut self, value: T) { + if self.len < self.data.len() { + self.data[self.len] = MaybeUninit::new(value); + self.len += 1; + } else { + if self.extra_data.is_none() { + self.extra_data = Some(Vec::new()); + } + self.extra_data.as_mut().unwrap().push(value); + self.len += 1; + } + } + + pub fn has_extra_data(&self) -> bool { + self.len >= self.len + } +} + +pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Result<()> { + // Let's clear previous use + reuse_immutable.payload.clear(); + reuse_immutable.values.clear(); + // Copy payload to ImmutableRecord in order to make RefValue that point to this new buffer. + // By reusing this immutable record we make it less allocation expensive. + reuse_immutable.payload.extend_from_slice(payload); + let mut pos = 0; let (header_size, nr) = read_varint(payload)?; assert!((header_size as usize) >= nr); let mut header_size = (header_size as usize) - nr; - let payload = payload.to_vec(); pos += nr; - let mut serial_types = Vec::with_capacity(header_size); + let mut serial_types = SmallVec::new(); while header_size > 0 { - let (serial_type, nr) = read_varint(&payload[pos..])?; + let (serial_type, nr) = read_varint(&reuse_immutable.payload[pos..])?; let serial_type = validate_serial_type(serial_type)?; serial_types.push(serial_type); pos += nr; @@ -1071,21 +1111,27 @@ pub fn read_record(payload: &[u8]) -> Result { header_size -= nr; } - let mut values = Vec::with_capacity(serial_types.len()); - for &serial_type in &serial_types { - let (value, n) = read_value(&payload[pos..], serial_type)?; + for &serial_type in &serial_types.data[..serial_types.len.min(serial_types.data.len())] { + let (value, n) = read_value(&reuse_immutable.payload[pos..], unsafe { + *serial_type.as_ptr() + })?; pos += n; - values.push(value); + reuse_immutable.values.push(value); + } + if let Some(extra) = serial_types.extra_data.as_ref() { + for serial_type in extra { + let (value, n) = read_value(&reuse_immutable.payload[pos..], *serial_type)?; + pos += n; + reuse_immutable.values.push(value); + } } - Ok(ImmutableRecord { - payload: payload, - values, - }) + Ok(()) } /// Reads a value that might reference the buffer it is reading from. Be sure to store RefValue with the buffer /// always. +#[inline(always)] pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usize)> { if serial_type.is_null() { return Ok((RefValue::Null, 0)); @@ -1223,6 +1269,7 @@ pub fn read_value(buf: &[u8], serial_type: SerialType) -> Result<(RefValue, usiz crate::bail_corrupt_error!("Invalid serial type: {}", serial_type) } +#[inline(always)] pub fn read_varint(buf: &[u8]) -> Result<(u64, usize)> { let mut v: u64 = 0; for i in 0..8 { From e504262bd5bc0bee0e9c5427e665d60beaca7ebd Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Fri, 28 Mar 2025 13:24:24 +0100 Subject: [PATCH 07/22] fix rebase --- core/storage/btree.rs | 65 ++++++++++++++++++++++++++-------- core/storage/sqlite3_ondisk.rs | 2 +- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index a36f5cd5a..cf24b3105 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -401,7 +401,10 @@ impl BTreeCursor { let record = if let Some(next_page) = first_overflow_page { return_if_io!(self.process_overflow_read(_payload, next_page, payload_size)) } else { - crate::storage::sqlite3_ondisk::read_record(_payload)? + crate::storage::sqlite3_ondisk::read_record( + _payload, + self.get_lazy_immutable_record().as_mut().unwrap(), + )? }; self.stack.retreat(); return Ok(CursorResult::Ok(Some(_rowid))); @@ -419,7 +422,7 @@ impl BTreeCursor { payload: &'static [u8], start_next_page: u32, payload_size: u64, - ) -> Result> { + ) -> Result> { let res = match &mut self.state { CursorState::None => { tracing::debug!("start reading overflow page payload_size={}", payload_size); @@ -455,8 +458,9 @@ impl BTreeCursor { *remaining_to_read == 0 && next == 0, "we can't have more pages to read while also have read everything" ); - let record = crate::storage::sqlite3_ondisk::read_record(&payload)?; - CursorResult::Ok(record) + let mut payload_swap = Vec::new(); + std::mem::swap(payload, &mut payload_swap); + CursorResult::Ok(payload_swap) } else { let new_page = self.pager.read_page(next as usize)?; *page = new_page; @@ -466,10 +470,20 @@ impl BTreeCursor { } _ => unreachable!(), }; - if matches!(res, CursorResult::Ok(..)) { - self.state = CursorState::None; + match res { + CursorResult::Ok(payload) => { + { + let mut reuse_immutable = self.get_lazy_immutable_record(); + crate::storage::sqlite3_ondisk::read_record( + &payload, + reuse_immutable.as_mut().unwrap(), + )?; + } + self.state = CursorState::None; + Ok(CursorResult::Ok(())) + } + CursorResult::IO => Ok(CursorResult::IO), } - Ok(res) } /// Move the cursor to the next record and return it. @@ -484,7 +498,10 @@ impl BTreeCursor { match rowid { Some(rowid) => { let record = mv_cursor.current_row().unwrap().unwrap(); - crate::storage::sqlite3_ondisk::read_record(&record.data)?; + crate::storage::sqlite3_ondisk::read_record( + &record.data, + self.get_lazy_immutable_record().as_mut().unwrap(), + )?; mv_cursor.forward(); return Ok(CursorResult::Ok(Some(rowid.row_id))); } @@ -573,7 +590,10 @@ impl BTreeCursor { *payload_size )) } else { - crate::storage::sqlite3_ondisk::read_record(_payload)? + crate::storage::sqlite3_ondisk::read_record( + _payload, + self.get_lazy_immutable_record().as_mut().unwrap(), + )? }; self.stack.advance(); return Ok(CursorResult::Ok(Some(*_rowid))); @@ -596,7 +616,10 @@ impl BTreeCursor { *payload_size )) } else { - crate::storage::sqlite3_ondisk::read_record(payload)? + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_lazy_immutable_record().as_mut().unwrap(), + )? }; self.going_upwards = false; @@ -646,7 +669,10 @@ impl BTreeCursor { *payload_size )) } else { - crate::storage::sqlite3_ondisk::read_record(payload)? + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_lazy_immutable_record().as_mut().unwrap(), + )? }; self.stack.advance(); @@ -735,7 +761,10 @@ impl BTreeCursor { *payload_size )) } else { - crate::storage::sqlite3_ondisk::read_record(payload)? + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_lazy_immutable_record().as_mut().unwrap(), + )? }; self.stack.advance(); return Ok(CursorResult::Ok(Some(*cell_rowid))); @@ -751,14 +780,17 @@ impl BTreeCursor { let SeekKey::IndexKey(index_key) = key else { unreachable!("index seek key should be a record"); }; - let record = if let Some(next_page) = first_overflow_page { + if let Some(next_page) = first_overflow_page { return_if_io!(self.process_overflow_read( payload, *next_page, *payload_size )) } else { - crate::storage::sqlite3_ondisk::read_record(payload)? + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_lazy_immutable_record().as_mut().unwrap(), + )? }; let record = self.get_immutable_record(); let record = record.as_ref().unwrap(); @@ -946,7 +978,10 @@ impl BTreeCursor { *payload_size )) } else { - crate::storage::sqlite3_ondisk::read_record(payload)? + crate::storage::sqlite3_ondisk::read_record( + payload, + self.get_lazy_immutable_record().as_mut().unwrap(), + )? }; let order = compare_immutable( index_key.get_values(), diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 5e4692ffd..de5b7c4bb 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -1083,7 +1083,7 @@ impl SmallVec { } pub fn has_extra_data(&self) -> bool { - self.len >= self.len + self.len >= self.data.len() } } From 1bfec65f23a37ff9c9ad8320bf23b1d92ec139cb Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Fri, 28 Mar 2025 16:52:58 +0100 Subject: [PATCH 08/22] remove dbg --- core/vdbe/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index f530a33fa..546f110f9 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -4550,7 +4550,6 @@ impl Row { pub fn get_values(&self) -> impl Iterator { let values = unsafe { std::slice::from_raw_parts(self.values, self.count) }; - dbg!(&values); // This should be ownedvalues // TODO: add check for this values.iter().map(|v| v.get_owned_value()) From 34c8fd7e6c8f3e760eb97955915262f9333e29c3 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Fri, 28 Mar 2025 16:54:55 +0100 Subject: [PATCH 09/22] fix serial_type write --- core/types.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/types.rs b/core/types.rs index d0501f671..ee5b0756e 100644 --- a/core/types.rs +++ b/core/types.rs @@ -727,7 +727,7 @@ impl ImmutableRecord { let value = value.get_owned_value(); let serial_type = SerialType::from(value); let n = write_varint(&mut serial_type_buf[0..], serial_type.into()); - serials.push(serial_type_buf.clone()); + serials.push((serial_type_buf.clone(), n)); let value_size = match serial_type { SerialType::Null => 0, @@ -767,8 +767,8 @@ impl ImmutableRecord { let mut writer = AppendWriter::new(&mut buf, start_pos); // 2. Write serial - for value in serials { - writer.extend_from_slice(&value); + for (value, n) in serials { + writer.extend_from_slice(&value[..n]); } // write content From 9623cce986dd020a9d398e3f2e7de312b19d8be0 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sat, 29 Mar 2025 16:28:07 +0100 Subject: [PATCH 10/22] push null refvalue too --- core/types.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/types.rs b/core/types.rs index ee5b0756e..674b0256c 100644 --- a/core/types.rs +++ b/core/types.rs @@ -776,7 +776,9 @@ impl ImmutableRecord { let value = value.get_owned_value(); let start_offset = writer.pos; match value { - OwnedValue::Null => {} + OwnedValue::Null => { + values.push(RefValue::Null); + } OwnedValue::Integer(i) => { values.push(RefValue::Integer(*i)); let serial_type = SerialType::from(value); From 4a9c4cff02b7d1fd94b4d9ccab36106f6ead9e25 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sat, 29 Mar 2025 16:28:34 +0100 Subject: [PATCH 11/22] fix comparison of immutable records in seekgt --- core/vdbe/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 546f110f9..b6a63cd95 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -40,8 +40,8 @@ use crate::storage::wal::CheckpointResult; use crate::storage::{btree::BTreeCursor, pager::Pager}; use crate::translate::plan::{ResultSetColumn, TableReference}; use crate::types::{ - AggContext, Cursor, CursorResult, ExternalAggState, ImmutableRecord, OwnedValue, SeekKey, - SeekOp, + compare_immutable, AggContext, Cursor, CursorResult, ExternalAggState, ImmutableRecord, + OwnedValue, SeekKey, SeekOp, }; use crate::util::{ cast_real_to_integer, cast_text_to_integer, cast_text_to_numeric, cast_text_to_real, @@ -1700,11 +1700,11 @@ impl Program { let record_from_regs = make_record(&state.registers, start_reg, num_regs); let pc = if let Some(ref idx_record) = *cursor.record() { // Compare against the same number of values - if idx_record.get_values()[..record_from_regs.len()] - .iter() - .zip(&record_from_regs.get_values()[..]) - .all(|(a, b)| a >= b) - { + let ord = compare_immutable( + &idx_record.get_values()[..record_from_regs.len()], + &record_from_regs.get_values(), + ); + if ord.is_ge() { target_pc.to_offset_int() } else { state.pc + 1 From d9f5cd870d11bc175a29d81fba53b4ae419799ce Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sat, 29 Mar 2025 21:47:35 +0100 Subject: [PATCH 12/22] clippy --- bindings/go/rs_src/types.rs | 17 ------- cli/app.rs | 2 +- core/json/json_operations.rs | 4 +- core/json/mod.rs | 1 - core/storage/btree.rs | 17 ++++--- core/storage/sqlite3_ondisk.rs | 4 -- core/types.rs | 4 -- core/vdbe/mod.rs | 9 ++++ core/vdbe/sorter.rs | 2 +- .../functions/test_function_rowid.rs | 6 +-- .../query_processing/test_read_path.rs | 22 +++++---- .../query_processing/test_write_path.rs | 45 ++++--------------- tests/integration/wal/test_wal.rs | 6 +-- 13 files changed, 47 insertions(+), 92 deletions(-) diff --git a/bindings/go/rs_src/types.rs b/bindings/go/rs_src/types.rs index 712306a34..f58a4a0b0 100644 --- a/bindings/go/rs_src/types.rs +++ b/bindings/go/rs_src/types.rs @@ -161,23 +161,6 @@ impl LimboValue { } } } - pub fn from_value(value: &limbo_core::RefValue) -> Self { - match value { - limbo_core::RefValue::Integer(i) => { - LimboValue::new(ValueType::Integer, ValueUnion::from_int(*i)) - } - limbo_core::RefValue::Float(r) => { - LimboValue::new(ValueType::Real, ValueUnion::from_real(*r)) - } - limbo_core::RefValue::Text(s) => { - LimboValue::new(ValueType::Text, ValueUnion::from_str(s.as_str())) - } - limbo_core::RefValue::Blob(b) => { - LimboValue::new(ValueType::Blob, ValueUnion::from_bytes(b.to_slice())) - } - limbo_core::RefValue::Null => LimboValue::new(ValueType::Null, ValueUnion::from_null()), - } - } // The values we get from Go need to be temporarily owned by the statement until they are bound // then they can be cleaned up immediately afterwards diff --git a/cli/app.rs b/cli/app.rs index 4e3167200..aa6556869 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -5,7 +5,7 @@ use crate::{ opcodes_dictionary::OPCODE_DESCRIPTIONS, }; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Row, Table}; -use limbo_core::{Database, LimboError, OwnedValue, RefValue, Statement, StepResult}; +use limbo_core::{Database, LimboError, OwnedValue, Statement, StepResult}; use clap::{Parser, ValueEnum}; use rustyline::{history::DefaultHistory, Editor}; diff --git a/core/json/json_operations.rs b/core/json/json_operations.rs index 17369cc3c..ea0ba9560 100644 --- a/core/json/json_operations.rs +++ b/core/json/json_operations.rs @@ -1,4 +1,4 @@ -use std::{collections::VecDeque, rc::Rc}; +use std::collections::VecDeque; use crate::{types::OwnedValue, vdbe::Register}; @@ -283,8 +283,6 @@ pub fn jsonb_insert(args: &[Register], json_cache: &JsonCacheCell) -> crate::Res #[cfg(test)] mod tests { - use std::rc::Rc; - use crate::types::Text; use super::*; diff --git a/core/json/mod.rs b/core/json/mod.rs index 07d93b06d..d35fa13d5 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -22,7 +22,6 @@ use jsonb::{ElementType, Jsonb, JsonbHeader, PathOperationMode, SearchOperation, use ser::to_string_pretty; use serde::{Deserialize, Serialize}; use std::borrow::Cow; -use std::rc::Rc; use std::str::FromStr; #[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] diff --git a/core/storage/btree.rs b/core/storage/btree.rs index cf24b3105..1b8dc6d66 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -7,8 +7,7 @@ use crate::storage::sqlite3_ondisk::{ use crate::MvCursor; use crate::types::{ - compare_immutable, compare_immutable_to_record, CursorResult, ImmutableRecord, OwnedValue, - RefValue, SeekKey, SeekOp, + compare_immutable, CursorResult, ImmutableRecord, OwnedValue, RefValue, SeekKey, SeekOp, }; use crate::{return_corrupt, LimboError, Result}; @@ -329,7 +328,7 @@ impl BTreeCursor { /// Move the cursor to the previous record and return it. /// Used in backwards iteration. - fn get_prev_record(&mut self) -> Result)>> { + fn get_prev_record(&mut self) -> Result>> { loop { let page = self.stack.top(); let cell_idx = self.stack.current_cell_index(); @@ -346,7 +345,7 @@ impl BTreeCursor { self.stack.pop(); } else { // moved to begin of btree - return Ok(CursorResult::Ok((None))); + return Ok(CursorResult::Ok(None)); } } // continue to next loop to get record from the new page @@ -398,7 +397,7 @@ impl BTreeCursor { first_overflow_page, payload_size, }) => { - let record = if let Some(next_page) = first_overflow_page { + if let Some(next_page) = first_overflow_page { return_if_io!(self.process_overflow_read(_payload, next_page, payload_size)) } else { crate::storage::sqlite3_ondisk::read_record( @@ -583,7 +582,7 @@ impl BTreeCursor { payload_size, }) => { assert!(predicate.is_none()); - let record = if let Some(next_page) = first_overflow_page { + if let Some(next_page) = first_overflow_page { return_if_io!(self.process_overflow_read( _payload, *next_page, @@ -609,7 +608,7 @@ impl BTreeCursor { self.stack.push(mem_page); continue; } - let record = if let Some(next_page) = first_overflow_page { + if let Some(next_page) = first_overflow_page { return_if_io!(self.process_overflow_read( payload, *next_page, @@ -662,7 +661,7 @@ impl BTreeCursor { first_overflow_page, payload_size, }) => { - let record = if let Some(next_page) = first_overflow_page { + if let Some(next_page) = first_overflow_page { return_if_io!(self.process_overflow_read( payload, *next_page, @@ -754,7 +753,7 @@ impl BTreeCursor { SeekOp::EQ => *cell_rowid == rowid_key, }; if found { - let record = if let Some(next_page) = first_overflow_page { + if let Some(next_page) = first_overflow_page { return_if_io!(self.process_overflow_read( payload, *next_page, diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index de5b7c4bb..f64e9e424 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -1081,10 +1081,6 @@ impl SmallVec { self.len += 1; } } - - pub fn has_extra_data(&self) -> bool { - self.len >= self.data.len() - } } pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Result<()> { diff --git a/core/types.rs b/core/types.rs index 674b0256c..1705603cf 100644 --- a/core/types.rs +++ b/core/types.rs @@ -4,15 +4,12 @@ use crate::error::LimboError; use crate::ext::{ExtValue, ExtValueType}; use crate::pseudo::PseudoCursor; use crate::storage::btree::BTreeCursor; -use crate::storage::buffer_pool; use crate::storage::sqlite3_ondisk::write_varint; use crate::vdbe::sorter::Sorter; use crate::vdbe::{Register, VTabOpaqueCursor}; use crate::Result; use std::cmp::Ordering; use std::fmt::Display; -use std::pin::Pin; -use std::rc::Rc; const MAX_REAL_SIZE: u8 = 15; @@ -1327,7 +1324,6 @@ impl RawSlice { #[cfg(test)] mod tests { use super::*; - use std::rc::Rc; #[test] fn test_serialize_null() { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index b6a63cd95..a6f158023 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -4548,6 +4548,15 @@ impl Row { T::from_value(value) } + pub fn get_value<'a>(&'a self, idx: usize) -> &'a OwnedValue { + let value = unsafe { self.values.add(idx).as_ref().unwrap() }; + let value = match value { + Register::OwnedValue(owned_value) => owned_value, + _ => unreachable!("a row should be formed of values only"), + }; + value + } + pub fn get_values(&self) -> impl Iterator { let values = unsafe { std::slice::from_raw_parts(self.values, self.count) }; // This should be ownedvalues diff --git a/core/vdbe/sorter.rs b/core/vdbe/sorter.rs index 7b87a5387..584a29271 100644 --- a/core/vdbe/sorter.rs +++ b/core/vdbe/sorter.rs @@ -1,4 +1,4 @@ -use crate::types::{ImmutableRecord, Record}; +use crate::types::ImmutableRecord; use std::cmp::Ordering; pub struct Sorter { diff --git a/tests/integration/functions/test_function_rowid.rs b/tests/integration/functions/test_function_rowid.rs index 6afdae0ca..78a77ce9f 100644 --- a/tests/integration/functions/test_function_rowid.rs +++ b/tests/integration/functions/test_function_rowid.rs @@ -30,7 +30,7 @@ fn test_last_insert_rowid_basic() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - if let limbo_core::RefValue::Integer(id) = row.get_value(0) { + if let limbo_core::OwnedValue::Integer(id) = row.get_value(0) { assert_eq!(*id, 1, "First insert should have rowid 1"); } } @@ -66,7 +66,7 @@ fn test_last_insert_rowid_basic() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - if let limbo_core::RefValue::Integer(id) = row.get_value(0) { + if let limbo_core::OwnedValue::Integer(id) = row.get_value(0) { last_id = *id; } } @@ -112,7 +112,7 @@ fn test_integer_primary_key() -> anyhow::Result<()> { match select_query.step()? { StepResult::Row => { let row = select_query.row().unwrap(); - if let limbo_core::RefValue::Integer(id) = row.get_value(0) { + if let limbo_core::OwnedValue::Integer(id) = row.get_value(0) { rowids.push(*id); } } diff --git a/tests/integration/query_processing/test_read_path.rs b/tests/integration/query_processing/test_read_path.rs index 072db306f..7f525f6a9 100644 --- a/tests/integration/query_processing/test_read_path.rs +++ b/tests/integration/query_processing/test_read_path.rs @@ -15,7 +15,10 @@ fn test_statement_reset_bind() -> anyhow::Result<()> { match stmt.step()? { StepResult::Row => { let row = stmt.row().unwrap(); - assert_eq!(*row.get_value(0), limbo_core::OwnedValue::Integer(1)); + assert_eq!( + *row.get::<&OwnedValue>(0).unwrap(), + limbo_core::OwnedValue::Integer(1) + ); } StepResult::IO => tmp_db.io.run_once()?, _ => break, @@ -30,7 +33,10 @@ fn test_statement_reset_bind() -> anyhow::Result<()> { match stmt.step()? { StepResult::Row => { let row = stmt.row().unwrap(); - assert_eq!(*row.get_value(0), limbo_core::OwnedValue::Integer(2)); + assert_eq!( + *row.get::<&OwnedValue>(0).unwrap(), + limbo_core::OwnedValue::Integer(2) + ); } StepResult::IO => tmp_db.io.run_once()?, _ => break, @@ -63,23 +69,23 @@ fn test_statement_bind() -> anyhow::Result<()> { match stmt.step()? { StepResult::Row => { let row = stmt.row().unwrap(); - if let limbo_core::RefValue::Text(s) = row.get_value(0) { + if let limbo_core::OwnedValue::Text(s) = row.get::<&OwnedValue>(0).unwrap() { assert_eq!(s.as_str(), "hello") } - if let limbo_core::RefValue::Text(s) = row.get_value(1) { + if let limbo_core::OwnedValue::Text(s) = row.get::<&OwnedValue>(1).unwrap() { assert_eq!(s.as_str(), "hello") } - if let limbo_core::RefValue::Integer(i) = row.get_value(2) { + if let limbo_core::OwnedValue::Integer(i) = row.get::<&OwnedValue>(2).unwrap() { assert_eq!(*i, 42) } - if let limbo_core::RefValue::Blob(v) = row.get_value(3) { - assert_eq!(v.to_slice(), &vec![0x1 as u8, 0x2, 0x3]) + if let limbo_core::OwnedValue::Blob(v) = row.get::<&OwnedValue>(3).unwrap() { + assert_eq!(v.as_slice(), &vec![0x1 as u8, 0x2, 0x3]) } - if let limbo_core::RefValue::Float(f) = row.get_value(4) { + if let limbo_core::OwnedValue::Float(f) = row.get::<&OwnedValue>(4).unwrap() { assert_eq!(*f, 0.5) } } diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs index 543e0678a..6955f2483 100644 --- a/tests/integration/query_processing/test_write_path.rs +++ b/tests/integration/query_processing/test_write_path.rs @@ -44,17 +44,8 @@ fn test_simple_overflow_page() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - let first_value = row.get_value(0); - let text = row.get_value(1); - let id = match first_value { - limbo_core::RefValue::Integer(i) => *i as i32, - limbo_core::RefValue::Float(f) => *f as i32, - _ => unreachable!(), - }; - let text = match text { - limbo_core::RefValue::Text(t) => t.as_str(), - _ => unreachable!(), - }; + let id = row.get::(0).unwrap(); + let text = row.get::<&str>(0).unwrap(); assert_eq!(1, id); compare_string(&huge_text, text); } @@ -120,17 +111,8 @@ fn test_sequential_overflow_page() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - let first_value = row.get_value(0); - let text = row.get_value(1); - let id = match first_value { - limbo_core::RefValue::Integer(i) => *i as i32, - limbo_core::RefValue::Float(f) => *f as i32, - _ => unreachable!(), - }; - let text = match text { - limbo_core::RefValue::Text(t) => t.as_str(), - _ => unreachable!(), - }; + let id = row.get::(0).unwrap(); + let text = row.get::(1).unwrap(); let huge_text = &huge_texts[current_index]; compare_string(huge_text, text); assert_eq!(current_index, id as usize); @@ -305,7 +287,7 @@ fn test_statement_reset() -> anyhow::Result<()> { StepResult::Row => { let row = stmt.row().unwrap(); assert_eq!( - *row.get::<&OwnedValue>(0), + *row.get::<&OwnedValue>(0).unwrap(), limbo_core::OwnedValue::Integer(1) ); break; @@ -322,7 +304,7 @@ fn test_statement_reset() -> anyhow::Result<()> { StepResult::Row => { let row = stmt.row().unwrap(); assert_eq!( - *row.get::<&OwnedValue>(0), + *row.get::<&OwnedValue>(0).unwrap(), limbo_core::OwnedValue::Integer(1) ); break; @@ -374,12 +356,7 @@ fn test_wal_checkpoint() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - let first_value = row.get_value(0); - let id = match first_value { - RefValue::Integer(i) => *i as i32, - RefValue::Float(f) => *f as i32, - _ => unreachable!(), - }; + let id = row.get::(0).unwrap(); assert_eq!(current_index, id as usize); current_index += 1; } @@ -438,13 +415,9 @@ fn test_wal_restart() -> anyhow::Result<()> { match rows.step()? { StepResult::Row => { let row = rows.row().unwrap(); - let first_value = row.get_value(0); - let count = match first_value { - RefValue::Integer(i) => i, - _ => unreachable!(), - }; + let count = row.get::(0).unwrap(); debug!("counted {}", count); - return Ok(*count as usize); + return Ok(count as usize); } StepResult::IO => { tmp_db.io.run_once()?; diff --git a/tests/integration/wal/test_wal.rs b/tests/integration/wal/test_wal.rs index 53790a7df..646995957 100644 --- a/tests/integration/wal/test_wal.rs +++ b/tests/integration/wal/test_wal.rs @@ -81,11 +81,7 @@ fn test_wal_1_writer_1_reader() -> Result<()> { match rows.step().unwrap() { StepResult::Row => { let row = rows.row().unwrap(); - let first_value = row.get_value(0); - let id = match first_value { - RefValue::Integer(i) => *i as i32, - _ => unreachable!(), - }; + let id = row.get::(0).unwrap(); assert_eq!(id, i); i += 1; } From a13b33fec9c02bb0f9720878d5c86a79a4d9a4df Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sat, 29 Mar 2025 22:07:43 +0100 Subject: [PATCH 13/22] clippy again --- core/json/mod.rs | 2 -- core/vdbe/mod.rs | 2 +- tests/integration/query_processing/test_write_path.rs | 2 +- tests/integration/wal/test_wal.rs | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/core/json/mod.rs b/core/json/mod.rs index d35fa13d5..aed8c1cd2 100644 --- a/core/json/mod.rs +++ b/core/json/mod.rs @@ -663,8 +663,6 @@ pub fn json_quote(value: &OwnedValue) -> crate::Result { #[cfg(test)] mod tests { - use std::rc::Rc; - use super::*; use crate::types::OwnedValue; diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index a6f158023..ce9346c14 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -4579,7 +4579,7 @@ mod tests { exec_unhex, exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, Bitfield, OwnedValue, }; - use std::{collections::HashMap, rc::Rc}; + use std::collections::HashMap; #[test] fn test_length() { diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs index 6955f2483..1392ebcd2 100644 --- a/tests/integration/query_processing/test_write_path.rs +++ b/tests/integration/query_processing/test_write_path.rs @@ -1,6 +1,6 @@ use crate::common::{self, maybe_setup_tracing}; use crate::common::{compare_string, do_flush, TempDatabase}; -use limbo_core::{Connection, OwnedValue, RefValue, StepResult}; +use limbo_core::{Connection, OwnedValue, StepResult}; use log::debug; use std::rc::Rc; diff --git a/tests/integration/wal/test_wal.rs b/tests/integration/wal/test_wal.rs index 646995957..bb731917a 100644 --- a/tests/integration/wal/test_wal.rs +++ b/tests/integration/wal/test_wal.rs @@ -1,5 +1,5 @@ use crate::common::{do_flush, maybe_setup_tracing, TempDatabase}; -use limbo_core::{Connection, LimboError, RefValue, Result, StepResult}; +use limbo_core::{Connection, LimboError, Result, StepResult}; use std::cell::RefCell; use std::ops::Deref; use std::rc::Rc; From 37ddf0946f8fe0221f039675b068626c78079290 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sat, 29 Mar 2025 22:09:53 +0100 Subject: [PATCH 14/22] rever testing.db change --- testing/testing.db | Bin 1216512 -> 1212416 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/testing/testing.db b/testing/testing.db index 984b71fecd5bd6a1b53304575976b5123264f629..e508d589dfd51183fb8bf04a1e1f46a212be79b5 100644 GIT binary patch delta 131 zcmZoz;MLIJH9=aCg@J)lg8>1UC+ZjrvoPpY74QNDnfX*0_@445@u_T9RM^DZtiso> z!p8{2OhC*G#4JF}3dC$c%nrmHK+FlmTtLhX#5_RE3&bFO{6H)K#Dd#Z_=N5h0stE} B87Tk& delta 192 zcmZo@@M>7#H9=aCm4Sg#lK}x(Ch8cAvNGu9?cxOrvhe<5;CsrK#HYghYqO$48?TlI z3!6AUKa+h)Vp2|OMFpd#bC9cJh^s<~qmz%TLWM?!LS|k`YI Date: Sat, 29 Mar 2025 22:26:29 +0100 Subject: [PATCH 15/22] ignore sequential write beause it takes too long --- tests/integration/query_processing/test_write_path.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/query_processing/test_write_path.rs b/tests/integration/query_processing/test_write_path.rs index 1392ebcd2..e948ed5d1 100644 --- a/tests/integration/query_processing/test_write_path.rs +++ b/tests/integration/query_processing/test_write_path.rs @@ -136,6 +136,7 @@ fn test_sequential_overflow_page() -> anyhow::Result<()> { } #[test_log::test] +#[ignore = "this takes too long :)"] fn test_sequential_write() -> anyhow::Result<()> { let _ = env_logger::try_init(); maybe_setup_tracing(); From 3ac1795c25fb186366daee931753c25a3026d1cf Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sun, 30 Mar 2025 10:31:39 +0200 Subject: [PATCH 16/22] fix from_register serialization --- core/types.rs | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/core/types.rs b/core/types.rs index 1705603cf..9aa32710f 100644 --- a/core/types.rs +++ b/core/types.rs @@ -669,15 +669,31 @@ impl Record { struct AppendWriter<'a> { buf: &'a mut Vec, pos: usize, + buf_capacity_start: usize, + buf_ptr_start: *const u8, +} + +impl Drop for AppendWriter { + fn drop(&mut self) { + // let's make sure we didn't reallocate anywhere else + assert_eq!(self.buf_capacity_start, self.buf.capacity()); + assert_eq!(self.buf_ptr_start, self.buf.as_ptr()); + } } impl<'a> AppendWriter<'a> { pub fn new(buf: &'a mut Vec, pos: usize) -> Self { - Self { buf, pos } + Self { + buf, + pos, + buf_capacity_start: buf.capacity(), + buf_ptr_start: buf.as_ptr(), + } } #[inline] pub fn extend_from_slice(&mut self, slice: &[u8]) { + dbg!(self.pos, slice); self.buf[self.pos..self.pos + slice.len()].copy_from_slice(slice); self.pos += slice.len(); } @@ -728,13 +744,13 @@ impl ImmutableRecord { let value_size = match serial_type { SerialType::Null => 0, - SerialType::I8 => 8, - SerialType::I16 => 16, - SerialType::I24 => 24, - SerialType::I32 => 32, - SerialType::I48 => 48, - SerialType::I64 => 64, - SerialType::F64 => 64, + SerialType::I8 => 1, + SerialType::I16 => 2, + SerialType::I24 => 3, + SerialType::I32 => 4, + SerialType::I48 => 6, + SerialType::I64 => 8, + SerialType::F64 => 8, SerialType::Text { content_size } => content_size, SerialType::Blob { content_size } => content_size, }; @@ -753,15 +769,15 @@ impl ImmutableRecord { // if( nVarint Date: Sun, 30 Mar 2025 10:37:58 +0200 Subject: [PATCH 17/22] assert capacity didn't change --- core/types.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/core/types.rs b/core/types.rs index 9aa32710f..a17efb0ab 100644 --- a/core/types.rs +++ b/core/types.rs @@ -673,30 +673,29 @@ struct AppendWriter<'a> { buf_ptr_start: *const u8, } -impl Drop for AppendWriter { - fn drop(&mut self) { - // let's make sure we didn't reallocate anywhere else - assert_eq!(self.buf_capacity_start, self.buf.capacity()); - assert_eq!(self.buf_ptr_start, self.buf.as_ptr()); - } -} - impl<'a> AppendWriter<'a> { pub fn new(buf: &'a mut Vec, pos: usize) -> Self { + let buf_ptr_start = buf.as_ptr(); + let buf_capacity_start = buf.capacity(); Self { buf, pos, - buf_capacity_start: buf.capacity(), - buf_ptr_start: buf.as_ptr(), + buf_capacity_start, + buf_ptr_start, } } #[inline] pub fn extend_from_slice(&mut self, slice: &[u8]) { - dbg!(self.pos, slice); self.buf[self.pos..self.pos + slice.len()].copy_from_slice(slice); self.pos += slice.len(); } + + fn assert_finish_capacity(&self) { + // let's make sure we didn't reallocate anywhere else + assert_eq!(self.buf_capacity_start, self.buf.capacity()); + assert_eq!(self.buf_ptr_start, self.buf.as_ptr()); + } } impl ImmutableRecord { @@ -832,7 +831,7 @@ impl ImmutableRecord { }; } - assert_eq!(writer.pos, buf.len()); + writer.assert_finish_capacity(); Self { payload: buf, values, From 6ccb2e16d1551477d098d547f8e6a044642de7d0 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sun, 30 Mar 2025 11:00:13 +0200 Subject: [PATCH 18/22] safer api for ImmutableRecord recreation --- core/storage/btree.rs | 15 +++---- core/storage/sqlite3_ondisk.rs | 15 ++++--- core/types.rs | 76 ++++++++++++++++------------------ 3 files changed, 49 insertions(+), 57 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 1b8dc6d66..89c91f52d 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -260,7 +260,7 @@ pub struct BTreeCursor { stack: PageStack, /// Reusable immutable record, used to allow better allocation strategy. reusable_immutable_record: RefCell>, - empty_record: RefCell, + empty_record: Cell, } /// Stack of pages representing the tree traversal order. @@ -308,7 +308,7 @@ impl BTreeCursor { stack: RefCell::new([const { None }; BTCURSOR_MAX_DEPTH + 1]), }, reusable_immutable_record: RefCell::new(None), - empty_record: RefCell::new(true), + empty_record: Cell::new(true), } } @@ -1912,7 +1912,7 @@ impl BTreeCursor { } pub fn is_empty(&self) -> bool { - *self.empty_record.borrow() + self.empty_record.get() } pub fn root_page(&self) -> usize { @@ -2000,7 +2000,7 @@ impl BTreeCursor { Some(mv_cursor) => { let row_id = crate::mvcc::database::RowID::new(self.table_id() as u64, *int_key as u64); - let record_buf = record.payload.to_vec(); + let record_buf = record.get_payload().to_vec(); let row = crate::mvcc::database::Row::new(row_id, record_buf); mv_cursor.borrow_mut().insert(row).unwrap(); } @@ -2739,10 +2739,7 @@ impl BTreeCursor { fn get_lazy_immutable_record(&self) -> std::cell::RefMut<'_, Option> { if self.reusable_immutable_record.borrow().is_none() { - let record = ImmutableRecord { - payload: Vec::with_capacity(4096), - values: Vec::with_capacity(10), - }; + let record = ImmutableRecord::new(4096, 10); self.reusable_immutable_record.replace(Some(record)); } self.reusable_immutable_record.borrow_mut() @@ -3423,7 +3420,7 @@ fn fill_cell_payload( PageType::TableLeaf | PageType::IndexLeaf )); // TODO: make record raw from start, having to serialize is not good - let record_buf = record.payload.to_vec(); + let record_buf = record.get_payload().to_vec(); // fill in header if matches!(page_type, PageType::TableLeaf) { diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index f64e9e424..05eed40f2 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -1085,11 +1085,10 @@ impl SmallVec { pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Result<()> { // Let's clear previous use - reuse_immutable.payload.clear(); - reuse_immutable.values.clear(); + reuse_immutable.invalidate(); // Copy payload to ImmutableRecord in order to make RefValue that point to this new buffer. // By reusing this immutable record we make it less allocation expensive. - reuse_immutable.payload.extend_from_slice(payload); + reuse_immutable.start_serialization(payload); let mut pos = 0; let (header_size, nr) = read_varint(payload)?; @@ -1099,7 +1098,7 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res let mut serial_types = SmallVec::new(); while header_size > 0 { - let (serial_type, nr) = read_varint(&reuse_immutable.payload[pos..])?; + let (serial_type, nr) = read_varint(&reuse_immutable.get_payload()[pos..])?; let serial_type = validate_serial_type(serial_type)?; serial_types.push(serial_type); pos += nr; @@ -1108,17 +1107,17 @@ pub fn read_record(payload: &[u8], reuse_immutable: &mut ImmutableRecord) -> Res } for &serial_type in &serial_types.data[..serial_types.len.min(serial_types.data.len())] { - let (value, n) = read_value(&reuse_immutable.payload[pos..], unsafe { + let (value, n) = read_value(&reuse_immutable.get_payload()[pos..], unsafe { *serial_type.as_ptr() })?; pos += n; - reuse_immutable.values.push(value); + reuse_immutable.add_value(value); } if let Some(extra) = serial_types.extra_data.as_ref() { for serial_type in extra { - let (value, n) = read_value(&reuse_immutable.payload[pos..], *serial_type)?; + let (value, n) = read_value(&reuse_immutable.get_payload()[pos..], *serial_type)?; pos += n; - reuse_immutable.values.push(value); + reuse_immutable.add_value(value); } } diff --git a/core/types.rs b/core/types.rs index a17efb0ab..8c6f3245b 100644 --- a/core/types.rs +++ b/core/types.rs @@ -8,7 +8,6 @@ use crate::storage::sqlite3_ondisk::write_varint; use crate::vdbe::sorter::Sorter; use crate::vdbe::{Register, VTabOpaqueCursor}; use crate::Result; -use std::cmp::Ordering; use std::fmt::Display; const MAX_REAL_SIZE: u8 = 15; @@ -631,8 +630,9 @@ pub struct ImmutableRecord { // We have to be super careful with this buffer since we make values point to the payload we need to take care reallocations // happen in a controlled manner. If we realocate with values that should be correct, they will now point to undefined data. // We don't use pin here because it would make it imposible to reuse the buffer if we need to push a new record in the same struct. - pub payload: Vec, + payload: Vec, pub values: Vec, + recreating: bool, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] @@ -699,6 +699,14 @@ impl<'a> AppendWriter<'a> { } impl ImmutableRecord { + pub fn new(payload_capacity: usize, value_capacity: usize) -> Self { + Self { + payload: Vec::with_capacity(payload_capacity), + values: Vec::with_capacity(value_capacity), + recreating: false, + } + } + pub fn get<'a, T: FromValue<'a> + 'a>(&'a self, idx: usize) -> Result { let value = self .values @@ -835,8 +843,32 @@ impl ImmutableRecord { Self { payload: buf, values, + recreating: false, } } + + pub fn start_serialization(&mut self, payload: &[u8]) { + self.recreating = true; + self.payload.extend_from_slice(payload); + } + pub fn end_serialization(&mut self) { + assert!(self.recreating); + self.recreating = false; + } + + pub fn add_value(&mut self, value: RefValue) { + assert!(self.recreating); + self.values.push(value); + } + + pub fn invalidate(&mut self) { + self.payload.clear(); + self.values.clear(); + } + + pub fn get_payload(&self) -> &[u8] { + &self.payload + } } impl Clone for ImmutableRecord { @@ -872,6 +904,7 @@ impl Clone for ImmutableRecord { Self { payload: new_payload, values: new_values, + recreating: self.recreating, } } } @@ -1090,45 +1123,8 @@ impl PartialEq for RefValue { } } -pub fn compare_record_to_immutable( - record: &[OwnedValue], - immutable: &[RefValue], -) -> std::cmp::Ordering { - for (a, b) in record.iter().zip(immutable.iter()) { - match a.partial_cmp(b).unwrap() { - Ordering::Equal => {} - order => { - return order; - } - } - } - Ordering::Equal -} -pub fn compare_immutable_to_record( - immutable: &[RefValue], - record: &[OwnedValue], -) -> std::cmp::Ordering { - for (a, b) in immutable.iter().zip(record.iter()) { - match a.partial_cmp(b).unwrap() { - Ordering::Equal => {} - order => { - return order; - } - } - } - Ordering::Equal -} - pub fn compare_immutable(l: &[RefValue], r: &[RefValue]) -> std::cmp::Ordering { - for (a, b) in l.iter().zip(r.iter()) { - match a.partial_cmp(b).unwrap() { - Ordering::Equal => {} - order => { - return order; - } - } - } - Ordering::Equal + l.partial_cmp(r).unwrap() } const I8_LOW: i64 = -128; From 541b67bd2b93b2248050279b6411205fd27a53ce Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sun, 30 Mar 2025 11:00:59 +0200 Subject: [PATCH 19/22] rename get_lazy_immutable_record -> get_immutable_record_or_create --- core/storage/btree.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/core/storage/btree.rs b/core/storage/btree.rs index 89c91f52d..42138ff9b 100644 --- a/core/storage/btree.rs +++ b/core/storage/btree.rs @@ -402,7 +402,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record( _payload, - self.get_lazy_immutable_record().as_mut().unwrap(), + self.get_immutable_record_or_create().as_mut().unwrap(), )? }; self.stack.retreat(); @@ -472,7 +472,7 @@ impl BTreeCursor { match res { CursorResult::Ok(payload) => { { - let mut reuse_immutable = self.get_lazy_immutable_record(); + let mut reuse_immutable = self.get_immutable_record_or_create(); crate::storage::sqlite3_ondisk::read_record( &payload, reuse_immutable.as_mut().unwrap(), @@ -499,7 +499,7 @@ impl BTreeCursor { let record = mv_cursor.current_row().unwrap().unwrap(); crate::storage::sqlite3_ondisk::read_record( &record.data, - self.get_lazy_immutable_record().as_mut().unwrap(), + self.get_immutable_record_or_create().as_mut().unwrap(), )?; mv_cursor.forward(); return Ok(CursorResult::Ok(Some(rowid.row_id))); @@ -591,7 +591,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record( _payload, - self.get_lazy_immutable_record().as_mut().unwrap(), + self.get_immutable_record_or_create().as_mut().unwrap(), )? }; self.stack.advance(); @@ -617,7 +617,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record( payload, - self.get_lazy_immutable_record().as_mut().unwrap(), + self.get_immutable_record_or_create().as_mut().unwrap(), )? }; @@ -670,7 +670,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record( payload, - self.get_lazy_immutable_record().as_mut().unwrap(), + self.get_immutable_record_or_create().as_mut().unwrap(), )? }; @@ -762,7 +762,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record( payload, - self.get_lazy_immutable_record().as_mut().unwrap(), + self.get_immutable_record_or_create().as_mut().unwrap(), )? }; self.stack.advance(); @@ -788,7 +788,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record( payload, - self.get_lazy_immutable_record().as_mut().unwrap(), + self.get_immutable_record_or_create().as_mut().unwrap(), )? }; let record = self.get_immutable_record(); @@ -979,7 +979,7 @@ impl BTreeCursor { } else { crate::storage::sqlite3_ondisk::read_record( payload, - self.get_lazy_immutable_record().as_mut().unwrap(), + self.get_immutable_record_or_create().as_mut().unwrap(), )? }; let order = compare_immutable( @@ -2737,7 +2737,7 @@ impl BTreeCursor { Ok(CursorResult::Ok(())) } - fn get_lazy_immutable_record(&self) -> std::cell::RefMut<'_, Option> { + fn get_immutable_record_or_create(&self) -> std::cell::RefMut<'_, Option> { if self.reusable_immutable_record.borrow().is_none() { let record = ImmutableRecord::new(4096, 10); self.reusable_immutable_record.replace(Some(record)); From 3899f8ca179e07de7c3e1f91af51eabf49aa880c Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sun, 30 Mar 2025 11:03:45 +0200 Subject: [PATCH 20/22] comment header size --- core/types.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/types.rs b/core/types.rs index 8c6f3245b..90b6dcd27 100644 --- a/core/types.rs +++ b/core/types.rs @@ -768,6 +768,10 @@ impl ImmutableRecord { let mut header_size = size_header; if header_size <= 126 { // common case + // This case means the header size can be contained by a single byte, therefore + // header_size == size of serial types + 1 byte from the header size + // Since header_size is a varint, and a varint the first bit is used to represent we have more bytes to read, + // header size here will be 126 == (2^7 - 1) header_size += 1; } else { todo!("calculate big header size extra bytes"); From 8d74f4b8abf106d72cffc53dc78bb3273847f2d7 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sun, 30 Mar 2025 11:07:23 +0200 Subject: [PATCH 21/22] remove unnecessary partial ord --- core/storage/sqlite3_ondisk.rs | 2 +- core/types.rs | 122 +-------------------------------- 2 files changed, 2 insertions(+), 122 deletions(-) diff --git a/core/storage/sqlite3_ondisk.rs b/core/storage/sqlite3_ondisk.rs index 05eed40f2..636b1acaf 100644 --- a/core/storage/sqlite3_ondisk.rs +++ b/core/storage/sqlite3_ondisk.rs @@ -1624,7 +1624,7 @@ mod tests { #[case] expected: OwnedValue, ) { let result = read_value(buf, serial_type).unwrap(); - assert_eq!(result.0, expected); + assert_eq!(result.0.to_owned(), expected); } #[test] diff --git a/core/types.rs b/core/types.rs index 90b6dcd27..a951f326c 100644 --- a/core/types.rs +++ b/core/types.rs @@ -747,7 +747,7 @@ impl ImmutableRecord { let value = value.get_owned_value(); let serial_type = SerialType::from(value); let n = write_varint(&mut serial_type_buf[0..], serial_type.into()); - serials.push((serial_type_buf.clone(), n)); + serials.push((serial_type_buf, n)); let value_size = match serial_type { SerialType::Null => 0, @@ -1007,126 +1007,6 @@ impl PartialOrd for RefValue { } } -#[allow(clippy::non_canonical_partial_ord_impl)] -impl PartialOrd for RefValue { - fn partial_cmp(&self, other: &OwnedValue) -> Option { - match (self, other) { - (Self::Integer(int_left), OwnedValue::Integer(int_right)) => { - int_left.partial_cmp(int_right) - } - (Self::Integer(int_left), OwnedValue::Float(float_right)) => { - (*int_left as f64).partial_cmp(float_right) - } - (Self::Float(float_left), OwnedValue::Integer(int_right)) => { - float_left.partial_cmp(&(*int_right as f64)) - } - (Self::Float(float_left), OwnedValue::Float(float_right)) => { - float_left.partial_cmp(float_right) - } - // Numeric vs Text/Blob - (Self::Integer(_) | Self::Float(_), OwnedValue::Text(_) | OwnedValue::Blob(_)) => { - Some(std::cmp::Ordering::Less) - } - (Self::Text(_) | Self::Blob(_), OwnedValue::Integer(_) | OwnedValue::Float(_)) => { - Some(std::cmp::Ordering::Greater) - } - - (Self::Text(text_left), OwnedValue::Text(text_right)) => { - let text_left = text_left.value.to_slice(); - text_left.partial_cmp(&text_right.value) - } - // Text vs Blob - (Self::Text(_), OwnedValue::Blob(_)) => Some(std::cmp::Ordering::Less), - (Self::Blob(_), OwnedValue::Text(_)) => Some(std::cmp::Ordering::Greater), - - (Self::Blob(blob_left), OwnedValue::Blob(blob_right)) => { - let blob_left = blob_left.to_slice(); - blob_left.partial_cmp(blob_right) - } - (Self::Null, OwnedValue::Null) => Some(std::cmp::Ordering::Equal), - (Self::Null, _) => Some(std::cmp::Ordering::Less), - (_, OwnedValue::Null) => Some(std::cmp::Ordering::Greater), - } - } -} - -#[allow(clippy::non_canonical_partial_ord_impl)] -impl PartialOrd for OwnedValue { - fn partial_cmp(&self, other: &RefValue) -> Option { - match (self, other) { - (Self::Integer(int_left), RefValue::Integer(int_right)) => { - int_left.partial_cmp(int_right) - } - (Self::Integer(int_left), RefValue::Float(float_right)) => { - (*int_left as f64).partial_cmp(float_right) - } - (Self::Float(float_left), RefValue::Integer(int_right)) => { - float_left.partial_cmp(&(*int_right as f64)) - } - (Self::Float(float_left), RefValue::Float(float_right)) => { - float_left.partial_cmp(float_right) - } - // Numeric vs Text/Blob - (Self::Integer(_) | Self::Float(_), RefValue::Text(_) | RefValue::Blob(_)) => { - Some(std::cmp::Ordering::Less) - } - (Self::Text(_) | Self::Blob(_), RefValue::Integer(_) | RefValue::Float(_)) => { - Some(std::cmp::Ordering::Greater) - } - - (Self::Text(text_left), RefValue::Text(text_right)) => { - let text_right = text_right.value.to_slice(); - text_left.value.as_slice().partial_cmp(text_right) - } - // Text vs Blob - (Self::Text(_), RefValue::Blob(_)) => Some(std::cmp::Ordering::Less), - (Self::Blob(_), RefValue::Text(_)) => Some(std::cmp::Ordering::Greater), - - (Self::Blob(blob_left), RefValue::Blob(blob_right)) => { - let blob_right = blob_right.to_slice(); - blob_left.as_slice().partial_cmp(blob_right) - } - (Self::Null, RefValue::Null) => Some(std::cmp::Ordering::Equal), - (Self::Null, _) => Some(std::cmp::Ordering::Less), - (_, RefValue::Null) => Some(std::cmp::Ordering::Greater), - } - } -} - -impl PartialEq for OwnedValue { - fn eq(&self, other: &RefValue) -> bool { - match (self, other) { - (Self::Integer(int_left), RefValue::Integer(int_right)) => int_left == int_right, - (Self::Float(float_left), RefValue::Float(float_right)) => float_left == float_right, - (Self::Text(text_left), RefValue::Text(text_right)) => { - text_left.value.as_slice() == text_right.value.to_slice() - } - (Self::Blob(blob_left), RefValue::Blob(blob_right)) => { - blob_left.as_slice() == blob_right.to_slice() - } - (Self::Null, RefValue::Null) => true, - _ => false, - } - } -} - -impl PartialEq for RefValue { - fn eq(&self, other: &OwnedValue) -> bool { - match (self, other) { - (Self::Integer(int_left), OwnedValue::Integer(int_right)) => int_left == int_right, - (Self::Float(float_left), OwnedValue::Float(float_right)) => float_left == float_right, - (Self::Text(text_left), OwnedValue::Text(text_right)) => { - text_left.value.to_slice() == text_right.value.as_slice() - } - (Self::Blob(blob_left), OwnedValue::Blob(blob_right)) => { - blob_left.to_slice() == blob_right.as_slice() - } - (Self::Null, OwnedValue::Null) => true, - _ => false, - } - } -} - pub fn compare_immutable(l: &[RefValue], r: &[RefValue]) -> std::cmp::Ordering { l.partial_cmp(r).unwrap() } From 578bc9e3e67dcded880aa61e5df16ef56d8071c0 Mon Sep 17 00:00:00 2001 From: Pere Diaz Bou Date: Sun, 30 Mar 2025 11:12:11 +0200 Subject: [PATCH 22/22] extract constant min_header_size --- core/types.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/types.rs b/core/types.rs index a951f326c..a34bf4e65 100644 --- a/core/types.rs +++ b/core/types.rs @@ -766,7 +766,8 @@ impl ImmutableRecord { size_values += value_size; } let mut header_size = size_header; - if header_size <= 126 { + const MIN_HEADER_SIZE: usize = 126; + if header_size <= MIN_HEADER_SIZE { // common case // This case means the header size can be contained by a single byte, therefore // header_size == size of serial types + 1 byte from the header size