Merge 'VDBE/op_column: use references to cursor payload instead of cloning' from Jussi Saurio

Instead of cloning the cursor payload, use RefValue to refer to the record payload
directly, and copy it into the register only as necessary.
Benchmark results on my local machine:
```
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/1: Warming u
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/1: Collectin
Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/1
                        time:   [491.64 ns 492.54 ns 493.64 ns]
                        change: [-3.6642% -3.3050% -2.9558%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  5 (5.00%) high mild
  1 (1.00%) high severe
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/10: Warming
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/10: Collecti
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/10: Analyzin
Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/10
                        time:   [2.7923 µs 2.8001 µs 2.8114 µs]
                        change: [-14.643% -14.282% -13.878%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  1 (1.00%) low severe
  1 (1.00%) high mild
  4 (4.00%) high severe
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/50: Warming
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/50: Collecti
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/50: Analyzin
Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/50
                        time:   [13.452 µs 13.496 µs 13.550 µs]
                        change: [-15.768% -15.471% -15.182%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 5 outliers among 100 measurements (5.00%)
  1 (1.00%) high mild
  4 (4.00%) high severe
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/100: Warming
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/100: Collect
Benchmarking Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/100: Analyzi
Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/100
                        time:   [27.110 µs 27.162 µs 27.226 µs]
                        change: [-15.878% -15.604% -15.336%] (p = 0.00 < 0.05)
                        Performance has improved.
```
CI, main branch:
```
Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/100
                        time:   [70.671 µs 71.741 µs 72.910 µs]
```
CI, this branch:
```
Execute `SELECT * FROM users LIMIT ?`/limbo_execute_select_rows/100
                        time:   [53.969 µs 54.013 µs 54.061 µs]
```

Reviewed-by: bit-aloo (@Shourya742)

Closes #2205
This commit is contained in:
Jussi Saurio
2025-07-28 14:13:54 +03:00
2 changed files with 115 additions and 36 deletions

View File

@@ -82,7 +82,6 @@ impl Text {
subtype: TextSubtype::Text,
}
}
#[cfg(feature = "json")]
pub fn json(value: String) -> Self {
Self {
@@ -96,6 +95,63 @@ impl Text {
}
}
/// In-place update of `self` from a borrowed value of type `T`.
///
/// NOTE(review): despite the "extend" name, the implementations visible next to
/// this trait (for `Text` and `Vec<u8>`) clear `self` first and then copy
/// `other`'s bytes in, i.e. they overwrite rather than append. Confirm that any
/// other implementor follows the same contract.
pub trait Extendable<T> {
/// Updates `self` from `other` without taking ownership of `other`.
fn do_extend(&mut self, other: &T);
}
/// Lets a `Text` take over the contents of any [`AnyText`] value in place.
impl<T: AnyText> Extendable<T> for Text {
    /// Overwrites this text's bytes and subtype with those of `other`.
    ///
    /// Nothing is appended: the current bytes are discarded before `other`'s
    /// bytes are copied in.
    fn do_extend(&mut self, other: &T) {
        // `Vec::clear` drops the contents but keeps the allocation, so the copy
        // below can reuse the existing buffer when it is large enough.
        self.value.clear();
        let incoming: &str = other.as_ref();
        self.value.extend_from_slice(incoming.as_bytes());
        self.subtype = other.subtype();
    }
}
impl<T: AnyBlob> Extendable<T> for Vec<u8> {
fn do_extend(&mut self, other: &T) {
self.clear();
self.extend_from_slice(other.as_slice());
}
}
/// A text-like value: viewable as `&str` through the `AsRef<str>` supertrait
/// and able to report its [`TextSubtype`].
pub trait AnyText: AsRef<str> {
/// Returns the subtype tag carried by this text value.
fn subtype(&self) -> TextSubtype;
}
/// Provides the `&str` view that the `AnyText` supertrait bound requires.
impl AsRef<str> for TextRef {
/// Returns the string view produced by `TextRef::as_str`.
fn as_ref(&self) -> &str {
self.as_str()
}
}
/// Owned text participates in `AnyText` by exposing its stored subtype.
impl AnyText for Text {
/// Returns the value of the `subtype` field.
fn subtype(&self) -> TextSubtype {
self.subtype
}
}
/// `TextRef` participates in `AnyText` by exposing its stored subtype.
impl AnyText for TextRef {
/// Returns the value of the `subtype` field.
fn subtype(&self) -> TextSubtype {
self.subtype
}
}
/// A blob-like value that can expose its contents as a byte slice.
pub trait AnyBlob {
/// Returns the bytes of this blob, borrowed from `self`.
fn as_slice(&self) -> &[u8];
}
/// `RawSlice` participates in `AnyBlob` by delegating to its own slice accessor.
impl AnyBlob for RawSlice {
/// Returns the byte slice produced by `RawSlice::to_slice`.
fn as_slice(&self) -> &[u8] {
self.to_slice()
}
}
/// An owned byte vector is trivially a blob.
impl AnyBlob for Vec<u8> {
/// Returns the vector's contents as a byte slice.
fn as_slice(&self) -> &[u8] {
// Not self-recursive: method resolution prefers the inherent
// `Vec::as_slice` over this trait method of the same name.
self.as_slice()
}
}
impl AsRef<str> for Text {
fn as_ref(&self) -> &str {
self.as_str()

View File

@@ -10,7 +10,8 @@ use crate::storage::wal::DummyWAL;
use crate::storage::{self, header_accessor};
use crate::translate::collate::CollationSeq;
use crate::types::{
compare_immutable, compare_records_generic, ImmutableRecord, SeekResult, Text, TextSubtype,
compare_immutable, compare_records_generic, Extendable, ImmutableRecord, RawSlice, SeekResult,
Text, TextRef, TextSubtype,
};
use crate::util::normalize_ident;
use crate::vdbe::insn::InsertFlags;
@@ -1420,18 +1421,18 @@ pub fn op_column(
let cursor = cursor.as_btree_mut();
if cursor.get_null_flag() {
break 'value Value::Null;
break 'value Some(RefValue::Null);
}
let record_result = return_if_io!(cursor.record());
let Some(record) = record_result.as_ref() else {
break 'value default.clone().unwrap_or(Value::Null);
break 'value None;
};
let payload = record.get_payload();
if payload.is_empty() {
break 'value default.clone().unwrap_or(Value::Null);
break 'value None;
}
let mut record_cursor = cursor.record_cursor.borrow_mut();
@@ -1507,25 +1508,25 @@ pub fn op_column(
record_cursor.offsets.clear();
record_cursor.header_offset = 0;
record_cursor.header_size = 0;
break 'value default.clone().unwrap_or(Value::Null);
break 'value None;
}
if target_column >= record_cursor.serial_types.len() {
break 'value default.clone().unwrap_or(Value::Null);
break 'value None;
}
let serial_type = record_cursor.serial_types[target_column];
// Fast path for common constant cases
match serial_type {
0 => break 'value Value::Null,
8 => break 'value Value::Integer(0),
9 => break 'value Value::Integer(1),
0 => break 'value Some(RefValue::Null),
8 => break 'value Some(RefValue::Integer(0)),
9 => break 'value Some(RefValue::Integer(1)),
_ => {}
}
if target_column + 1 >= record_cursor.offsets.len() {
break 'value default.clone().unwrap_or(Value::Null);
break 'value None;
}
let start_offset = record_cursor.offsets[target_column];
@@ -1547,7 +1548,10 @@ pub fn op_column(
};
if data_len >= expected_len {
Value::Integer(read_integer_fast(data_slice, expected_len))
Some(RefValue::Integer(read_integer_fast(
data_slice,
expected_len,
)))
} else {
return Err(LimboError::Corrupt(format!(
"Insufficient data for integer type {serial_type}: expected {expected_len}, got {data_len}"
@@ -1566,41 +1570,60 @@ pub fn op_column(
data_slice[6],
data_slice[7],
];
Value::Float(f64::from_be_bytes(bytes))
Some(RefValue::Float(f64::from_be_bytes(bytes)))
} else {
default.clone().unwrap_or(Value::Null)
None
}
}
n if n >= 12 && n % 2 == 0 => Value::Blob(data_slice.to_vec()),
n if n >= 13 && n % 2 == 1 => Value::Text(Text {
value: data_slice.to_vec(),
subtype: TextSubtype::Text,
}),
_ => default.clone().unwrap_or(Value::Null),
n if n >= 12 && n % 2 == 0 => {
Some(RefValue::Blob(RawSlice::create_from(data_slice)))
}
n if n >= 13 && n % 2 == 1 => Some(RefValue::Text(TextRef::create_from(
data_slice,
TextSubtype::Text,
))),
_ => None,
}
};
let Some(value) = value else {
// DEFAULT handling. Try to reuse the registers when allocation is not needed.
let Some(ref default) = default else {
state.registers[*dest] = Register::Value(Value::Null);
state.pc += 1;
return Ok(InsnFunctionStepResult::Step);
};
match (default, &mut state.registers[*dest]) {
(Value::Text(new_text), Register::Value(Value::Text(existing_text))) => {
existing_text.do_extend(new_text);
}
(Value::Blob(new_blob), Register::Value(Value::Blob(existing_blob))) => {
existing_blob.do_extend(new_blob);
}
_ => {
state.registers[*dest] = Register::Value(default.clone());
}
}
state.pc += 1;
return Ok(InsnFunctionStepResult::Step);
};
// Try to reuse the registers when allocation is not needed.
match (&value, &mut state.registers[*dest]) {
(Value::Text(new_text), Register::Value(Value::Text(existing_text))) => {
if existing_text.value.capacity() >= new_text.value.len() {
existing_text.value.clear();
existing_text.value.extend_from_slice(&new_text.value);
existing_text.subtype = new_text.subtype;
} else {
state.registers[*dest] = Register::Value(value);
}
(RefValue::Text(new_text), Register::Value(Value::Text(existing_text))) => {
existing_text.do_extend(new_text);
}
(Value::Blob(new_blob), Register::Value(Value::Blob(existing_blob))) => {
if existing_blob.capacity() >= new_blob.len() {
existing_blob.clear();
existing_blob.extend_from_slice(new_blob);
} else {
state.registers[*dest] = Register::Value(value);
}
(RefValue::Blob(new_blob), Register::Value(Value::Blob(existing_blob))) => {
existing_blob.do_extend(new_blob);
}
_ => {
state.registers[*dest] = Register::Value(value);
state.registers[*dest] = Register::Value(match value {
RefValue::Integer(i) => Value::Integer(i),
RefValue::Float(f) => Value::Float(f),
RefValue::Text(t) => Value::Text(Text::new(t.as_str())),
RefValue::Blob(b) => Value::Blob(b.to_slice().to_vec()),
RefValue::Null => Value::Null,
});
}
}
}