mirror of
https://github.com/aljazceru/turso.git
synced 2026-02-08 17:54:22 +01:00
implement sparse vector operations
This commit is contained in:
@@ -153,6 +153,7 @@ impl Display for JsonFunc {
|
||||
pub enum VectorFunc {
|
||||
Vector,
|
||||
Vector32,
|
||||
Vector32Sparse,
|
||||
Vector64,
|
||||
VectorExtract,
|
||||
VectorDistanceCos,
|
||||
@@ -172,6 +173,7 @@ impl Display for VectorFunc {
|
||||
let str = match self {
|
||||
Self::Vector => "vector".to_string(),
|
||||
Self::Vector32 => "vector32".to_string(),
|
||||
Self::Vector32Sparse => "vector32_sparse".to_string(),
|
||||
Self::Vector64 => "vector64".to_string(),
|
||||
Self::VectorExtract => "vector_extract".to_string(),
|
||||
Self::VectorDistanceCos => "vector_distance_cos".to_string(),
|
||||
@@ -864,6 +866,7 @@ impl Func {
|
||||
"printf" => Ok(Self::Scalar(ScalarFunc::Printf)),
|
||||
"vector" => Ok(Self::Vector(VectorFunc::Vector)),
|
||||
"vector32" => Ok(Self::Vector(VectorFunc::Vector32)),
|
||||
"vector32_sparse" => Ok(Self::Vector(VectorFunc::Vector32Sparse)),
|
||||
"vector64" => Ok(Self::Vector(VectorFunc::Vector64)),
|
||||
"vector_extract" => Ok(Self::Vector(VectorFunc::VectorExtract)),
|
||||
"vector_distance_cos" => Ok(Self::Vector(VectorFunc::VectorDistanceCos)),
|
||||
|
||||
@@ -879,6 +879,14 @@ pub fn translate_expr(
|
||||
emit_function_call(program, func_ctx, &[start_reg], target_register)?;
|
||||
Ok(target_register)
|
||||
}
|
||||
VectorFunc::Vector32Sparse => {
|
||||
let args = expect_arguments_exact!(args, 1, vector_func);
|
||||
let start_reg = program.alloc_register();
|
||||
translate_expr(program, referenced_tables, &args[0], start_reg, resolver)?;
|
||||
|
||||
emit_function_call(program, func_ctx, &[start_reg], target_register)?;
|
||||
Ok(target_register)
|
||||
}
|
||||
VectorFunc::Vector64 => {
|
||||
let args = expect_arguments_exact!(args, 1, vector_func);
|
||||
let start_reg = program.alloc_register();
|
||||
|
||||
@@ -20,7 +20,7 @@ use crate::types::{
|
||||
use crate::util::normalize_ident;
|
||||
use crate::vdbe::insn::InsertFlags;
|
||||
use crate::vdbe::{registers_to_ref_values, TxnCleanup};
|
||||
use crate::vector::{vector_concat, vector_slice};
|
||||
use crate::vector::{vector32_sparse, vector_concat, vector_slice};
|
||||
use crate::{
|
||||
error::{
|
||||
LimboError, SQLITE_CONSTRAINT, SQLITE_CONSTRAINT_NOTNULL, SQLITE_CONSTRAINT_PRIMARYKEY,
|
||||
@@ -5197,6 +5197,10 @@ pub fn op_function(
|
||||
let result = vector32(&state.registers[*start_reg..*start_reg + arg_count])?;
|
||||
state.registers[*dest] = Register::Value(result);
|
||||
}
|
||||
VectorFunc::Vector32Sparse => {
|
||||
let result = vector32_sparse(&state.registers[*start_reg..*start_reg + arg_count])?;
|
||||
state.registers[*dest] = Register::Value(result);
|
||||
}
|
||||
VectorFunc::Vector64 => {
|
||||
let result = vector64(&state.registers[*start_reg..*start_reg + arg_count])?;
|
||||
state.registers[*dest] = Register::Value(result);
|
||||
|
||||
@@ -8,10 +8,10 @@ pub mod operations;
|
||||
pub mod vector_types;
|
||||
use vector_types::*;
|
||||
|
||||
pub fn parse_vector(value: &Register, vec_ty: Option<VectorType>) -> Result<Vector> {
|
||||
pub fn parse_vector(value: &Register, type_hint: Option<VectorType>) -> Result<Vector> {
|
||||
match value.get_value().value_type() {
|
||||
ValueType::Text => operations::text::vector_from_text(
|
||||
vec_ty.unwrap_or(VectorType::Float32Dense),
|
||||
type_hint.unwrap_or(VectorType::Float32Dense),
|
||||
value.get_value().to_text().expect("value must be text"),
|
||||
),
|
||||
ValueType::Blob => {
|
||||
@@ -39,6 +39,17 @@ pub fn vector32(args: &[Register]) -> Result<Value> {
|
||||
Ok(operations::serialize::vector_serialize(vector))
|
||||
}
|
||||
|
||||
pub fn vector32_sparse(args: &[Register]) -> Result<Value> {
|
||||
if args.len() != 1 {
|
||||
return Err(LimboError::ConversionError(
|
||||
"vector32_sparse requires exactly one argument".to_string(),
|
||||
));
|
||||
}
|
||||
let vector = parse_vector(&args[0], Some(VectorType::Float32Sparse))?;
|
||||
let vector = operations::convert::vector_convert(vector, VectorType::Float32Sparse)?;
|
||||
Ok(operations::serialize::vector_serialize(vector))
|
||||
}
|
||||
|
||||
pub fn vector64(args: &[Register]) -> Result<Value> {
|
||||
if args.len() != 1 {
|
||||
return Err(LimboError::ConversionError(
|
||||
|
||||
@@ -17,6 +17,14 @@ pub fn vector_concat(v1: &Vector, v2: &Vector) -> Result<Vector> {
|
||||
data.extend_from_slice(&v2.data);
|
||||
data
|
||||
}
|
||||
VectorType::Float32Sparse => {
|
||||
let mut data = Vec::with_capacity(v1.data.len() + v2.data.len());
|
||||
data.extend_from_slice(&v1.data[..v1.data.len() / 2]);
|
||||
data.extend_from_slice(&v2.data[..v2.data.len() / 2]);
|
||||
data.extend_from_slice(&v1.data[v1.data.len() / 2..]);
|
||||
data.extend_from_slice(&v2.data[v2.data.len() / 2..]);
|
||||
data
|
||||
}
|
||||
_ => todo!(),
|
||||
};
|
||||
|
||||
|
||||
@@ -6,21 +6,88 @@ use crate::{
|
||||
pub fn vector_convert(v: Vector, target_type: VectorType) -> Result<Vector> {
|
||||
match (v.vector_type, target_type) {
|
||||
(VectorType::Float32Dense, VectorType::Float32Dense)
|
||||
| (VectorType::Float64Dense, VectorType::Float64Dense) => Ok(v),
|
||||
| (VectorType::Float64Dense, VectorType::Float64Dense)
|
||||
| (VectorType::Float32Sparse, VectorType::Float32Sparse) => Ok(v),
|
||||
(VectorType::Float32Dense, VectorType::Float64Dense) => {
|
||||
let mut data = Vec::with_capacity(v.dims * 8);
|
||||
for &x in v.as_f32_slice() {
|
||||
data.extend_from_slice(&f64::to_le_bytes(x as f64));
|
||||
}
|
||||
Vector::from_data(target_type, data)
|
||||
Ok(Vector {
|
||||
vector_type: target_type,
|
||||
dims: v.dims,
|
||||
data,
|
||||
})
|
||||
}
|
||||
(VectorType::Float64Dense, VectorType::Float32Dense) => {
|
||||
let mut data = Vec::with_capacity(v.dims * 4);
|
||||
for &x in v.as_f32_slice() {
|
||||
data.extend_from_slice(&f64::to_le_bytes(x as f64));
|
||||
}
|
||||
Vector::from_data(target_type, data)
|
||||
Ok(Vector {
|
||||
vector_type: target_type,
|
||||
dims: v.dims,
|
||||
data,
|
||||
})
|
||||
}
|
||||
(VectorType::Float32Dense, VectorType::Float32Sparse) => {
|
||||
let (mut idx, mut values) = (Vec::new(), Vec::new());
|
||||
for (i, &value) in v.as_f32_slice().iter().enumerate() {
|
||||
if value == 0.0 {
|
||||
continue;
|
||||
}
|
||||
idx.extend_from_slice(&(i as u32).to_le_bytes());
|
||||
values.extend_from_slice(&value.to_le_bytes());
|
||||
}
|
||||
values.extend_from_slice(&idx);
|
||||
Ok(Vector {
|
||||
vector_type: target_type,
|
||||
dims: v.dims,
|
||||
data: values,
|
||||
})
|
||||
}
|
||||
(VectorType::Float64Dense, VectorType::Float32Sparse) => {
|
||||
let (mut idx, mut values) = (Vec::new(), Vec::new());
|
||||
for (i, &value) in v.as_f64_slice().iter().enumerate() {
|
||||
if value == 0.0 {
|
||||
continue;
|
||||
}
|
||||
idx.extend_from_slice(&(i as u32).to_le_bytes());
|
||||
values.extend_from_slice(&(value as f32).to_le_bytes());
|
||||
}
|
||||
values.extend_from_slice(&idx);
|
||||
Ok(Vector {
|
||||
vector_type: target_type,
|
||||
dims: v.dims,
|
||||
data: values,
|
||||
})
|
||||
}
|
||||
(VectorType::Float32Sparse, VectorType::Float32Dense) => {
|
||||
let sparse = v.as_f32_sparse();
|
||||
let mut data = vec![0u8; v.dims * 4];
|
||||
for (&i, &value) in sparse.idx.iter().zip(sparse.values.iter()) {
|
||||
data.splice((4 * i) as usize..4 * (i + 1) as usize, value.to_le_bytes());
|
||||
}
|
||||
Ok(Vector {
|
||||
vector_type: target_type,
|
||||
dims: v.dims,
|
||||
data,
|
||||
})
|
||||
}
|
||||
(VectorType::Float32Sparse, VectorType::Float64Dense) => {
|
||||
let sparse = v.as_f32_sparse();
|
||||
let mut data = vec![0u8; v.dims * 8];
|
||||
for (&i, &value) in sparse.idx.iter().zip(sparse.values.iter()) {
|
||||
data.splice(
|
||||
(8 * i) as usize..8 * (i + 1) as usize,
|
||||
(value as f64).to_le_bytes(),
|
||||
);
|
||||
}
|
||||
Ok(Vector {
|
||||
vector_type: target_type,
|
||||
dims: v.dims,
|
||||
data,
|
||||
})
|
||||
}
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::{
|
||||
vector::vector_types::{Vector, VectorType},
|
||||
vector::vector_types::{Vector, VectorSparse, VectorType},
|
||||
LimboError, Result,
|
||||
};
|
||||
|
||||
@@ -23,6 +23,10 @@ pub fn vector_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
v1.as_f64_slice(),
|
||||
v2.as_f64_slice(),
|
||||
)),
|
||||
VectorType::Float32Sparse => Ok(vector_f32_sparse_distance_cos(
|
||||
v1.as_f32_sparse(),
|
||||
v2.as_f32_sparse(),
|
||||
)),
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
@@ -67,6 +71,45 @@ fn vector_f64_distance_cos(v1: &[f64], v2: &[f64]) -> f64 {
|
||||
1.0 - (dot / (norm1 * norm2).sqrt())
|
||||
}
|
||||
|
||||
/// Cosine distance (`1 - cos`) between two sparse `f32` vectors.
///
/// The two operands are walked in lockstep by index, so each `idx` slice must be
/// in ascending order. Entries that share an index contribute to the dot
/// product; every entry contributes to the squared norm of its own vector.
///
/// Returns `NaN` when either vector has a zero norm (including empty vectors).
fn vector_f32_sparse_distance_cos(v1: VectorSparse<f32>, v2: VectorSparse<f32>) -> f64 {
    let (mut v1_pos, mut v2_pos) = (0, 0);
    let (mut dot, mut norm1, mut norm2) = (0.0f32, 0.0f32, 0.0f32);

    while v1_pos < v1.idx.len() && v2_pos < v2.idx.len() {
        let e1 = v1.values[v1_pos];
        let e2 = v2.values[v2_pos];
        if v1.idx[v1_pos] == v2.idx[v2_pos] {
            dot += e1 * e2;
            norm1 += e1 * e1;
            norm2 += e2 * e2;
            v1_pos += 1;
            v2_pos += 1;
        } else if v1.idx[v1_pos] < v2.idx[v2_pos] {
            norm1 += e1 * e1;
            v1_pos += 1;
        } else {
            norm2 += e2 * e2;
            v2_pos += 1;
        }
    }

    // At most one operand still has entries left; each leftover entry only
    // affects the norm of the vector it belongs to.
    for &e1 in &v1.values[v1_pos..] {
        norm1 += e1 * e1;
    }
    for &e2 in &v2.values[v2_pos..] {
        norm2 += e2 * e2;
    }

    // Check for zero norms
    if norm1 == 0.0f32 || norm2 == 0.0f32 {
        return f64::NAN;
    }

    (1.0f32 - (dot / (norm1 * norm2).sqrt())) as f64
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -79,4 +122,13 @@ mod tests {
|
||||
assert_eq!(vector_f32_distance_cos(&[1.0, 2.0], &[-1.0, -2.0]), 2.0);
|
||||
assert_eq!(vector_f32_distance_cos(&[1.0, 2.0], &[-2.0, 1.0]), 1.0);
|
||||
}
|
||||
|
||||
#[test]
fn test_distance_cos_f64() {
    // Inputs with a zero norm (empty or all-zero) have no defined angle.
    let nan_cases: [(&[f64], &[f64]); 2] = [(&[], &[]), (&[1.0, 2.0], &[0.0, 0.0])];
    for &(lhs, rhs) in nan_cases.iter() {
        assert!(vector_f64_distance_cos(lhs, rhs).is_nan());
    }

    // Same direction, opposite direction, and perpendicular vectors.
    let exact_cases: [(&[f64], &[f64], f64); 3] = [
        (&[1.0, 2.0], &[1.0, 2.0], 0.0),
        (&[1.0, 2.0], &[-1.0, -2.0], 2.0),
        (&[1.0, 2.0], &[-2.0, 1.0], 1.0),
    ];
    for &(lhs, rhs, expected) in exact_cases.iter() {
        assert_eq!(vector_f64_distance_cos(lhs, rhs), expected);
    }
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::{
|
||||
vector::vector_types::{Vector, VectorType},
|
||||
vector::vector_types::{Vector, VectorSparse, VectorType},
|
||||
LimboError, Result,
|
||||
};
|
||||
|
||||
@@ -21,6 +21,10 @@ pub fn vector_distance_l2(v1: &Vector, v2: &Vector) -> Result<f64> {
|
||||
VectorType::Float64Dense => {
|
||||
Ok(vector_f64_distance_l2(v1.as_f64_slice(), v2.as_f64_slice()))
|
||||
}
|
||||
VectorType::Float32Sparse => Ok(vector_f32_sparse_distance_l2(
|
||||
v1.as_f32_sparse(),
|
||||
v2.as_f32_sparse(),
|
||||
)),
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
@@ -43,6 +47,24 @@ fn vector_f64_distance_l2(v1: &[f64], v2: &[f64]) -> f64 {
|
||||
sum.sqrt()
|
||||
}
|
||||
|
||||
/// Euclidean (L2) distance between two sparse `f32` vectors.
///
/// The two operands are walked in lockstep by index, so each `idx` slice must be
/// in ascending order. A position stored in only one operand is treated as zero
/// in the other, so it contributes the square of its own value. The accumulated
/// sum of squares is square-rooted before being returned.
fn vector_f32_sparse_distance_l2(v1: VectorSparse<f32>, v2: VectorSparse<f32>) -> f64 {
    let (mut v1_pos, mut v2_pos) = (0, 0);
    let mut sum = 0.0f32;

    while v1_pos < v1.idx.len() && v2_pos < v2.idx.len() {
        if v1.idx[v1_pos] == v2.idx[v2_pos] {
            sum += (v1.values[v1_pos] - v2.values[v2_pos]).powi(2);
            v1_pos += 1;
            v2_pos += 1;
        } else if v1.idx[v1_pos] < v2.idx[v2_pos] {
            // Only v1 stores this position; the other side is an implicit zero.
            sum += v1.values[v1_pos].powi(2);
            v1_pos += 1;
        } else {
            // Only v2 stores this position; the other side is an implicit zero.
            sum += v2.values[v2_pos].powi(2);
            v2_pos += 1;
        }
    }

    // At most one operand still has entries left; they face implicit zeros.
    for &e1 in &v1.values[v1_pos..] {
        sum += e1.powi(2);
    }
    for &e2 in &v2.values[v2_pos..] {
        sum += e2.powi(2);
    }

    f64::from(sum).sqrt()
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -3,21 +3,17 @@ use crate::{
|
||||
Value,
|
||||
};
|
||||
|
||||
pub fn vector_serialize(x: Vector) -> Value {
|
||||
pub fn vector_serialize(mut x: Vector) -> Value {
|
||||
match x.vector_type {
|
||||
VectorType::Float32Dense => vector_f32_serialize(x),
|
||||
VectorType::Float64Dense => vector_f64_serialize(x),
|
||||
_ => todo!(),
|
||||
VectorType::Float32Dense => Value::from_blob(x.data),
|
||||
VectorType::Float64Dense => {
|
||||
x.data.push(2);
|
||||
Value::from_blob(x.data)
|
||||
}
|
||||
VectorType::Float32Sparse => {
|
||||
x.data.extend_from_slice(&(x.dims as u32).to_le_bytes());
|
||||
x.data.push(9);
|
||||
Value::from_blob(x.data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn vector_f64_serialize(x: Vector) -> Value {
|
||||
let mut blob = Vec::with_capacity(x.dims * 8 + 1);
|
||||
blob.extend_from_slice(&x.data);
|
||||
blob.push(2);
|
||||
Value::from_blob(blob)
|
||||
}
|
||||
|
||||
fn vector_f32_serialize(x: Vector) -> Value {
|
||||
Value::from_blob(x.data)
|
||||
}
|
||||
|
||||
@@ -15,13 +15,35 @@ pub fn vector_slice(vector: &Vector, start: usize, end: usize) -> Result<Vector>
|
||||
));
|
||||
}
|
||||
match vector.vector_type {
|
||||
VectorType::Float32Dense => {
|
||||
Vector::from_data(vector.vector_type, vector.data[start * 4..end * 4].to_vec())
|
||||
VectorType::Float32Dense => Ok(Vector {
|
||||
vector_type: vector.vector_type,
|
||||
dims: end - start + 1,
|
||||
data: vector.data[start * 4..end * 4].to_vec(),
|
||||
}),
|
||||
VectorType::Float64Dense => Ok(Vector {
|
||||
vector_type: vector.vector_type,
|
||||
dims: end - start + 1,
|
||||
data: vector.data[start * 8..end * 8].to_vec(),
|
||||
}),
|
||||
VectorType::Float32Sparse => {
|
||||
let mut values = Vec::new();
|
||||
let mut idx = Vec::new();
|
||||
let sparse = vector.as_f32_sparse();
|
||||
for (&i, &value) in sparse.idx.iter().zip(sparse.values.iter()) {
|
||||
let i = i as usize;
|
||||
if i < start || i >= end {
|
||||
continue;
|
||||
}
|
||||
values.extend_from_slice(&value.to_le_bytes());
|
||||
idx.extend_from_slice(&i.to_le_bytes());
|
||||
}
|
||||
values.extend_from_slice(&idx);
|
||||
Ok(Vector {
|
||||
vector_type: vector.vector_type,
|
||||
dims: end - start + 1,
|
||||
data: values,
|
||||
})
|
||||
}
|
||||
VectorType::Float64Dense => {
|
||||
Vector::from_data(vector.vector_type, vector.data[start * 8..end * 8].to_vec())
|
||||
}
|
||||
_ => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,28 +4,31 @@ use crate::{
|
||||
};
|
||||
|
||||
pub fn vector_to_text(vector: &Vector) -> String {
|
||||
match vector.vector_type {
|
||||
VectorType::Float32Dense => format_text(vector.as_f32_slice().iter()),
|
||||
VectorType::Float64Dense => format_text(vector.as_f64_slice().iter()),
|
||||
VectorType::Float32Sparse => {
|
||||
let mut dense = vec![0.0f32; vector.dims];
|
||||
let sparse = vector.as_f32_sparse();
|
||||
tracing::info!("{:?}", sparse);
|
||||
for (&idx, &value) in sparse.idx.iter().zip(sparse.values.iter()) {
|
||||
dense[idx as usize] = value;
|
||||
}
|
||||
format_text(dense.iter())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn format_text<T: std::string::ToString>(values: impl Iterator<Item = T>) -> String {
|
||||
let mut text = String::new();
|
||||
text.push('[');
|
||||
match vector.vector_type {
|
||||
VectorType::Float32Dense => {
|
||||
let data = vector.as_f32_slice();
|
||||
for (i, value) in data.iter().enumerate().take(vector.dims) {
|
||||
text.push_str(&value.to_string());
|
||||
if i < vector.dims - 1 {
|
||||
text.push(',');
|
||||
}
|
||||
}
|
||||
let mut first = true;
|
||||
for value in values {
|
||||
if !first {
|
||||
text.push(',');
|
||||
}
|
||||
VectorType::Float64Dense => {
|
||||
let data = vector.as_f64_slice();
|
||||
for (i, value) in data.iter().enumerate().take(vector.dims) {
|
||||
text.push_str(&value.to_string());
|
||||
if i < vector.dims - 1 {
|
||||
text.push(',');
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => todo!(),
|
||||
first = false;
|
||||
text.push_str(&value.to_string());
|
||||
}
|
||||
text.push(']');
|
||||
text
|
||||
@@ -46,48 +49,92 @@ pub fn vector_from_text(vector_type: VectorType, text: &str) -> Result<Vector> {
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
let mut data: Vec<u8> = Vec::new();
|
||||
let text = &text[1..text.len() - 1];
|
||||
if text.trim().is_empty() {
|
||||
return Ok(Vector {
|
||||
vector_type,
|
||||
dims: 0,
|
||||
data,
|
||||
return Ok(match vector_type {
|
||||
VectorType::Float32Dense | VectorType::Float64Dense | VectorType::Float32Sparse => {
|
||||
Vector {
|
||||
vector_type,
|
||||
dims: 0,
|
||||
data: Vec::new(),
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
let xs = text.split(',');
|
||||
for x in xs {
|
||||
let x = x.trim();
|
||||
if x.is_empty() {
|
||||
let tokens = text.split(',').map(|x| x.trim());
|
||||
match vector_type {
|
||||
VectorType::Float32Dense => vector32_from_text(tokens),
|
||||
VectorType::Float64Dense => vector64_from_text(tokens),
|
||||
VectorType::Float32Sparse => vector32_sparse_from_text(tokens),
|
||||
}
|
||||
}
|
||||
|
||||
fn vector32_from_text<'a>(tokens: impl Iterator<Item = &'a str>) -> Result<Vector> {
|
||||
let mut data = Vec::new();
|
||||
for token in tokens {
|
||||
let value = token
|
||||
.parse::<f32>()
|
||||
.map_err(|_| LimboError::ConversionError("Invalid vector value".to_string()))?;
|
||||
if !value.is_finite() {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
match vector_type {
|
||||
VectorType::Float32Dense => {
|
||||
let x = x
|
||||
.parse::<f32>()
|
||||
.map_err(|_| LimboError::ConversionError("Invalid vector value".to_string()))?;
|
||||
if !x.is_finite() {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
data.extend_from_slice(&x.to_le_bytes());
|
||||
}
|
||||
VectorType::Float64Dense => {
|
||||
let x = x
|
||||
.parse::<f64>()
|
||||
.map_err(|_| LimboError::ConversionError("Invalid vector value".to_string()))?;
|
||||
if !x.is_finite() {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
data.extend_from_slice(&x.to_le_bytes());
|
||||
}
|
||||
_ => todo!(),
|
||||
};
|
||||
data.extend_from_slice(&value.to_le_bytes());
|
||||
}
|
||||
Vector::from_data(vector_type, data)
|
||||
Ok(Vector {
|
||||
vector_type: VectorType::Float32Dense,
|
||||
dims: data.len() / 4,
|
||||
data,
|
||||
})
|
||||
}
|
||||
|
||||
fn vector64_from_text<'a>(tokens: impl Iterator<Item = &'a str>) -> Result<Vector> {
|
||||
let mut data = Vec::new();
|
||||
for token in tokens {
|
||||
let value = token
|
||||
.parse::<f64>()
|
||||
.map_err(|_| LimboError::ConversionError("Invalid vector value".to_string()))?;
|
||||
if !value.is_finite() {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
data.extend_from_slice(&value.to_le_bytes());
|
||||
}
|
||||
Ok(Vector {
|
||||
vector_type: VectorType::Float64Dense,
|
||||
dims: data.len() / 8,
|
||||
data,
|
||||
})
|
||||
}
|
||||
|
||||
fn vector32_sparse_from_text<'a>(tokens: impl Iterator<Item = &'a str>) -> Result<Vector> {
|
||||
let mut idx = Vec::new();
|
||||
let mut values = Vec::new();
|
||||
let mut dims = 0u32;
|
||||
for token in tokens {
|
||||
let value = token
|
||||
.parse::<f32>()
|
||||
.map_err(|_| LimboError::ConversionError("Invalid vector value".to_string()))?;
|
||||
if !value.is_finite() {
|
||||
return Err(LimboError::ConversionError(
|
||||
"Invalid vector value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
dims += 1;
|
||||
if value == 0.0 {
|
||||
continue;
|
||||
}
|
||||
idx.extend_from_slice(&(dims - 1).to_le_bytes());
|
||||
values.extend_from_slice(&value.to_le_bytes());
|
||||
}
|
||||
|
||||
values.extend_from_slice(&idx);
|
||||
Ok(Vector {
|
||||
vector_type: VectorType::Float32Sparse,
|
||||
dims: dims as usize,
|
||||
data: values,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -14,9 +14,10 @@ pub struct Vector {
|
||||
pub data: Vec<u8>,
|
||||
}
|
||||
|
||||
pub struct VectorSparse<'a, T> {
|
||||
idx: &'a [u32],
|
||||
values: &'a [T],
|
||||
#[derive(Debug)]
|
||||
pub struct VectorSparse<'a, T: std::fmt::Debug> {
|
||||
pub idx: &'a [u32],
|
||||
pub values: &'a [T],
|
||||
}
|
||||
|
||||
impl Vector {
|
||||
@@ -53,7 +54,7 @@ impl Vector {
|
||||
let (vector_type, data) = Self::vector_type(blob)?;
|
||||
Self::from_data(vector_type, data)
|
||||
}
|
||||
pub fn from_data(vector_type: VectorType, data: Vec<u8>) -> Result<Self> {
|
||||
pub fn from_data(vector_type: VectorType, mut data: Vec<u8>) -> Result<Self> {
|
||||
match vector_type {
|
||||
VectorType::Float32Dense => {
|
||||
if data.len() % 4 != 0 {
|
||||
@@ -88,7 +89,8 @@ impl Vector {
|
||||
data.len(),
|
||||
)));
|
||||
}
|
||||
let dims = u32::from_le_bytes(data[data.len() - 4..].try_into().unwrap()) as usize;
|
||||
let dims_bytes = data.split_off(data.len() - 4);
|
||||
let dims = u32::from_le_bytes(dims_bytes.try_into().unwrap()) as usize;
|
||||
let vector = Vector {
|
||||
vector_type,
|
||||
dims,
|
||||
@@ -162,9 +164,10 @@ impl Vector {
|
||||
0,
|
||||
"data pointer must be aligned to {align} bytes for f32 access"
|
||||
);
|
||||
let length = (self.data.len() - 4) / 4 / 2;
|
||||
let length = self.data.len() / 4 / 2;
|
||||
let values = unsafe { std::slice::from_raw_parts(ptr as *const f32, length) };
|
||||
let idx = unsafe { std::slice::from_raw_parts((ptr as *const u32).add(length), length) };
|
||||
debug_assert!(idx.is_sorted());
|
||||
VectorSparse { idx, values }
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user