move the aggregate operator to its own file.
The code is becoming impossible to reason about with everything in operator.rs
core/incremental/aggregate_operator.rs (new file, 1787 lines)
File diff suppressed because it is too large
@@ -1,3 +1,4 @@
pub mod aggregate_operator;
pub mod compiler;
pub mod cursor;
pub mod dbsp;

File diff suppressed because it is too large
@@ -1,12 +1,7 @@
use crate::incremental::dbsp::HashableRow;
use crate::incremental::operator::{
    generate_storage_id, AggColumnInfo, AggregateFunction, AggregateOperator, AggregateState,
    DbspStateCursors, MinMaxDeltas, AGG_TYPE_MINMAX,
};
use crate::incremental::operator::{AggregateFunction, AggregateState, DbspStateCursors};
use crate::storage::btree::{BTreeCursor, BTreeKey};
use crate::types::{IOResult, ImmutableRecord, RefValue, SeekKey, SeekOp, SeekResult};
use crate::types::{IOResult, ImmutableRecord, SeekKey, SeekOp, SeekResult};
use crate::{return_if_io, LimboError, Result, Value};
use std::collections::{HashMap, HashSet};

#[derive(Debug, Default)]
pub enum ReadRecord {
@@ -290,672 +285,3 @@ impl WriteRow {
        }
    }
}

/// State machine for recomputing MIN/MAX values after deletion
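/// Flow: `ProcessElements` picks the next (group, column, is_min) entry and
/// builds a `ScanState` for it; `Scan` drives that scan (resuming across IO)
/// and writes the result back into the group state, then control returns to
/// `ProcessElements` for the next entry until the machine reaches `Done`.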
#[derive(Debug)]
pub enum RecomputeMinMax {
    ProcessElements {
        /// Current column being processed
        current_column_idx: usize,
        /// Columns to process (combined MIN and MAX)
        columns_to_process: Vec<(String, String, bool)>, // (group_key, column_name, is_min)
        /// MIN/MAX deltas for checking values and weights
        min_max_deltas: MinMaxDeltas,
    },
    Scan {
        /// Columns still to process
        columns_to_process: Vec<(String, String, bool)>,
        /// Current index in columns_to_process (will resume from here)
        current_column_idx: usize,
        /// MIN/MAX deltas for checking values and weights
        min_max_deltas: MinMaxDeltas,
        /// Current group key being processed
        group_key: String,
        /// Current column name being processed
        column_name: String,
        /// Whether we're looking for MIN (true) or MAX (false)
        is_min: bool,
        /// The scan state machine for finding the new MIN/MAX
        scan_state: Box<ScanState>,
    },
    Done,
}

impl RecomputeMinMax {
    pub fn new(
        min_max_deltas: MinMaxDeltas,
        existing_groups: &HashMap<String, AggregateState>,
        operator: &AggregateOperator,
    ) -> Self {
        let mut groups_to_check: HashSet<(String, String, bool)> = HashSet::new();

        // Remember that min_max_deltas contains, per group, just the columns affected
        // by MIN/MAX, in delta (actually ZSet - consolidated delta) format. This makes
        // it easier for us to consume here.
        //
        // The most challenging case is the case where there is a retraction, since we need to go
        // back to the index.
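        //
        // For example (hypothetical data): if the current MAX of column "price"
        // in group "g" is 9 and the delta contains (("price", [9]) -> -1), the
        // stored MAX may no longer be valid, so the (group, column) pair is
        // queued for an index rescan.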
        for (group_key_str, values) in &min_max_deltas {
            for ((col_name, hashable_row), weight) in values {
                let col_info = operator.column_min_max.get(col_name);

                let value = &hashable_row.values[0];

                if *weight < 0 {
                    // Deletion detected - check if it's the current MIN/MAX
                    if let Some(state) = existing_groups.get(group_key_str) {
                        // Check for MIN
                        if let Some(current_min) = state.mins.get(col_name) {
                            if current_min == value {
                                groups_to_check.insert((
                                    group_key_str.clone(),
                                    col_name.clone(),
                                    true,
                                ));
                            }
                        }
                        // Check for MAX
                        if let Some(current_max) = state.maxs.get(col_name) {
                            if current_max == value {
                                groups_to_check.insert((
                                    group_key_str.clone(),
                                    col_name.clone(),
                                    false,
                                ));
                            }
                        }
                    }
                } else if *weight > 0 {
                    // Even if the group is not found in the existing groups, we still
                    // need to handle this when a new record is being inserted
                    if let Some(info) = col_info {
                        if info.has_min {
                            groups_to_check.insert((group_key_str.clone(), col_name.clone(), true));
                        }
                        if info.has_max {
                            groups_to_check.insert((
                                group_key_str.clone(),
                                col_name.clone(),
                                false,
                            ));
                        }
                    }
                }
            }
        }

        if groups_to_check.is_empty() {
            // No recomputation or initialization needed
            Self::Done
        } else {
            // Convert HashSet to Vec for indexed processing
            let groups_to_check_vec: Vec<_> = groups_to_check.into_iter().collect();
            Self::ProcessElements {
                current_column_idx: 0,
                columns_to_process: groups_to_check_vec,
                min_max_deltas,
            }
        }
    }

    pub fn process(
        &mut self,
        existing_groups: &mut HashMap<String, AggregateState>,
        operator: &AggregateOperator,
        cursors: &mut DbspStateCursors,
    ) -> Result<IOResult<()>> {
        loop {
            match self {
                RecomputeMinMax::ProcessElements {
                    current_column_idx,
                    columns_to_process,
                    min_max_deltas,
                } => {
                    if *current_column_idx >= columns_to_process.len() {
                        *self = RecomputeMinMax::Done;
                        return Ok(IOResult::Done(()));
                    }

                    let (group_key, column_name, is_min) =
                        columns_to_process[*current_column_idx].clone();

                    // Get column index from pre-computed info
                    let column_index = operator
                        .column_min_max
                        .get(&column_name)
                        .map(|info| info.index)
                        .unwrap(); // Should always exist since we're processing known columns

                    // Get current value from existing state
                    let current_value = existing_groups.get(&group_key).and_then(|state| {
                        if is_min {
                            state.mins.get(&column_name).cloned()
                        } else {
                            state.maxs.get(&column_name).cloned()
                        }
                    });

                    // Create storage keys for index lookup
                    let storage_id =
                        generate_storage_id(operator.operator_id, column_index, AGG_TYPE_MINMAX);
                    let zset_id = operator.generate_group_rowid(&group_key);

                    // Get the values for this group from min_max_deltas
                    let group_values = min_max_deltas.get(&group_key).cloned().unwrap_or_default();

                    let columns_to_process = std::mem::take(columns_to_process);
                    let min_max_deltas = std::mem::take(min_max_deltas);

                    let scan_state = if is_min {
                        Box::new(ScanState::new_for_min(
                            current_value,
                            group_key.clone(),
                            column_name.clone(),
                            storage_id,
                            zset_id,
                            group_values,
                        ))
                    } else {
                        Box::new(ScanState::new_for_max(
                            current_value,
                            group_key.clone(),
                            column_name.clone(),
                            storage_id,
                            zset_id,
                            group_values,
                        ))
                    };

                    *self = RecomputeMinMax::Scan {
                        columns_to_process,
                        current_column_idx: *current_column_idx,
                        min_max_deltas,
                        group_key,
                        column_name,
                        is_min,
                        scan_state,
                    };
                }
                RecomputeMinMax::Scan {
                    columns_to_process,
                    current_column_idx,
                    min_max_deltas,
                    group_key,
                    column_name,
                    is_min,
                    scan_state,
                } => {
                    // Find new value using the scan state machine
                    let new_value = return_if_io!(scan_state.find_new_value(cursors));

                    // Update the state with new value (create if doesn't exist)
                    let state = existing_groups.entry(group_key.clone()).or_default();

                    if *is_min {
                        if let Some(min_val) = new_value {
                            state.mins.insert(column_name.clone(), min_val);
                        } else {
                            state.mins.remove(column_name);
                        }
                    } else if let Some(max_val) = new_value {
                        state.maxs.insert(column_name.clone(), max_val);
                    } else {
                        state.maxs.remove(column_name);
                    }

                    // Move to next column
                    let min_max_deltas = std::mem::take(min_max_deltas);
                    let columns_to_process = std::mem::take(columns_to_process);
                    *self = RecomputeMinMax::ProcessElements {
                        current_column_idx: *current_column_idx + 1,
                        columns_to_process,
                        min_max_deltas,
                    };
                }
                RecomputeMinMax::Done => {
                    return Ok(IOResult::Done(()));
                }
            }
        }
    }
}

/// State machine for scanning through the index to find new MIN/MAX values
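/// Flow: `CheckCandidate` tests the current candidate against the group's
/// deltas; a retracted candidate sends us to `FetchNextCandidate`, which seeks
/// past it in the index and loops back to `CheckCandidate`. A surviving
/// candidate (or an exhausted index) is compared against the best inserted
/// value and the machine finishes in `Done`.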
#[derive(Debug)]
pub enum ScanState {
    CheckCandidate {
        /// Current candidate value for MIN/MAX
        candidate: Option<Value>,
        /// Group key being processed
        group_key: String,
        /// Column name being processed
        column_name: String,
        /// Storage ID for the index seek
        storage_id: i64,
        /// ZSet ID for the group
        zset_id: i64,
        /// Group values from MinMaxDeltas: (column_name, HashableRow) -> weight
        group_values: HashMap<(String, HashableRow), isize>,
        /// Whether we're looking for MIN (true) or MAX (false)
        is_min: bool,
    },
    FetchNextCandidate {
        /// Current candidate to seek past
        current_candidate: Value,
        /// Group key being processed
        group_key: String,
        /// Column name being processed
        column_name: String,
        /// Storage ID for the index seek
        storage_id: i64,
        /// ZSet ID for the group
        zset_id: i64,
        /// Group values from MinMaxDeltas: (column_name, HashableRow) -> weight
        group_values: HashMap<(String, HashableRow), isize>,
        /// Whether we're looking for MIN (true) or MAX (false)
        is_min: bool,
    },
    Done {
        /// The final MIN/MAX value found
        result: Option<Value>,
    },
}

impl ScanState {
    pub fn new_for_min(
        current_min: Option<Value>,
        group_key: String,
        column_name: String,
        storage_id: i64,
        zset_id: i64,
        group_values: HashMap<(String, HashableRow), isize>,
    ) -> Self {
        Self::CheckCandidate {
            candidate: current_min,
            group_key,
            column_name,
            storage_id,
            zset_id,
            group_values,
            is_min: true,
        }
    }

    // Extract a new candidate from the index. It is possible that, when searching,
    // we end up going into a different operator altogether. That means we have
    // exhausted this operator (or group) entirely, and no good candidate was found.
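    //
    // For example, a seek past the largest value stored under a given
    // (storage_id, zset_id) prefix can land on an entry belonging to a
    // different operator or group; the prefix check below rejects it and
    // reports that no candidate was found.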
    fn extract_new_candidate(
        cursors: &mut DbspStateCursors,
        index_record: &ImmutableRecord,
        seek_op: SeekOp,
        storage_id: i64,
        zset_id: i64,
    ) -> Result<IOResult<Option<Value>>> {
        let seek_result = return_if_io!(cursors
            .index_cursor
            .seek(SeekKey::IndexKey(index_record), seek_op));
        if !matches!(seek_result, SeekResult::Found) {
            return Ok(IOResult::Done(None));
        }

        let record = return_if_io!(cursors.index_cursor.record()).ok_or_else(|| {
            LimboError::InternalError(
                "Record found on the cursor, but could not be read".to_string(),
            )
        })?;

        let values = record.get_values();
        if values.len() < 3 {
            return Ok(IOResult::Done(None));
        }

        let Some(rec_storage_id) = values.first() else {
            return Ok(IOResult::Done(None));
        };
        let Some(rec_zset_id) = values.get(1) else {
            return Ok(IOResult::Done(None));
        };

        // Check if we're still in the same group
        if let (RefValue::Integer(rec_sid), RefValue::Integer(rec_zid)) =
            (rec_storage_id, rec_zset_id)
        {
            if *rec_sid != storage_id || *rec_zid != zset_id {
                return Ok(IOResult::Done(None));
            }
        } else {
            return Ok(IOResult::Done(None));
        }

        // Get the value (3rd element)
        Ok(IOResult::Done(values.get(2).map(|v| v.to_owned())))
    }

    pub fn new_for_max(
        current_max: Option<Value>,
        group_key: String,
        column_name: String,
        storage_id: i64,
        zset_id: i64,
        group_values: HashMap<(String, HashableRow), isize>,
    ) -> Self {
        Self::CheckCandidate {
            candidate: current_max,
            group_key,
            column_name,
            storage_id,
            zset_id,
            group_values,
            is_min: false,
        }
    }

    pub fn find_new_value(
        &mut self,
        cursors: &mut DbspStateCursors,
    ) -> Result<IOResult<Option<Value>>> {
        loop {
            match self {
                ScanState::CheckCandidate {
                    candidate,
                    group_key,
                    column_name,
                    storage_id,
                    zset_id,
                    group_values,
                    is_min,
                } => {
                    // First, check if we have a candidate
                    if let Some(cand_val) = candidate {
                        // Check if the candidate is retracted (weight <= 0)
                        // Create a HashableRow to look up the weight
                        let hashable_cand = HashableRow::new(0, vec![cand_val.clone()]);
                        let key = (column_name.clone(), hashable_cand);
                        let is_retracted =
                            group_values.get(&key).is_some_and(|weight| *weight <= 0);

                        if is_retracted {
                            // Candidate is retracted, need to fetch next from index
                            *self = ScanState::FetchNextCandidate {
                                current_candidate: cand_val.clone(),
                                group_key: std::mem::take(group_key),
                                column_name: std::mem::take(column_name),
                                storage_id: *storage_id,
                                zset_id: *zset_id,
                                group_values: std::mem::take(group_values),
                                is_min: *is_min,
                            };
                            continue;
                        }
                    }

                    // Candidate is valid or we have no candidate
                    // Now find the best value from insertions in group_values
                    let mut best_from_zset = None;
                    for ((col, hashable_val), weight) in group_values.iter() {
                        if col == column_name && *weight > 0 {
                            let value = &hashable_val.values[0];
                            // Skip NULL values - they don't participate in MIN/MAX
                            if value == &Value::Null {
                                continue;
                            }
                            // This is an insertion for our column
                            if let Some(ref current_best) = best_from_zset {
                                if *is_min {
                                    if value.cmp(current_best) == std::cmp::Ordering::Less {
                                        best_from_zset = Some(value.clone());
                                    }
                                } else if value.cmp(current_best) == std::cmp::Ordering::Greater {
                                    best_from_zset = Some(value.clone());
                                }
                            } else {
                                best_from_zset = Some(value.clone());
                            }
                        }
                    }

                    // Compare candidate with best from ZSet, filtering out NULLs
                    let result = match (&candidate, &best_from_zset) {
                        (Some(cand), Some(zset_val)) if cand != &Value::Null => {
                            if *is_min {
                                if zset_val.cmp(cand) == std::cmp::Ordering::Less {
                                    Some(zset_val.clone())
                                } else {
                                    Some(cand.clone())
                                }
                            } else if zset_val.cmp(cand) == std::cmp::Ordering::Greater {
                                Some(zset_val.clone())
                            } else {
                                Some(cand.clone())
                            }
                        }
                        (Some(cand), None) if cand != &Value::Null => Some(cand.clone()),
                        (None, Some(zset_val)) => Some(zset_val.clone()),
                        (Some(cand), Some(_)) if cand == &Value::Null => best_from_zset,
                        _ => None,
                    };

                    *self = ScanState::Done { result };
                }

                ScanState::FetchNextCandidate {
                    current_candidate,
                    group_key,
                    column_name,
                    storage_id,
                    zset_id,
                    group_values,
                    is_min,
                } => {
                    // Seek to the next value in the index
                    let index_key = vec![
                        Value::Integer(*storage_id),
                        Value::Integer(*zset_id),
                        current_candidate.clone(),
                    ];
                    let index_record = ImmutableRecord::from_values(&index_key, index_key.len());

                    let seek_op = if *is_min {
                        SeekOp::GT // For MIN, seek greater than current
                    } else {
                        SeekOp::LT // For MAX, seek less than current
                    };
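                    // E.g. (illustrative values): for MIN with retracted candidate 5,
                    // the GT seek below positions the cursor on the smallest remaining
                    // value greater than 5 within this (storage_id, zset_id) prefix;
                    // for MAX, the LT seek finds the largest value below the candidate.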

                    let new_candidate = return_if_io!(Self::extract_new_candidate(
                        cursors,
                        &index_record,
                        seek_op,
                        *storage_id,
                        *zset_id
                    ));

                    *self = ScanState::CheckCandidate {
                        candidate: new_candidate,
                        group_key: std::mem::take(group_key),
                        column_name: std::mem::take(column_name),
                        storage_id: *storage_id,
                        zset_id: *zset_id,
                        group_values: std::mem::take(group_values),
                        is_min: *is_min,
                    };
                }

                ScanState::Done { result } => {
                    return Ok(IOResult::Done(result.clone()));
                }
            }
        }
    }
}

/// State machine for persisting Min/Max values to storage
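/// Flow: `Init` snapshots the group keys, `ProcessGroup` steps through each
/// group's (column, value, weight) entries by index, and `WriteValue` persists
/// one entry via `WriteRow` (resuming across IO) before returning to
/// `ProcessGroup`; once every group is exhausted the machine reaches `Done`.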
#[derive(Debug)]
pub enum MinMaxPersistState {
    Init {
        min_max_deltas: MinMaxDeltas,
        group_keys: Vec<String>,
    },
    ProcessGroup {
        min_max_deltas: MinMaxDeltas,
        group_keys: Vec<String>,
        group_idx: usize,
        value_idx: usize,
    },
    WriteValue {
        min_max_deltas: MinMaxDeltas,
        group_keys: Vec<String>,
        group_idx: usize,
        value_idx: usize,
        value: Value,
        column_name: String,
        weight: isize,
        write_row: WriteRow,
    },
    Done,
}

impl MinMaxPersistState {
    pub fn new(min_max_deltas: MinMaxDeltas) -> Self {
        let group_keys: Vec<String> = min_max_deltas.keys().cloned().collect();
        Self::Init {
            min_max_deltas,
            group_keys,
        }
    }

    pub fn persist_min_max(
        &mut self,
        operator_id: usize,
        column_min_max: &HashMap<String, AggColumnInfo>,
        cursors: &mut DbspStateCursors,
        generate_group_rowid: impl Fn(&str) -> i64,
    ) -> Result<IOResult<()>> {
        loop {
            match self {
                MinMaxPersistState::Init {
                    min_max_deltas,
                    group_keys,
                } => {
                    let min_max_deltas = std::mem::take(min_max_deltas);
                    let group_keys = std::mem::take(group_keys);
                    *self = MinMaxPersistState::ProcessGroup {
                        min_max_deltas,
                        group_keys,
                        group_idx: 0,
                        value_idx: 0,
                    };
                }
                MinMaxPersistState::ProcessGroup {
                    min_max_deltas,
                    group_keys,
                    group_idx,
                    value_idx,
                } => {
                    // Check if we're past all groups
                    if *group_idx >= group_keys.len() {
                        *self = MinMaxPersistState::Done;
                        continue;
                    }

                    let group_key_str = &group_keys[*group_idx];
                    let values = &min_max_deltas[group_key_str]; // This should always exist

                    // Convert HashMap to Vec for indexed access
                    let values_vec: Vec<_> = values.iter().collect();

                    // Check if we have more values in current group
                    if *value_idx >= values_vec.len() {
                        *group_idx += 1;
                        *value_idx = 0;
                        // Continue to check if we're past all groups now
                        continue;
                    }

                    // Process current value and extract what we need before taking ownership
                    let ((column_name, hashable_row), weight) = values_vec[*value_idx];
                    let column_name = column_name.clone();
                    let value = hashable_row.values[0].clone(); // Extract the Value from HashableRow
                    let weight = *weight;

                    let min_max_deltas = std::mem::take(min_max_deltas);
                    let group_keys = std::mem::take(group_keys);
                    *self = MinMaxPersistState::WriteValue {
                        min_max_deltas,
                        group_keys,
                        group_idx: *group_idx,
                        value_idx: *value_idx,
                        column_name,
                        value,
                        weight,
                        write_row: WriteRow::new(),
                    };
                }
                MinMaxPersistState::WriteValue {
                    min_max_deltas,
                    group_keys,
                    group_idx,
                    value_idx,
                    value,
                    column_name,
                    weight,
                    write_row,
                } => {
                    // Should have exited in the previous state
                    assert!(*group_idx < group_keys.len());

                    let group_key_str = &group_keys[*group_idx];

                    // Get the column index from the pre-computed map
                    let column_info = column_min_max
                        .get(&*column_name)
                        .expect("Column should exist in column_min_max map");
                    let column_index = column_info.index;

                    // Build the key components for MinMax storage using new encoding
                    let storage_id =
                        generate_storage_id(operator_id, column_index, AGG_TYPE_MINMAX);
                    let zset_id = generate_group_rowid(group_key_str);

                    // element_id is the actual value for Min/Max
                    let element_id_val = value.clone();

                    // Create index key
                    let index_key = vec![
                        Value::Integer(storage_id),
                        Value::Integer(zset_id),
                        element_id_val.clone(),
                    ];

                    // Record values (operator_id, zset_id, element_id, unused_placeholder)
                    // For MIN/MAX, the element_id IS the value, so we use NULL for the 4th column
                    let record_values = vec![
                        Value::Integer(storage_id),
                        Value::Integer(zset_id),
                        element_id_val.clone(),
                        Value::Null, // Placeholder - not used for MIN/MAX
                    ];

                    return_if_io!(write_row.write_row(
                        cursors,
                        index_key.clone(),
                        record_values,
                        *weight
                    ));

                    // Move to next value
                    let min_max_deltas = std::mem::take(min_max_deltas);
                    let group_keys = std::mem::take(group_keys);
                    *self = MinMaxPersistState::ProcessGroup {
                        min_max_deltas,
                        group_keys,
                        group_idx: *group_idx,
                        value_idx: *value_idx + 1,
                    };
                }
                MinMaxPersistState::Done => {
                    return Ok(IOResult::Done(()));
                }
            }
        }
    }
}