vdbe: refactor label resolution to account for insn offsets changing

This commit is contained in:
Jussi Saurio
2025-04-23 12:58:58 +03:00
parent b4b38bdb3c
commit 0f5c791784
2 changed files with 69 additions and 27 deletions

View File

@@ -17,7 +17,7 @@ use crate::{
Connection, VirtualTable,
};
use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, Program};
use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, JumpTarget, Program};
#[allow(dead_code)]
pub struct ProgramBuilder {
next_free_register: usize,
@@ -28,12 +28,10 @@ pub struct ProgramBuilder {
/// that are deemed to be compile-time constant and can be hoisted out of loops
/// so that they get evaluated only once at the start of the program.
pub constant_spans: Vec<(usize, usize)>,
// Vector of labels which must be assigned to next emitted instruction
next_insn_labels: Vec<BranchOffset>,
// Cursors that are referenced by the program. Indexed by CursorID.
pub cursor_ref: Vec<(Option<String>, CursorType)>,
/// A vector where index=label number, value=resolved offset. Resolved in build().
label_to_resolved_offset: Vec<Option<InsnReference>>,
label_to_resolved_offset: Vec<Option<(InsnReference, JumpTarget)>>,
// Bitmask of cursors that have emitted a SeekRowid instruction.
seekrowid_emitted_bitmask: u64,
// map of instruction index to manual comment (used in EXPLAIN only)
@@ -86,7 +84,6 @@ impl ProgramBuilder {
next_free_register: 1,
next_free_cursor_id: 0,
insns: Vec::with_capacity(opts.approx_num_insns),
next_insn_labels: Vec::with_capacity(2),
cursor_ref: Vec::with_capacity(opts.num_cursors),
constant_spans: Vec::new(),
label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels),
@@ -313,18 +310,42 @@ impl ProgramBuilder {
BranchOffset::Label(label_n as u32)
}
// Effectively a GOTO <next insn> without the need to emit an explicit GOTO instruction.
// Useful when you know you need to jump to "the next part", but the exact offset is unknowable
// at the time of emitting the instruction.
/// Resolve a label to whatever instruction follows the one that was
/// last emitted.
///
/// Use this when your use case is: "the program should jump to whatever instruction
/// follows the one that was previously emitted", and you don't care exactly
/// which instruction that is. Examples include "the start of a loop", or
/// "after the loop ends".
///
/// It is important to handle those cases this way, because the precise
/// instruction that follows any given instruction might change due to
/// reordering the emitted instructions.
#[inline]
pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) {
self.next_insn_labels.push(label);
assert!(label.is_label(), "BranchOffset {:?} is not a label", label);
self._resolve_label(label, self.offset().sub(1u32), JumpTarget::AfterThisInsn);
}
/// Resolve a label to exactly the instruction that was last emitted.
///
/// Use this when your use case is: "the program should jump to the exact instruction
/// that was last emitted", and you don't care WHERE exactly that ends up being
/// once the order of the bytecode of the program is finalized. Examples include
/// "jump to the Halt instruction", or "jump to the Next instruction of a loop".
#[inline]
pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) {
self._resolve_label(label, to_offset, JumpTarget::ExactlyThisInsn);
}
fn _resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset, target: JumpTarget) {
assert!(matches!(label, BranchOffset::Label(_)));
assert!(matches!(to_offset, BranchOffset::Offset(_)));
self.label_to_resolved_offset[label.to_label_value() as usize] =
Some(to_offset.to_offset_int());
let BranchOffset::Label(label_number) = label else {
unreachable!("Label is not a label");
};
self.label_to_resolved_offset[label_number as usize] =
Some((to_offset.to_offset_int(), target));
}
/// Resolve unresolved labels to a specific offset in the instruction list.
@@ -335,15 +356,21 @@ impl ProgramBuilder {
pub fn resolve_labels(&mut self) {
let resolve = |pc: &mut BranchOffset, insn_name: &str| {
if let BranchOffset::Label(label) = pc {
let to_offset = self
.label_to_resolved_offset
.get(*label as usize)
.unwrap_or_else(|| {
panic!("Reference to undefined label in {}: {}", insn_name, label)
});
let Some(Some((to_offset, target))) =
self.label_to_resolved_offset.get(*label as usize)
else {
panic!(
"Reference to undefined or unresolved label in {}: {}",
insn_name, label
);
};
*pc = BranchOffset::Offset(
to_offset
.unwrap_or_else(|| panic!("Unresolved label in {}: {}", insn_name, label)),
+ if *target == JumpTarget::ExactlyThisInsn {
0
} else {
1
},
);
}
};

View File

@@ -60,6 +60,26 @@ use std::{
sync::Arc,
};
/// We use labels to indicate that we want to jump to whatever the instruction offset
/// will be at runtime, because the offset cannot always be determined when the jump
/// instruction is created.
///
/// In some cases, we want to jump to EXACTLY a specific instruction.
/// - Example: a condition is not met, so we want to jump to wherever Halt is.
/// In other cases, we don't care what the exact instruction is, but we know that we
/// want to jump to whatever comes AFTER a certain instruction.
/// - Example: a Next instruction will want to jump to "whatever the start of the loop is",
/// but it doesn't care what instruction that is.
///
/// The reason this distinction is important is that we might reorder instructions that are
/// constant at compile time, and when we do that, we need to change the offsets of any impacted
/// jump instructions, so the instruction that comes immediately after "next Insn" might have changed during the reordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JumpTarget {
ExactlyThisInsn,
AfterThisInsn,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
/// Represents a target for a jump instruction.
/// Stores 32-bit ints to keep the enum word-sized.
@@ -95,15 +115,6 @@ impl BranchOffset {
}
}
/// Returns the label value. Panics if the branch offset is an offset or placeholder.
pub fn to_label_value(&self) -> u32 {
match self {
BranchOffset::Label(v) => *v,
BranchOffset::Offset(_) => unreachable!("Offset cannot be converted to label value"),
BranchOffset::Placeholder => unreachable!("Unresolved placeholder"),
}
}
/// Returns the branch offset as a signed integer.
/// Used in explain output, where we don't want to panic in case we have an unresolved
/// label or placeholder.
@@ -121,6 +132,10 @@ impl BranchOffset {
pub fn add<N: Into<u32>>(self, n: N) -> BranchOffset {
BranchOffset::Offset(self.to_offset_int() + n.into())
}
pub fn sub<N: Into<u32>>(self, n: N) -> BranchOffset {
BranchOffset::Offset(self.to_offset_int() - n.into())
}
}
pub type CursorID = usize;