From 0f5c7917845ee99d4e47ed73c32d83bfb7f7c51a Mon Sep 17 00:00:00 2001 From: Jussi Saurio Date: Wed, 23 Apr 2025 12:58:58 +0300 Subject: [PATCH] vdbe: refactor label resolution to account for insn offsets changing --- core/vdbe/builder.rs | 63 +++++++++++++++++++++++++++++++------------- core/vdbe/mod.rs | 33 ++++++++++++++++------- 2 files changed, 69 insertions(+), 27 deletions(-) diff --git a/core/vdbe/builder.rs b/core/vdbe/builder.rs index fa56ba7ff..a5d9e8b8a 100644 --- a/core/vdbe/builder.rs +++ b/core/vdbe/builder.rs @@ -17,7 +17,7 @@ use crate::{ Connection, VirtualTable, }; -use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, Program}; +use super::{BranchOffset, CursorID, Insn, InsnFunction, InsnReference, JumpTarget, Program}; #[allow(dead_code)] pub struct ProgramBuilder { next_free_register: usize, @@ -28,12 +28,10 @@ pub struct ProgramBuilder { /// that are deemed to be compile-time constant and can be hoisted out of loops /// so that they get evaluated only once at the start of the program. pub constant_spans: Vec<(usize, usize)>, - // Vector of labels which must be assigned to next emitted instruction - next_insn_labels: Vec, // Cursors that are referenced by the program. Indexed by CursorID. pub cursor_ref: Vec<(Option, CursorType)>, /// A vector where index=label number, value=resolved offset. Resolved in build(). - label_to_resolved_offset: Vec>, + label_to_resolved_offset: Vec>, // Bitmask of cursors that have emitted a SeekRowid instruction. seekrowid_emitted_bitmask: u64, // map of instruction index to manual comment (used in EXPLAIN only) @@ -86,7 +84,6 @@ impl ProgramBuilder { next_free_register: 1, next_free_cursor_id: 0, insns: Vec::with_capacity(opts.approx_num_insns), - next_insn_labels: Vec::with_capacity(2), cursor_ref: Vec::with_capacity(opts.num_cursors), constant_spans: Vec::new(), label_to_resolved_offset: Vec::with_capacity(opts.approx_num_labels), @@ -313,18 +310,42 @@ impl ProgramBuilder { BranchOffset::Label(label_n as u32) } - // Effectively a GOTO without the need to emit an explicit GOTO instruction. - // Useful when you know you need to jump to "the next part", but the exact offset is unknowable - // at the time of emitting the instruction. + /// Resolve a label to whatever instruction follows the one that was + /// last emitted. + /// + /// Use this when your use case is: "the program should jump to whatever instruction + /// follows the one that was previously emitted", and you don't care exactly + /// which instruction that is. Examples include "the start of a loop", or + /// "after the loop ends". + /// + /// It is important to handle those cases this way, because the precise + /// instruction that follows any given instruction might change due to + /// reordering the emitted instructions. + #[inline] pub fn preassign_label_to_next_insn(&mut self, label: BranchOffset) { - self.next_insn_labels.push(label); + assert!(label.is_label(), "BranchOffset {:?} is not a label", label); + self._resolve_label(label, self.offset().sub(1u32), JumpTarget::AfterThisInsn); } + /// Resolve a label to exactly the instruction that was last emitted. + /// + /// Use this when your use case is: "the program should jump to the exact instruction + /// that was last emitted", and you don't care WHERE exactly that ends up being + /// once the order of the bytecode of the program is finalized. Examples include + /// "jump to the Halt instruction", or "jump to the Next instruction of a loop". + #[inline] pub fn resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset) { + self._resolve_label(label, to_offset, JumpTarget::ExactlyThisInsn); + } + + fn _resolve_label(&mut self, label: BranchOffset, to_offset: BranchOffset, target: JumpTarget) { assert!(matches!(label, BranchOffset::Label(_))); assert!(matches!(to_offset, BranchOffset::Offset(_))); - self.label_to_resolved_offset[label.to_label_value() as usize] = - Some(to_offset.to_offset_int()); + let BranchOffset::Label(label_number) = label else { + unreachable!("Label is not a label"); + }; + self.label_to_resolved_offset[label_number as usize] = + Some((to_offset.to_offset_int(), target)); } /// Resolve unresolved labels to a specific offset in the instruction list. @@ -335,15 +356,21 @@ impl ProgramBuilder { pub fn resolve_labels(&mut self) { let resolve = |pc: &mut BranchOffset, insn_name: &str| { if let BranchOffset::Label(label) = pc { - let to_offset = self - .label_to_resolved_offset - .get(*label as usize) - .unwrap_or_else(|| { - panic!("Reference to undefined label in {}: {}", insn_name, label) - }); + let Some(Some((to_offset, target))) = + self.label_to_resolved_offset.get(*label as usize) + else { + panic!( + "Reference to undefined or unresolved label in {}: {}", + insn_name, label + ); + }; *pc = BranchOffset::Offset( to_offset - .unwrap_or_else(|| panic!("Unresolved label in {}: {}", insn_name, label)), + + if *target == JumpTarget::ExactlyThisInsn { + 0 + } else { + 1 + }, ); } }; diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index 1d1ad0b77..86fd53303 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -60,6 +60,26 @@ use std::{ sync::Arc, }; +/// We use labels to indicate that we want to jump to whatever the instruction offset +/// will be at runtime, because the offset cannot always be determined when the jump +/// instruction is created. +/// +/// In some cases, we want to jump to EXACTLY a specific instruction. +/// - Example: a condition is not met, so we want to jump to wherever Halt is. +/// In other cases, we don't care what the exact instruction is, but we know that we +/// want to jump to whatever comes AFTER a certain instruction. +/// - Example: a Next instruction will want to jump to "whatever the start of the loop is", +/// but it doesn't care what instruction that is. +/// +/// The reason this distinction is important is that we might reorder instructions that are +/// constant at compile time, and when we do that, we need to change the offsets of any impacted +/// jump instructions, so the instruction that comes immediately after "next Insn" might have changed during the reordering. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum JumpTarget { + ExactlyThisInsn, + AfterThisInsn, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] /// Represents a target for a jump instruction. /// Stores 32-bit ints to keep the enum word-sized. @@ -95,15 +115,6 @@ impl BranchOffset { } } - /// Returns the label value. Panics if the branch offset is an offset or placeholder. - pub fn to_label_value(&self) -> u32 { - match self { - BranchOffset::Label(v) => *v, - BranchOffset::Offset(_) => unreachable!("Offset cannot be converted to label value"), - BranchOffset::Placeholder => unreachable!("Unresolved placeholder"), - } - } - /// Returns the branch offset as a signed integer. /// Used in explain output, where we don't want to panic in case we have an unresolved /// label or placeholder. @@ -121,6 +132,10 @@ impl BranchOffset { pub fn add>(self, n: N) -> BranchOffset { BranchOffset::Offset(self.to_offset_int() + n.into()) } + + pub fn sub>(self, n: N) -> BranchOffset { + BranchOffset::Offset(self.to_offset_int() - n.into()) + } } pub type CursorID = usize;