From 9383ba207d361b6c72b84a030041126d99f8ab86 Mon Sep 17 00:00:00 2001
From: Pere Diaz Bou
Date: Wed, 11 Jun 2025 11:14:29 +0200
Subject: [PATCH] introduce integrity_check pragma

---
 Review note: the post-image blob hashes on the `index` lines below are those of the
 original revision of this patch; regenerate the patch after applying to refresh them.

 core/pragma.rs                                |  4 ++
 core/storage/btree.rs                         | 19 +++++
 core/translate/integrity_check.rs             | 39 ++++++++++
 core/translate/mod.rs                         |  1 +
 core/translate/pragma.rs                      |  8 ++
 core/vdbe/execute.rs                          | 90 +++++++++++++++++++
 core/vdbe/explain.rs                          | 13 ++++
 core/vdbe/insn.rs                             | 13 ++++
 core/vdbe/mod.rs                              |  4 +-
 vendored/sqlite3-parser/src/parser/ast/mod.rs |  2 +
 10 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 core/translate/integrity_check.rs

diff --git a/core/pragma.rs b/core/pragma.rs
index 2d820a77f..d55d28d25 100644
--- a/core/pragma.rs
+++ b/core/pragma.rs
@@ -73,6 +73,10 @@ fn pragma_for(pragma: PragmaName) -> Pragma {
             PragmaFlags::NoColumns1 | PragmaFlags::Result0,
             &["auto_vacuum"],
         ),
+        IntegrityCheck => Pragma::new(
+            PragmaFlags::NeedSchema | PragmaFlags::ReadOnly | PragmaFlags::Result0,
+            &["message"],
+        ),
     }
 }
 
diff --git a/core/storage/btree.rs b/core/storage/btree.rs
index 4456024ce..9a6fe3493 100644
--- a/core/storage/btree.rs
+++ b/core/storage/btree.rs
@@ -5101,6 +5101,25 @@ impl BTreeCursor {
     }
 }
 
+/// Progress marker for an in-flight integrity check of a single b-tree.
+#[derive(Debug)]
+pub struct IntegrityCheckState {
+    /// Page the check is positioned on; callers seed it with the b-tree's root page.
+    pub current_page: usize,
+}
+
+/// Checks the b-tree described by `state`, recording problems in `error_count` and `message`.
+///
+/// Placeholder: no pages are inspected yet, so neither output is modified and the call always
+/// completes immediately with `CursorResult::Ok(())`.
+pub fn integrity_check(
+    state: &mut IntegrityCheckState,
+    error_count: &mut usize,
+    message: &mut String,
+) -> Result<CursorResult<()>> {
+    Ok(CursorResult::Ok(()))
+}
+
 #[cfg(debug_assertions)]
 fn validate_cells_after_insertion(cell_array: &CellArray, leaf_data: bool) {
     for cell in &cell_array.cells {
diff --git a/core/translate/integrity_check.rs b/core/translate/integrity_check.rs
new file mode 100644
index 000000000..392ac597b
--- /dev/null
+++ b/core/translate/integrity_check.rs
@@ -0,0 +1,39 @@
+use crate::{
+    schema::Schema,
+    vdbe::{builder::ProgramBuilder, insn::Insn},
+};
+
+/// Maximum number of errors to report with integrity check. If we exceed this number we will short
+/// circuit the procedure and return early to not waste time.
+const MAX_INTEGRITY_CHECK_ERRORS: usize = 10;
+
+/// Emits the bytecode for `PRAGMA integrity_check`.
+///
+/// The program is one `IntegrityCk` over the root page of every b-tree table in `schema`,
+/// followed by a `ResultRow` that returns the message register as a single-column row.
+/// Index b-trees are not included yet.
+pub fn translate_integrity_check(
+    schema: &Schema,
+    program: &mut ProgramBuilder,
+) -> crate::Result<()> {
+    // Collect root pages to run integrity check on
+    let root_pages: Vec<usize> = schema
+        .tables
+        .iter()
+        .filter_map(|(_, table)| match table.as_ref() {
+            crate::schema::Table::BTree(table) => Some(table.root_page),
+            _ => None,
+        })
+        .collect();
+    let message_register = program.alloc_register();
+    program.emit_insn(Insn::IntegrityCk {
+        max_errors: MAX_INTEGRITY_CHECK_ERRORS,
+        roots: root_pages,
+        message_register,
+    });
+    program.emit_insn(Insn::ResultRow {
+        start_reg: message_register,
+        count: 1,
+    });
+    Ok(())
+}
diff --git a/core/translate/mod.rs b/core/translate/mod.rs
index 716479c6e..ae9a386e3 100644
--- a/core/translate/mod.rs
+++ b/core/translate/mod.rs
@@ -16,6 +16,7 @@ pub(crate) mod expr;
 pub(crate) mod group_by;
 pub(crate) mod index;
 pub(crate) mod insert;
+pub(crate) mod integrity_check;
 pub(crate) mod main_loop;
 pub(crate) mod optimizer;
 pub(crate) mod order_by;
diff --git a/core/translate/pragma.rs b/core/translate/pragma.rs
index d7d72fd85..95fd806b6 100644
--- a/core/translate/pragma.rs
+++ b/core/translate/pragma.rs
@@ -18,6 +18,8 @@ use crate::{bail_parse_error, LimboError, Pager, Value};
 use std::str::FromStr;
 use strum::IntoEnumIterator;
 
+use super::integrity_check::translate_integrity_check;
+
 fn list_pragmas(program: &mut ProgramBuilder) {
     for x in PragmaName::iter() {
         let register = program.emit_string8_new_reg(x.to_string());
@@ -259,6 +261,9 @@ fn update_pragma(
             });
             Ok(())
         }
+        PragmaName::IntegrityCheck => {
+            bail_parse_error!("integrity_check cannot be set");
+        }
     }
 }
 
@@ -385,6 +390,9 @@ fn query_pragma(
             });
             program.emit_result_row(register, 1);
         }
+        PragmaName::IntegrityCheck => {
+            translate_integrity_check(schema, program)?;
+        }
     }
 
     Ok(())
diff --git a/core/vdbe/execute.rs b/core/vdbe/execute.rs
index 1e56778b7..43c63d888 100644
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -1,6 +1,7 @@
 #![allow(unused_variables)]
 use crate::numeric::{NullableInteger, Numeric};
 use crate::schema::Schema;
+use crate::storage::btree::{integrity_check, IntegrityCheckState};
 use crate::storage::database::FileMemoryStorage;
 use crate::storage::page_cache::DumbLruPageCache;
 use crate::storage::pager::CreateBTreeFlags;
@@ -18,6 +19,7 @@ use crate::{
     },
     types::compare_immutable,
 };
+use std::fmt::Write;
 use std::{borrow::BorrowMut, rc::Rc, sync::Arc};
 
 use crate::{pseudo::PseudoCursor, result::LimboResult};
@@ -5038,6 +5040,94 @@ pub fn op_count(
     Ok(InsnFunctionStepResult::Step)
 }
 
+/// Resumable state of the `IntegrityCk` instruction, stored in `ProgramState` between steps.
+#[derive(Debug)]
+pub enum OpIntegrityCheckState {
+    /// No check is in progress; the next step begins one.
+    Start,
+    /// The roots are being checked one at a time.
+    Checking {
+        /// Error counter passed to `integrity_check`.
+        error_count: usize,
+        /// Report text passed to `integrity_check`.
+        message: String,
+        /// Position in the instruction's `roots` of the b-tree being checked.
+        current_root_idx: usize,
+        /// Per-b-tree state passed to `integrity_check`.
+        state: IntegrityCheckState,
+    },
+}
+
+/// Executes `Insn::IntegrityCk`.
+///
+/// Runs `integrity_check` on every root page in turn, then writes the report to the message
+/// register (`"ok"` when no errors were counted) and advances the program counter. Until the
+/// last root is done the instruction is stepped again without moving the program counter.
+/// `max_errors` is not enforced yet.
+pub fn op_integrity_check(
+    program: &Program,
+    state: &mut ProgramState,
+    insn: &Insn,
+    pager: &Rc<Pager>,
+    mv_store: Option<&Rc<MvStore>>,
+) -> Result<InsnFunctionStepResult> {
+    let Insn::IntegrityCk {
+        max_errors,
+        roots,
+        message_register,
+    } = insn
+    else {
+        unreachable!("unexpected Insn {:?}", insn)
+    };
+    match &mut state.op_integrity_check_state {
+        OpIntegrityCheckState::Start => {
+            let Some(&first_root) = roots.first() else {
+                // Nothing to check: report success instead of panicking on `roots[0]`.
+                state.registers[*message_register] = Register::Value(Value::build_text("ok"));
+                state.pc += 1;
+                return Ok(InsnFunctionStepResult::Step);
+            };
+            state.op_integrity_check_state = OpIntegrityCheckState::Checking {
+                error_count: 0,
+                message: String::new(),
+                current_root_idx: 0,
+                state: IntegrityCheckState {
+                    current_page: first_root,
+                },
+            };
+        }
+        OpIntegrityCheckState::Checking {
+            error_count,
+            message,
+            current_root_idx,
+            state: integrity_check_state,
+        } => {
+            return_if_io!(integrity_check(integrity_check_state, error_count, message));
+            *current_root_idx += 1;
+            if *current_root_idx < roots.len() {
+                *integrity_check_state = IntegrityCheckState {
+                    current_page: roots[*current_root_idx],
+                };
+                return Ok(InsnFunctionStepResult::Step);
+            } else {
+                if *error_count == 0 {
+                    message.write_str("ok").map_err(|err| {
+                        LimboError::InternalError(format!(
+                            "error appending message to integrity check {:?}",
+                            err
+                        ))
+                    })?;
+                }
+                state.registers[*message_register] = Register::Value(Value::build_text(message));
+                state.op_integrity_check_state = OpIntegrityCheckState::Start;
+                state.pc += 1;
+            }
+        }
+    }
+
+    Ok(InsnFunctionStepResult::Step)
+}
+
 impl Value {
     pub fn exec_lower(&self) -> Option<Value> {
         match self {
diff --git a/core/vdbe/explain.rs b/core/vdbe/explain.rs
index fa6a2a70e..837dd06ec 100644
--- a/core/vdbe/explain.rs
+++ b/core/vdbe/explain.rs
@@ -1576,6 +1576,19 @@ pub fn insn_to_str(
                 0,
                 format!("r[{}]={}", *out_reg, *value),
             ),
+            Insn::IntegrityCk {
+                max_errors,
+                roots,
+                message_register,
+            } => (
+                "IntegrityCk",
+                *max_errors as i32,
+                0,
+                0,
+                Value::build_text(""),
+                0,
+                format!("roots={:?} message_register={}", roots, message_register),
+            ),
         };
         format!(
             "{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}",
diff --git a/core/vdbe/insn.rs b/core/vdbe/insn.rs
index 23b9da480..43e8a9b64 100644
--- a/core/vdbe/insn.rs
+++ b/core/vdbe/insn.rs
@@ -915,6 +915,18 @@ pub enum Insn {
         target_reg: usize,
         exact: bool,
     },
+
+    /// Runs an integrity check over the b-trees whose root pages are listed in `roots` and
+    /// stores the resulting report as text in the register `message_register`.
+    /// When no errors were counted the report is the string "ok".
+    /// `max_errors` is the maximum number of errors to report; the executor does not enforce
+    /// it yet.
+    /// This opcode is used to implement the integrity_check pragma.
+    IntegrityCk {
+        max_errors: usize,
+        roots: Vec<usize>,
+        message_register: usize,
+    },
 }
 
 impl Insn {
@@ -1038,6 +1050,7 @@ impl Insn {
             Insn::Affinity { .. } => execute::op_affinity,
             Insn::IdxDelete { .. } => execute::op_idx_delete,
             Insn::Count { .. } => execute::op_count,
+            Insn::IntegrityCk { .. } => execute::op_integrity_check,
         }
     }
 }
diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs
index 3e0cffa3c..c17c46151 100644
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -43,7 +43,7 @@ use crate::{
 use crate::json::JsonCacheCell;
 use crate::{Connection, MvStore, Result, TransactionState};
 use builder::CursorKey;
-use execute::{InsnFunction, InsnFunctionStepResult, OpIdxDeleteState};
+use execute::{InsnFunction, InsnFunctionStepResult, OpIdxDeleteState, OpIntegrityCheckState};
 
 use rand::{
     distributions::{Distribution, Uniform},
@@ -248,6 +248,7 @@ pub struct ProgramState {
     #[cfg(feature = "json")]
     json_cache: JsonCacheCell,
     op_idx_delete_state: Option<OpIdxDeleteState>,
+    op_integrity_check_state: OpIntegrityCheckState,
 }
 
 impl ProgramState {
@@ -272,6 +273,7 @@ impl ProgramState {
             #[cfg(feature = "json")]
             json_cache: JsonCacheCell::new(),
             op_idx_delete_state: None,
+            op_integrity_check_state: OpIntegrityCheckState::Start,
         }
     }
 
diff --git a/vendored/sqlite3-parser/src/parser/ast/mod.rs b/vendored/sqlite3-parser/src/parser/ast/mod.rs
index 7512b75d5..cf19e39f6 100644
--- a/vendored/sqlite3-parser/src/parser/ast/mod.rs
+++ b/vendored/sqlite3-parser/src/parser/ast/mod.rs
@@ -1675,6 +1675,8 @@ pub enum PragmaName {
     UserVersion,
     /// trigger a checkpoint to run on database(s) if WAL is enabled
     WalCheckpoint,
+    /// Run integrity check on the database file
+    IntegrityCheck,
 }
 
 /// `CREATE TRIGGER` time