Merge 'Fix: OP_NewRowId to generate semi random rowid when largest rowid is i64::MAX' from Krishna Vishal

- `OP_NewRowId` now generates a new rowid semi-randomly when the largest rowid in the table is `i64::MAX`.
- Introduced a new `LimboError` variant, `DatabaseFull`, to signify that the database might be full (SQLite behaves this way, returning `SQLITE_FULL`).

Now:

```SQL
turso> CREATE TABLE q(x INTEGER PRIMARY KEY, y);
turso> INSERT INTO q VALUES (9223372036854775807, 1);
turso> INSERT INTO q(y) VALUES (2);
turso> INSERT INTO q(y) VALUES (3);
turso> SELECT * FROM q;
┌─────────────────────┬───┐
│ x                   │ y │
├─────────────────────┼───┤
│ 1841427626667347484 │ 2 │
├─────────────────────┼───┤
│ 4000338366725695791 │ 3 │
├─────────────────────┼───┤
│ 9223372036854775807 │ 1 │
└─────────────────────┴───┘
```

Fixes: https://github.com/tursodatabase/turso/issues/1977

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #1985
2026-02-23 17:05:36 +01:00 · 2025-07-14 11:56:09 +03:00
parent 0b544717a1 12f9743443
commit 9285d8b83b
4 changed files with 125 additions and 48 deletions
--- a/core/error.rs
+++ b/core/error.rs
@@ -10,6 +10,8 @@ pub enum LimboError {
    InternalError(String),
    #[error("Page cache is full")]
    CacheFull,
+    #[error("Database is full: {0}")]
+    DatabaseFull(String),
    #[error("Parse error: {0}")]
    ParseError(String),
    #[error(transparent)]
--- a/core/vdbe/execute.rs
+++ b/core/vdbe/execute.rs
@@ -65,7 +65,7 @@ use super::{
 };
 use fallible_iterator::FallibleIterator;
 use parking_lot::RwLock;
-use rand::thread_rng;
+use rand::{thread_rng, Rng};
 use turso_sqlite3_parser::ast;
 use turso_sqlite3_parser::ast::fmt::ToTokens;
 use turso_sqlite3_parser::lexer::sql::Parser;
@@ -88,7 +88,7 @@ use crate::{
    json::jsonb_patch, json::jsonb_remove, json::jsonb_replace, json::jsonb_set,
 };

-use super::{get_new_rowid, make_record, Program, ProgramState, Register};
+use super::{make_record, Program, ProgramState, Register};
 use crate::{
    bail_constraint_error, must_be_btree_cursor, resolve_ext_path, MvStore, Pager, Result,
 };
@@ -4988,6 +4988,15 @@ pub fn op_idx_insert(
    }
 }

+#[derive(Debug)]
+pub enum OpNewRowidState {
+    Start,
+    SeekingToLast,
+    ReadingMaxRowid,
+    GeneratingRandom { attempts: u32 },
+    VerifyingCandidate { attempts: u32, candidate: i64 },
+}
+
 pub fn op_new_rowid(
    program: &Program,
    state: &mut ProgramState,
@@ -5001,15 +5010,106 @@ pub fn op_new_rowid(
    else {
        unreachable!("unexpected Insn {:?}", insn)
    };
-    let rowid = {
-        let mut cursor = state.get_cursor(*cursor);
-        let cursor = cursor.as_btree_mut();
-        // TODO: make io handle rng
-        return_if_io!(get_new_rowid(cursor, thread_rng()))
-    };
-    state.registers[*rowid_reg] = Register::Value(Value::Integer(rowid));
-    state.pc += 1;
-    Ok(InsnFunctionStepResult::Step)
+
+    const MAX_ROWID: i64 = i64::MAX;
+    const MAX_ATTEMPTS: u32 = 100;
+
+    loop {
+        match &state.op_new_rowid_state {
+            OpNewRowidState::Start => {
+                state.op_new_rowid_state = OpNewRowidState::SeekingToLast;
+            }
+
+            OpNewRowidState::SeekingToLast => {
+                {
+                    let mut cursor = state.get_cursor(*cursor);
+                    let cursor = cursor.as_btree_mut();
+                    return_if_io!(cursor.seek_to_last());
+                }
+                state.op_new_rowid_state = OpNewRowidState::ReadingMaxRowid;
+            }
+
+            OpNewRowidState::ReadingMaxRowid => {
+                let current_max = {
+                    let mut cursor = state.get_cursor(*cursor);
+                    let cursor = cursor.as_btree_mut();
+
+                    // Move to last record
+                    return_if_io!(cursor.seek_to_last());
+
+                    return_if_io!(cursor.rowid())
+                };
+
+                match current_max {
+                    Some(rowid) if rowid < MAX_ROWID => {
+                        // Can use sequential
+                        state.registers[*rowid_reg] = Register::Value(Value::Integer(rowid + 1));
+                        state.op_new_rowid_state = OpNewRowidState::Start;
+                        state.pc += 1;
+                        return Ok(InsnFunctionStepResult::Step);
+                    }
+                    Some(_) => {
+                        // Must use random (rowid == MAX_ROWID)
+                        state.op_new_rowid_state =
+                            OpNewRowidState::GeneratingRandom { attempts: 0 };
+                    }
+                    None => {
+                        // Empty table
+                        state.registers[*rowid_reg] = Register::Value(Value::Integer(1));
+                        state.op_new_rowid_state = OpNewRowidState::Start;
+                        state.pc += 1;
+                        return Ok(InsnFunctionStepResult::Step);
+                    }
+                }
+            }
+
+            OpNewRowidState::GeneratingRandom { attempts } => {
+                if *attempts >= MAX_ATTEMPTS {
+                    return Err(LimboError::DatabaseFull("Unable to find an unused rowid after 100 attempts - database is probably full".to_string()));
+                }
+
+                // Generate a random i64 and constrain it to the lower half of the rowid range.
+                // We use the lower half (1 to MAX_ROWID/2) because we're in random mode only
+                // when sequential allocation reached MAX_ROWID, meaning the upper range is full.
+                let mut rng = thread_rng();
+                let mut random_rowid: i64 = rng.gen();
+                random_rowid &= MAX_ROWID >> 1; // Mask to keep value in range [0, MAX_ROWID/2]
+                random_rowid += 1; // Ensure positive
+
+                state.op_new_rowid_state = OpNewRowidState::VerifyingCandidate {
+                    attempts: *attempts,
+                    candidate: random_rowid,
+                };
+            }
+
+            OpNewRowidState::VerifyingCandidate {
+                attempts,
+                candidate,
+            } => {
+                let exists = {
+                    let mut cursor = state.get_cursor(*cursor);
+                    let cursor = cursor.as_btree_mut();
+                    return_if_io!(cursor.seek(
+                        SeekKey::TableRowId(*candidate),
+                        SeekOp::GE { eq_only: true }
+                    ))
+                };
+
+                if !exists {
+                    // Found unused rowid!
+                    state.registers[*rowid_reg] = Register::Value(Value::Integer(*candidate));
+                    state.op_new_rowid_state = OpNewRowidState::Start;
+                    state.pc += 1;
+                    return Ok(InsnFunctionStepResult::Step);
+                } else {
+                    // Collision, try again
+                    state.op_new_rowid_state = OpNewRowidState::GeneratingRandom {
+                        attempts: attempts + 1,
+                    };
+                }
+            }
+        }
+    }
 }

 pub fn op_must_be_int(
--- a/core/vdbe/mod.rs
+++ b/core/vdbe/mod.rs
@@ -32,13 +32,14 @@ use crate::{
    types::{RawSlice, TextRef},
    vdbe::execute::OpIdxInsertState,
    vdbe::execute::OpInsertState,
+    vdbe::execute::OpNewRowidState,
    RefValue,
 };

 use crate::{
-    storage::{btree::BTreeCursor, pager::Pager},
+    storage::pager::Pager,
    translate::plan::ResultSetColumn,
-    types::{AggContext, Cursor, CursorResult, ImmutableRecord, Value},
+    types::{AggContext, Cursor, ImmutableRecord, Value},
    vdbe::{builder::CursorType, insn::Insn},
 };

@@ -51,7 +52,6 @@ use execute::{
    OpOpenEphemeralState,
 };

-use rand::Rng;
 use regex::Regex;
 use std::{
    cell::{Cell, RefCell},
@@ -254,6 +254,7 @@ pub struct ProgramState {
    op_idx_delete_state: Option<OpIdxDeleteState>,
    op_integrity_check_state: OpIntegrityCheckState,
    op_open_ephemeral_state: OpOpenEphemeralState,
+    op_new_rowid_state: OpNewRowidState,
    op_idx_insert_state: OpIdxInsertState,
    op_insert_state: OpInsertState,
 }
@@ -282,6 +283,7 @@ impl ProgramState {
            op_idx_delete_state: None,
            op_integrity_check_state: OpIntegrityCheckState::Start,
            op_open_ephemeral_state: OpOpenEphemeralState::Start,
+            op_new_rowid_state: OpNewRowidState::Start,
            op_idx_insert_state: OpIdxInsertState::SeekIfUnique,
            op_insert_state: OpInsertState::Insert,
        }
@@ -548,40 +550,6 @@ impl Program {
    }
 }

-fn get_new_rowid<R: Rng>(cursor: &mut BTreeCursor, mut _rng: R) -> Result<CursorResult<i64>> {
-    match cursor.seek_to_last()? {
-        CursorResult::Ok(()) => {}
-        CursorResult::IO => return Ok(CursorResult::IO),
-    }
-    let rowid = match cursor.rowid()? {
-        CursorResult::Ok(Some(rowid)) => rowid.checked_add(1).unwrap_or(i64::MAX), // add 1 but be careful with overflows, in case of overflow - use i64::MAX
-        CursorResult::Ok(None) => 1,
-        CursorResult::IO => return Ok(CursorResult::IO),
-    };
-    // NOTE(nilskch): I commented this part out because this condition will never be true.
-    // if rowid > i64::MAX {
-    //     let distribution = Uniform::from(1..=i64::MAX);
-    //     let max_attempts = 100;
-    //     for count in 0..max_attempts {
-    //         rowid = distribution.sample(&mut rng);
-    //         match cursor.seek(SeekKey::TableRowId(rowid), SeekOp::GE { eq_only: true })? {
-    //             CursorResult::Ok(false) => break, // Found a non-existing rowid
-    //             CursorResult::Ok(true) => {
-    //                 if count == max_attempts - 1 {
-    //                     return Err(LimboError::InternalError(
-    //                         "Failed to generate a new rowid".to_string(),
-    //                     ));
-    //                 } else {
-    //                     continue; // Try next random rowid
-    //                 }
-    //             }
-    //             CursorResult::IO => return Ok(CursorResult::IO),
-    //         }
-    //     }
-    // }
-    Ok(CursorResult::Ok(rowid))
-}
-
 fn make_record(registers: &[Register], start_reg: &usize, count: &usize) -> ImmutableRecord {
    let regs = &registers[*start_reg..*start_reg + *count];
    ImmutableRecord::from_registers(regs, regs.len())
--- a/testing/insert.test
+++ b/testing/insert.test
@@ -386,3 +386,10 @@ do_execsql_test_on_specific_db {:memory:} not-null-rowid-alias {
    select * from t;
 } {1|2}

+do_execsql_test_on_specific_db {:memory:} rowid-overflow-random-generation {
+    CREATE TABLE q(x INTEGER PRIMARY KEY, y);
+    INSERT INTO q VALUES (9223372036854775807, 1);
+    INSERT INTO q(y) VALUES (2);
+    INSERT INTO q(y) VALUES (3);
+    SELECT COUNT(*) FROM q;
+} {3}