Merge 'Fix: OP_NewRowId to generate semi random rowid when largest rowid is i64::MAX' from Krishna Vishal

- `OP_NewRowId` now generates a new rowid semi-randomly when the largest
rowid in the table is `i64::MAX`.
- Introduced a new `LimboError` variant, `DatabaseFull`, to signal that the
database might be full (SQLite behaves the same way, returning
`SQLITE_FULL`).
Now:
```SQL
turso> CREATE TABLE q(x INTEGER PRIMARY KEY, y);
turso> INSERT INTO q VALUES (9223372036854775807, 1);
turso> INSERT INTO q(y) VALUES (2);
turso> INSERT INTO q(y) VALUES (3);
turso> SELECT * FROM q;
┌─────────────────────┬───┐
│ x                   │ y │
├─────────────────────┼───┤
│ 1841427626667347484 │ 2 │
├─────────────────────┼───┤
│ 4000338366725695791 │ 3 │
├─────────────────────┼───┤
│ 9223372036854775807 │ 1 │
└─────────────────────┴───┘
```
Fixes: https://github.com/tursodatabase/turso/issues/1977

Reviewed-by: Jussi Saurio <jussi.saurio@gmail.com>

Closes #1985
This commit is contained in:
Pekka Enberg
2025-07-14 11:56:09 +03:00
4 changed files with 125 additions and 48 deletions

View File

@@ -10,6 +10,8 @@ pub enum LimboError {
InternalError(String),
#[error("Page cache is full")]
CacheFull,
#[error("Database is full: {0}")]
DatabaseFull(String),
#[error("Parse error: {0}")]
ParseError(String),
#[error(transparent)]

View File

@@ -65,7 +65,7 @@ use super::{
};
use fallible_iterator::FallibleIterator;
use parking_lot::RwLock;
use rand::thread_rng;
use rand::{thread_rng, Rng};
use turso_sqlite3_parser::ast;
use turso_sqlite3_parser::ast::fmt::ToTokens;
use turso_sqlite3_parser::lexer::sql::Parser;
@@ -88,7 +88,7 @@ use crate::{
json::jsonb_patch, json::jsonb_remove, json::jsonb_replace, json::jsonb_set,
};
use super::{get_new_rowid, make_record, Program, ProgramState, Register};
use super::{make_record, Program, ProgramState, Register};
use crate::{
bail_constraint_error, must_be_btree_cursor, resolve_ext_path, MvStore, Pager, Result,
};
@@ -4988,6 +4988,15 @@ pub fn op_idx_insert(
}
}
/// Progress marker for `op_new_rowid`, kept in `ProgramState` between calls.
///
/// `op_new_rowid` sets it back to `Start` every time it writes a rowid into the
/// target register. Presumably the state is persisted so the instruction can
/// resume after a cursor call returns early for I/O (`return_if_io!`) — confirm
/// against that macro's definition.
#[derive(Debug)]
pub enum OpNewRowidState {
/// Initial state; immediately advances to `SeekingToLast`.
Start,
/// Moving the cursor to the last row of the table (`seek_to_last`).
SeekingToLast,
/// Reading the rowid at the last row: no rowid yields 1, a value below
/// `i64::MAX` yields that value plus one, otherwise random generation starts.
ReadingMaxRowid,
/// Drawing a random candidate rowid; `attempts` is the number of collisions
/// seen so far and is checked against the attempt limit before each draw.
GeneratingRandom { attempts: u32 },
/// Seeking `candidate` in the table to find out whether it is already taken;
/// a collision goes back to `GeneratingRandom` with `attempts + 1`.
VerifyingCandidate { attempts: u32, candidate: i64 },
}
pub fn op_new_rowid(
program: &Program,
state: &mut ProgramState,
@@ -5001,15 +5010,106 @@ pub fn op_new_rowid(
else {
unreachable!("unexpected Insn {:?}", insn)
};
let rowid = {
let mut cursor = state.get_cursor(*cursor);
let cursor = cursor.as_btree_mut();
// TODO: make io handle rng
return_if_io!(get_new_rowid(cursor, thread_rng()))
};
state.registers[*rowid_reg] = Register::Value(Value::Integer(rowid));
state.pc += 1;
Ok(InsnFunctionStepResult::Step)
const MAX_ROWID: i64 = i64::MAX;
const MAX_ATTEMPTS: u32 = 100;
loop {
match &state.op_new_rowid_state {
OpNewRowidState::Start => {
state.op_new_rowid_state = OpNewRowidState::SeekingToLast;
}
OpNewRowidState::SeekingToLast => {
{
let mut cursor = state.get_cursor(*cursor);
let cursor = cursor.as_btree_mut();
return_if_io!(cursor.seek_to_last());
}
state.op_new_rowid_state = OpNewRowidState::ReadingMaxRowid;
}
OpNewRowidState::ReadingMaxRowid => {
let current_max = {
let mut cursor = state.get_cursor(*cursor);
let cursor = cursor.as_btree_mut();
// Move to last record
return_if_io!(cursor.seek_to_last());
return_if_io!(cursor.rowid())
};
match current_max {
Some(rowid) if rowid < MAX_ROWID => {
// Can use sequential
state.registers[*rowid_reg] = Register::Value(Value::Integer(rowid + 1));
state.op_new_rowid_state = OpNewRowidState::Start;
state.pc += 1;
return Ok(InsnFunctionStepResult::Step);
}
Some(_) => {
// Must use random (rowid == MAX_ROWID)
state.op_new_rowid_state =
OpNewRowidState::GeneratingRandom { attempts: 0 };
}
None => {
// Empty table
state.registers[*rowid_reg] = Register::Value(Value::Integer(1));
state.op_new_rowid_state = OpNewRowidState::Start;
state.pc += 1;
return Ok(InsnFunctionStepResult::Step);
}
}
}
OpNewRowidState::GeneratingRandom { attempts } => {
if *attempts >= MAX_ATTEMPTS {
return Err(LimboError::DatabaseFull("Unable to find an unused rowid after 100 attempts - database is probably full".to_string()));
}
// Generate a random i64 and constrain it to the lower half of the rowid range.
// We use the lower half (1 to MAX_ROWID/2) because we're in random mode only
// when sequential allocation reached MAX_ROWID, meaning the upper range is full.
let mut rng = thread_rng();
let mut random_rowid: i64 = rng.gen();
random_rowid &= MAX_ROWID >> 1; // Mask to keep value in range [0, MAX_ROWID/2]
random_rowid += 1; // Ensure positive
state.op_new_rowid_state = OpNewRowidState::VerifyingCandidate {
attempts: *attempts,
candidate: random_rowid,
};
}
OpNewRowidState::VerifyingCandidate {
attempts,
candidate,
} => {
let exists = {
let mut cursor = state.get_cursor(*cursor);
let cursor = cursor.as_btree_mut();
return_if_io!(cursor.seek(
SeekKey::TableRowId(*candidate),
SeekOp::GE { eq_only: true }
))
};
if !exists {
// Found unused rowid!
state.registers[*rowid_reg] = Register::Value(Value::Integer(*candidate));
state.op_new_rowid_state = OpNewRowidState::Start;
state.pc += 1;
return Ok(InsnFunctionStepResult::Step);
} else {
// Collision, try again
state.op_new_rowid_state = OpNewRowidState::GeneratingRandom {
attempts: attempts + 1,
};
}
}
}
}
}
pub fn op_must_be_int(

View File

@@ -32,13 +32,14 @@ use crate::{
types::{RawSlice, TextRef},
vdbe::execute::OpIdxInsertState,
vdbe::execute::OpInsertState,
vdbe::execute::OpNewRowidState,
RefValue,
};
use crate::{
storage::{btree::BTreeCursor, pager::Pager},
storage::pager::Pager,
translate::plan::ResultSetColumn,
types::{AggContext, Cursor, CursorResult, ImmutableRecord, Value},
types::{AggContext, Cursor, ImmutableRecord, Value},
vdbe::{builder::CursorType, insn::Insn},
};
@@ -51,7 +52,6 @@ use execute::{
OpOpenEphemeralState,
};
use rand::Rng;
use regex::Regex;
use std::{
cell::{Cell, RefCell},
@@ -254,6 +254,7 @@ pub struct ProgramState {
op_idx_delete_state: Option<OpIdxDeleteState>,
op_integrity_check_state: OpIntegrityCheckState,
op_open_ephemeral_state: OpOpenEphemeralState,
op_new_rowid_state: OpNewRowidState,
op_idx_insert_state: OpIdxInsertState,
op_insert_state: OpInsertState,
}
@@ -282,6 +283,7 @@ impl ProgramState {
op_idx_delete_state: None,
op_integrity_check_state: OpIntegrityCheckState::Start,
op_open_ephemeral_state: OpOpenEphemeralState::Start,
op_new_rowid_state: OpNewRowidState::Start,
op_idx_insert_state: OpIdxInsertState::SeekIfUnique,
op_insert_state: OpInsertState::Insert,
}
@@ -548,40 +550,6 @@ impl Program {
}
}
/// Computes the rowid to use for a new row: one past the rowid found at the
/// cursor's last position.
///
/// Returns `CursorResult::IO` as soon as either cursor call reports it, and
/// propagates any error from `seek_to_last` or `rowid`. When no rowid is found
/// the result is 1. The increment saturates, so a last rowid of `i64::MAX`
/// produces `i64::MAX` again.
///
/// `_rng` is accepted but never used; no random fallback is implemented here.
fn get_new_rowid<R: Rng>(cursor: &mut BTreeCursor, mut _rng: R) -> Result<CursorResult<i64>> {
    // Position the cursor on the last entry before reading its rowid.
    if let CursorResult::IO = cursor.seek_to_last()? {
        return Ok(CursorResult::IO);
    }

    let next_rowid = match cursor.rowid()? {
        CursorResult::IO => return Ok(CursorResult::IO),
        // Nothing at the cursor position: numbering starts at 1.
        CursorResult::Ok(None) => 1,
        // Adding 1 can only overflow upwards, so saturation pins the result at i64::MAX.
        CursorResult::Ok(Some(last_rowid)) => last_rowid.saturating_add(1),
    };

    Ok(CursorResult::Ok(next_rowid))
}
fn make_record(registers: &[Register], start_reg: &usize, count: &usize) -> ImmutableRecord {
let regs = &registers[*start_reg..*start_reg + *count];
ImmutableRecord::from_registers(regs, regs.len())

View File

@@ -386,3 +386,10 @@ do_execsql_test_on_specific_db {:memory:} not-null-rowid-alias {
select * from t;
} {1|2}
# Once a row with the largest possible rowid (i64::MAX = 9223372036854775807)
# exists, inserts that omit the rowid must still succeed. The generated rowids
# are random, so only the resulting row count is asserted.
do_execsql_test_on_specific_db {:memory:} rowid-overflow-random-generation {
CREATE TABLE q(x INTEGER PRIMARY KEY, y);
INSERT INTO q VALUES (9223372036854775807, 1);
INSERT INTO q(y) VALUES (2);
INSERT INTO q(y) VALUES (3);
SELECT COUNT(*) FROM q;
} {3}