From dc3d1fa36dd97589cda3fab7074e5ab3500448d0 Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sat, 27 Sep 2025 17:03:18 +0530 Subject: [PATCH 1/5] Use the SQLite header as associated data for protection against tampering and corruption. Previously, we did not use the first 100 bytes in encryption machinery. This patch changes that and uses that data as associated data. So in case the header is corrupted or tampered with, the decryption will fail. --- core/storage/encryption.rs | 397 ++++++++++++++++++++++++++++++++++--- 1 file changed, 368 insertions(+), 29 deletions(-) diff --git a/core/storage/encryption.rs b/core/storage/encryption.rs index c43a6d660..62f1e4f04 100644 --- a/core/storage/encryption.rs +++ b/core/storage/encryption.rs @@ -1,5 +1,5 @@ #![allow(unused_variables, dead_code)] -use crate::{LimboError, Result}; +use crate::{turso_assert, LimboError, Result}; use aegis::aegis128l::Aegis128L; use aegis::aegis128x2::Aegis128X2; use aegis::aegis128x4::Aegis128X4; @@ -12,6 +12,12 @@ use aes_gcm::{ }; use turso_macros::match_ignore_ascii_case; +/// constants used for the Turso page header in the encrypted dbs. +const TURSO_HEADER_PREFIX: &[u8] = b"Turso"; +const TURSO_VERSION: u8 = 0x00; +const TURSO_HEADER_SIZE: usize = 16; +const SQLITE_HEADER: &[u8] = b"SQLite format 3\0"; + #[derive(Clone)] pub enum EncryptionKey { Key128([u8; 16]), @@ -189,9 +195,12 @@ macro_rules! define_aes_gcm_cipher { }) } - fn encrypt(&self, plaintext: &[u8], _ad: &[u8]) -> Result<(Vec, [u8; 12])> { + fn encrypt(&self, plaintext: &[u8], ad: &[u8]) -> Result<(Vec, [u8; 12])> { let nonce = <$cipher_type>::generate_nonce(&mut OsRng); - let ciphertext = self.cipher.encrypt(&nonce, plaintext).map_err(|e| { + let ciphertext = self.cipher.encrypt(&nonce, aes_gcm::aead::Payload { + msg: plaintext, + aad: ad, + }).map_err(|e| { LimboError::InternalError(format!("{} encryption failed: {e:?}", $name)) })?; let mut nonce_array = [0u8; 12]; @@ -199,10 +208,13 @@ macro_rules! 
define_aes_gcm_cipher { Ok((ciphertext, nonce_array)) } - fn decrypt(&self, ciphertext: &[u8], nonce: &[u8; 12], _ad: &[u8]) -> Result> { + fn decrypt(&self, ciphertext: &[u8], nonce: &[u8; 12], ad: &[u8]) -> Result> { let nonce = Nonce::from_slice(nonce); self.cipher - .decrypt(nonce, ciphertext) + .decrypt(nonce, aes_gcm::aead::Payload { + msg: ciphertext, + aad: ad, + }) .map_err(|_| -> LimboError { CipherError::DecryptionFailed { cipher: $name }.into() }) } } @@ -349,6 +361,37 @@ impl CipherMode { pub fn metadata_size(&self) -> usize { self.nonce_size() + self.tag_size() } + + /// Returns the cipher identifier byte for Turso header + pub fn cipher_id(&self) -> u8 { + match self { + CipherMode::Aes128Gcm => 1, + CipherMode::Aes256Gcm => 2, + CipherMode::Aegis256 => 3, + CipherMode::Aegis256X2 => 4, + CipherMode::Aegis256X4 => 5, + CipherMode::Aegis128L => 6, + CipherMode::Aegis128X2 => 7, + CipherMode::Aegis128X4 => 8, + } + } + + /// Creates a CipherMode from cipher identifier byte. This is used when read from Turso header. 
+ pub fn from_cipher_id(id: u8) -> Result { + match id { + 1 => Ok(CipherMode::Aes128Gcm), + 2 => Ok(CipherMode::Aes256Gcm), + 3 => Ok(CipherMode::Aegis256), + 4 => Ok(CipherMode::Aegis256X2), + 5 => Ok(CipherMode::Aegis256X4), + 6 => Ok(CipherMode::Aegis128L), + 7 => Ok(CipherMode::Aegis128X2), + 8 => Ok(CipherMode::Aegis128X4), + _ => Err(LimboError::InvalidArgument(format!( + "Unknown cipher ID: {id}" + ))), + } + } } #[derive(Clone)] @@ -423,9 +466,66 @@ impl EncryptionContext { self.cipher_mode.metadata_size() as u8 } + /// Creates Turso header for encrypted page 1 + fn create_turso_header(&self) -> [u8; TURSO_HEADER_SIZE] { + let mut header = [0u8; TURSO_HEADER_SIZE]; + + // "Turso" prefix (5 bytes) + header[..TURSO_HEADER_PREFIX.len()].copy_from_slice(TURSO_HEADER_PREFIX); + + // version byte (1 byte) + header[5] = TURSO_VERSION; + + // cipher identifier (1 byte) + header[6] = self.cipher_mode.cipher_id(); + + // remaining unused 9 bytes + header + } + + /// Validates and extracts cipher mode from Turso header + fn validate_turso_header(&self, header: &[u8]) -> Result<()> { + if header.len() < TURSO_HEADER_SIZE { + return Err(LimboError::InternalError( + "Header too short for encrypted Turso db".into(), + )); + } + + if &header[..TURSO_HEADER_PREFIX.len()] != TURSO_HEADER_PREFIX { + return Err(LimboError::InternalError( + "Invalid Turso header: prefix mismatch".into(), + )); + } + + let version = header[5]; + if version != TURSO_VERSION { + return Err(LimboError::InternalError(format!( + "Unsupported Turso header version: expected {}, got {}", + TURSO_VERSION, version + ))); + } + + let cipher_id = header[6]; + let header_cipher = CipherMode::from_cipher_id(cipher_id)?; + if header_cipher != self.cipher_mode { + return Err(LimboError::InternalError(format!( + "Cipher mode mismatch: expected {:?} (ID {}), got {:?} (ID {})", + self.cipher_mode, + self.cipher_mode.cipher_id(), + header_cipher, + cipher_id + ))); + } + + Ok(()) + } + #[cfg(feature = 
"encryption")] pub fn encrypt_page(&self, page: &[u8], page_id: usize) -> Result> { use crate::storage::sqlite3_ondisk::DatabaseHeader; + if page_id == DatabaseHeader::PAGE_ID { + return self.encrypt_page_1(page); + } tracing::debug!("encrypting page {}", page_id); assert_eq!( page.len(), @@ -434,19 +534,12 @@ impl EncryptionContext { self.page_size ); - let encryption_start_offset = match page_id { - DatabaseHeader::PAGE_ID => DatabaseHeader::SIZE, - _ => 0, - }; let metadata_size = self.cipher_mode.metadata_size(); let reserved_bytes = &page[self.page_size - metadata_size..]; #[cfg(debug_assertions)] { use crate::turso_assert; - // In debug builds, ensure that the reserved bytes are zeroed out. So even when we are - // reusing a page from buffer pool, we zero out in debug build so that we can be - // sure that b tree layer is not writing any data into the reserved space. let reserved_bytes_zeroed = reserved_bytes.iter().all(|&b| b == 0); turso_assert!( reserved_bytes_zeroed, @@ -454,20 +547,18 @@ impl EncryptionContext { ); } - let payload = &page[encryption_start_offset..self.page_size - metadata_size]; + let payload = &page[..self.page_size - metadata_size]; let (encrypted, nonce) = self.encrypt_raw(payload)?; let nonce_size = self.cipher_mode.nonce_size(); assert_eq!( encrypted.len(), - self.page_size - nonce_size - encryption_start_offset, + self.page_size - nonce_size, "Encrypted page must be exactly {} bytes", - self.page_size - nonce_size - encryption_start_offset + self.page_size - nonce_size ); let mut result = Vec::with_capacity(self.page_size); - - result.extend_from_slice(&page[..encryption_start_offset]); result.extend_from_slice(&encrypted); result.extend_from_slice(&nonce); assert_eq!( @@ -482,6 +573,9 @@ impl EncryptionContext { #[cfg(feature = "encryption")] pub fn decrypt_page(&self, encrypted_page: &[u8], page_id: usize) -> Result> { use crate::storage::sqlite3_ondisk::DatabaseHeader; + if page_id == DatabaseHeader::PAGE_ID { + return 
self.decrypt_page_1(encrypted_page); + } tracing::debug!("decrypting page {}", page_id); assert_eq!( encrypted_page.len(), @@ -489,32 +583,143 @@ impl EncryptionContext { "Encrypted page data must be exactly {} bytes", self.page_size ); - // for page 1, the encrypted page starts after the database header - // for other pages, the encrypted page starts at the beginning - let encrypted_page_offset = match page_id { - DatabaseHeader::PAGE_ID => DatabaseHeader::SIZE, - _ => 0, - }; let nonce_size = self.cipher_mode.nonce_size(); let nonce_offset = encrypted_page.len() - nonce_size; - let payload = &encrypted_page[encrypted_page_offset..nonce_offset]; + let payload = &encrypted_page[..nonce_offset]; let nonce = &encrypted_page[nonce_offset..]; let decrypted_data = self.decrypt_raw(payload, nonce)?; + let metadata_size = self.cipher_mode.metadata_size(); + assert_eq!( + decrypted_data.len(), + self.page_size - metadata_size, + "Decrypted page data must be exactly {} bytes", + self.page_size - metadata_size + ); + + let mut result = Vec::with_capacity(self.page_size); + result.extend_from_slice(&decrypted_data); + result.resize(self.page_size, 0); + + assert_eq!( + result.len(), + self.page_size, + "Decrypted page data must be exactly {} bytes", + self.page_size + ); + Ok(result) + } + + #[cfg(feature = "encryption")] + fn encrypt_page_1(&self, page: &[u8]) -> Result> { + use crate::storage::sqlite3_ondisk::DatabaseHeader; + + tracing::debug!("encrypting page 1"); + assert_eq!( + page.len(), + self.page_size, + "Page data must be exactly {} bytes", + self.page_size + ); + + // since this is page 1, this must have header + turso_assert!( + &page[..SQLITE_HEADER.len()] == SQLITE_HEADER, + "Page 1 must start with SQLite header" + ); + + let metadata_size = self.cipher_mode.metadata_size(); + let reserved_bytes = &page[self.page_size - metadata_size..]; + + #[cfg(debug_assertions)] + { + use crate::turso_assert; + let reserved_bytes_zeroed = reserved_bytes.iter().all(|&b| b 
== 0); + turso_assert!( + reserved_bytes_zeroed, + "last reserved bytes must be empty/zero, but found non-zero bytes" + ); + } + + // page 1 encryption: + // 1. First 16 bytes are replaced with Turso magic bytes + // 2. Next 84 bytes (16-100) are kept as-is (not encrypted) + // 3. Remaining bytes (100-end) are encrypted + // 4. The header (the first 100 bytes) as associated data + let turso_header = self.create_turso_header(); + let mut new_header = Vec::with_capacity(DatabaseHeader::SIZE); + new_header.extend_from_slice(&turso_header); + new_header.extend_from_slice(&page[TURSO_HEADER_SIZE..DatabaseHeader::SIZE]); + + let payload = &page[DatabaseHeader::SIZE..self.page_size - metadata_size]; + let (encrypted, nonce) = self.encrypt_raw_with_ad(payload, &new_header)?; + + let nonce_size = self.cipher_mode.nonce_size(); + assert_eq!( + encrypted.len(), + self.page_size - nonce_size - DatabaseHeader::SIZE, + "Encrypted page must be exactly {} bytes", + self.page_size - nonce_size - DatabaseHeader::SIZE + ); + + let mut result = Vec::with_capacity(self.page_size); + + // 1. copy the header + result.extend_from_slice(&new_header); + // 2. copy the encrypted payload + result.extend_from_slice(&encrypted); + // 3. 
now add the nonce + result.extend_from_slice(&nonce); + + assert_eq!( + result.len(), + self.page_size, + "Encrypted page must be exactly {} bytes", + self.page_size + ); + Ok(result) + } + + #[cfg(feature = "encryption")] + fn decrypt_page_1(&self, encrypted_page: &[u8]) -> Result> { + use crate::storage::sqlite3_ondisk::DatabaseHeader; + + tracing::debug!("decrypting page 1"); + assert_eq!( + encrypted_page.len(), + self.page_size, + "Encrypted page data must be exactly {} bytes", + self.page_size + ); + + self.validate_turso_header(&encrypted_page[..TURSO_HEADER_SIZE])?; + + let nonce_size = self.cipher_mode.nonce_size(); + let nonce_offset = encrypted_page.len() - nonce_size; + let payload = &encrypted_page[DatabaseHeader::SIZE..nonce_offset]; + let nonce = &encrypted_page[nonce_offset..]; + + // it's important to use the header on disk (with Turso magic bytes) as associated data + // for protection against tampering the header + let header = &encrypted_page[..DatabaseHeader::SIZE]; + let decrypted_data = self.decrypt_raw_with_ad(payload, nonce, header)?; let metadata_size = self.cipher_mode.metadata_size(); assert_eq!( decrypted_data.len(), - self.page_size - metadata_size - encrypted_page_offset, + self.page_size - metadata_size - DatabaseHeader::SIZE, "Decrypted page data must be exactly {} bytes", - self.page_size - metadata_size - encrypted_page_offset + self.page_size - metadata_size - DatabaseHeader::SIZE ); + // reconstruct the page with the appropriate SQLite header let mut result = Vec::with_capacity(self.page_size); - result.extend_from_slice(&encrypted_page[..encrypted_page_offset]); + result.extend_from_slice(SQLITE_HEADER); + result.extend_from_slice(&encrypted_page[TURSO_HEADER_SIZE..DatabaseHeader::SIZE]); result.extend_from_slice(&decrypted_data); result.resize(self.page_size, 0); + assert_eq!( result.len(), self.page_size, @@ -527,10 +732,14 @@ impl EncryptionContext { /// encrypts raw data using the configured cipher, returns ciphertext and 
nonce fn encrypt_raw(&self, plaintext: &[u8]) -> Result<(Vec, Vec)> { const AD: &[u8] = b""; + self.encrypt_raw_with_ad(plaintext, AD) + } + /// encrypts raw data with associated data using the configured cipher + fn encrypt_raw_with_ad(&self, plaintext: &[u8], ad: &[u8]) -> Result<(Vec, Vec)> { macro_rules! encrypt_cipher { ($cipher:expr) => {{ - let (ciphertext, nonce) = $cipher.encrypt(plaintext, AD)?; + let (ciphertext, nonce) = $cipher.encrypt(plaintext, ad)?; Ok((ciphertext, nonce.to_vec())) }}; } @@ -549,7 +758,10 @@ impl EncryptionContext { fn decrypt_raw(&self, ciphertext: &[u8], nonce: &[u8]) -> Result> { const AD: &[u8] = b""; + self.decrypt_raw_with_ad(ciphertext, nonce, AD) + } + fn decrypt_raw_with_ad(&self, ciphertext: &[u8], nonce: &[u8], ad: &[u8]) -> Result> { macro_rules! decrypt_with_nonce { ($cipher:expr, $nonce_size:literal, $name:literal) => {{ let nonce_array: [u8; $nonce_size] = nonce.try_into().map_err(|_| { @@ -560,7 +772,7 @@ impl EncryptionContext { nonce.len() )) })?; - $cipher.decrypt(ciphertext, &nonce_array, AD) + $cipher.decrypt(ciphertext, &nonce_array, ad) }}; } @@ -715,6 +927,17 @@ mod tests { hex::encode(bytes) } + fn create_test_page_1() -> Vec { + let mut page = vec![0u8; DEFAULT_ENCRYPTED_PAGE_SIZE]; + page[..SQLITE_HEADER.len()].copy_from_slice(SQLITE_HEADER); + let mut rng = rand::thread_rng(); + // 48 is the max reserved bytes we might need for metadata with any cipher + for i in SQLITE_HEADER.len()..DEFAULT_ENCRYPTED_PAGE_SIZE - 48 { + page[i] = rng.gen(); + } + page + } + test_aes_cipher_wrapper!( test_aes128gcm_cipher_wrapper, Aes128GcmCipher, @@ -731,6 +954,122 @@ mod tests { "Hello, AES-128-GCM!" 
); + #[test] + fn test_page_1_encrypt_decrypt_round_trip_with_ad() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + let page_data = create_test_page_1(); + let encrypted = ctx.encrypt_page(&page_data, 1).unwrap(); + assert_eq!(encrypted.len(), DEFAULT_ENCRYPTED_PAGE_SIZE); + + // check that header is readable directly from disk (not encrypted) + assert_eq!(&encrypted[..5], b"Turso"); + assert_eq!(encrypted[5], TURSO_VERSION); + assert_eq!(encrypted[6], CipherMode::Aegis256.cipher_id()); + + // header should be unencrypted, but data after DatabaseHeader::SIZE should be different + assert_eq!(&encrypted[16..100], &page_data[16..100]); // header portion + assert_ne!(&encrypted[100..200], &page_data[100..200]); // some encrypted portion + + // decrypt page 1 + let decrypted = ctx.decrypt_page(&encrypted, 1).unwrap(); + assert_eq!(decrypted.len(), DEFAULT_ENCRYPTED_PAGE_SIZE); + + // check that SQLite header was restored + assert_eq!(&decrypted[..SQLITE_HEADER.len()], SQLITE_HEADER); + assert_eq!(decrypted, page_data); + } + + #[test] + fn test_turso_header_validation() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + // test cipher_id conversion + assert_eq!(CipherMode::Aes128Gcm.cipher_id(), 1); + assert_eq!(CipherMode::Aes256Gcm.cipher_id(), 2); + assert_eq!(CipherMode::Aegis256.cipher_id(), 3); + assert_eq!(CipherMode::Aegis128L.cipher_id(), 6); + + // test from_cipher_id conversion + assert_eq!( + CipherMode::from_cipher_id(1).unwrap(), + CipherMode::Aes128Gcm + ); + assert_eq!(CipherMode::from_cipher_id(3).unwrap(), CipherMode::Aegis256); + assert!(CipherMode::from_cipher_id(99).is_err()); + + // test header creation + let header = ctx.create_turso_header(); + 
assert_eq!(&header[..5], b"Turso"); + assert_eq!(header[5], TURSO_VERSION); + assert_eq!(header[6], 3); // AEGIS-256 + assert_eq!(&header[7..], &[0u8; 9]); // unused bytes are zero + } + + #[test] + fn test_invalid_turso_header_fails_decrypt() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + let page_data = create_test_page_1(); + let encrypted = ctx.encrypt_page(&page_data, 1).unwrap(); + + // corrupt the header prefix + let mut corrupted = encrypted.clone(); + corrupted[0] = b'V'; // make `Turso` to `Vurso` + assert!(ctx.decrypt_page(&corrupted, 1).is_err()); + + // test with wrong cipher ID + let mut wrong_cipher = encrypted.clone(); + wrong_cipher[6] = 99; // invalid cipher ID + assert!(ctx.decrypt_page(&wrong_cipher, 1).is_err()); + } + + #[test] + fn test_associated_data_validation() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + let page_data = create_test_page_1(); + let encrypted = ctx.encrypt_page(&page_data, 1).unwrap(); + + // modify a byte in the preserved header portion (bytes 16-100) + let mut corrupted_ad = encrypted.clone(); + corrupted_ad[50] ^= 1; // flip one bit in the associated data portion + + // this should fail decryption because associated data doesn't match + let decrypt_result = ctx.decrypt_page(&corrupted_ad, 1); + assert!( + decrypt_result.is_err(), + "Decryption should fail with corrupted associated data" + ); + } + + #[test] + fn test_turso_header_corruption_detection() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + let page_data = create_test_page_1(); + let encrypted = ctx.encrypt_page(&page_data, 
1).unwrap(); + + let mut corrupted_turso_header = encrypted.clone(); + corrupted_turso_header[7] ^= 1; + + let decrypt_result = ctx.decrypt_page(&corrupted_turso_header, 1); + assert!( + decrypt_result.is_err(), + "Decryption should fail with corrupted Turso header" + ); + } + #[test] fn test_aes128gcm_encrypt_decrypt_round_trip() { let mut rng = rand::thread_rng(); From a7237b80eae240be1f0aa51a451aa57b0400876e Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sat, 27 Sep 2025 17:49:42 +0530 Subject: [PATCH 2/5] add tests for checking encryption tampering --- .../query_processing/encryption.rs | 267 ++++++++++++++++++ 1 file changed, 267 insertions(+) diff --git a/tests/integration/query_processing/encryption.rs b/tests/integration/query_processing/encryption.rs index 2682d1104..f61c6f0bd 100644 --- a/tests/integration/query_processing/encryption.rs +++ b/tests/integration/query_processing/encryption.rs @@ -196,3 +196,270 @@ fn test_non_4k_page_size_encryption() -> anyhow::Result<()> { Ok(()) } + +#[test] +fn test_corruption_turso_magic_bytes() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let db_name = format!("test-corruption-magic-{}.db", rng().next_u32()); + let tmp_db = TempDatabase::new(&db_name, false); + let db_path = tmp_db.path.clone(); + + { + let conn = tmp_db.connect_limbo(); + run_query( + &tmp_db, + &conn, + "PRAGMA hexkey = 'b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327';", + )?; + run_query(&tmp_db, &conn, "PRAGMA cipher = 'aegis256';")?; + run_query( + &tmp_db, + &conn, + "CREATE TABLE test (id INTEGER PRIMARY KEY, value TEXT);", + )?; + run_query( + &tmp_db, + &conn, + "INSERT INTO test (value) VALUES ('Test corruption')", + )?; + run_query(&tmp_db, &conn, "PRAGMA wal_checkpoint(TRUNCATE);")?; + do_flush(&conn, &tmp_db)?; + } + + // corrupt the Turso magic bytes by changing "Turso" to "Vurso" (the db name as it was intended) + { + use std::fs::OpenOptions; + use std::io::{Seek, SeekFrom, Write}; + + let 
mut file = OpenOptions::new().write(true).open(&db_path)?; + + file.seek(SeekFrom::Start(0))?; + file.write_all(b"V")?; + } + + // try to connect to the corrupted database - this should fail + { + let uri = format!( + "file:{}?cipher=aegis256&hexkey=b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + db_path.to_str().unwrap() + ); + + let should_panic = panic::catch_unwind(panic::AssertUnwindSafe(|| { + let (_io, conn) = + turso_core::Connection::from_uri(&uri, true, false, false, false).unwrap(); + run_query_on_row(&tmp_db, &conn, "SELECT * FROM test", |_row: &Row| {}).unwrap(); + })); + + assert!( + should_panic.is_err(), + "should panic when accessing encrypted DB with corrupted Turso magic bytes" + ); + } + + Ok(()) +} + +#[test] +fn test_corruption_associated_data_bytes() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let db_name = format!("test-corruption-ad-{}.db", rng().next_u32()); + let tmp_db = TempDatabase::new(&db_name, false); + let db_path = tmp_db.path.clone(); + + { + let conn = tmp_db.connect_limbo(); + run_query( + &tmp_db, + &conn, + "PRAGMA hexkey = 'b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327';", + )?; + run_query(&tmp_db, &conn, "PRAGMA cipher = 'aegis256';")?; + run_query( + &tmp_db, + &conn, + "CREATE TABLE test (id INTEGER PRIMARY KEY, value TEXT);", + )?; + run_query( + &tmp_db, + &conn, + "INSERT INTO test (value) VALUES ('Test AD corruption')", + )?; + run_query(&tmp_db, &conn, "PRAGMA wal_checkpoint(TRUNCATE);")?; + do_flush(&conn, &tmp_db)?; + } + + // test corruption at different positions in the header (the first 100 bytes) + let corruption_positions = [3, 7, 16, 30, 50, 70, 99]; + + for &corrupt_pos in &corruption_positions { + let test_db_name = format!( + "test-corruption-ad-pos-{}-{}.db", + corrupt_pos, + rng().next_u32() + ); + let test_tmp_db = TempDatabase::new(&test_db_name, false); + let test_db_path = test_tmp_db.path.clone(); + std::fs::copy(&db_path, &test_db_path)?; + + 
// corrupt one byte + { + use std::fs::OpenOptions; + use std::io::{Read, Seek, SeekFrom, Write}; + + let mut file = OpenOptions::new() + .read(true) + .write(true) + .open(&test_db_path)?; + + file.seek(SeekFrom::Start(corrupt_pos as u64))?; + let mut original_byte = [0u8; 1]; + file.read_exact(&mut original_byte)?; + + // corrupt it by flipping all bits + let corrupted_byte = [!original_byte[0]]; + + file.seek(SeekFrom::Start(corrupt_pos as u64))?; + file.write_all(&corrupted_byte)?; + } + + // this should fail + { + let uri = format!( + "file:{}?cipher=aegis256&hexkey=b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + test_db_path.to_str().unwrap() + ); + + let should_panic = panic::catch_unwind(panic::AssertUnwindSafe(|| { + let (_io, conn) = + turso_core::Connection::from_uri(&uri, true, false, false, false).unwrap(); + run_query_on_row(&test_tmp_db, &conn, "SELECT * FROM test", |_row: &Row| {}) + .unwrap(); + })); + + assert!( + should_panic.is_err(), + "should panic when accessing encrypted DB with corrupted associated data at position {}", + corrupt_pos + ); + } + } + + Ok(()) +} + +#[test] +fn test_turso_header_structure() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + + let verify_header = + |db_path: &str, expected_cipher_id: u8, description: &str| -> anyhow::Result<()> { + use std::fs::File; + use std::io::{Read, Seek, SeekFrom}; + + let mut file = File::open(db_path)?; + let mut header = [0u8; 16]; + file.seek(SeekFrom::Start(0))?; + file.read_exact(&mut header)?; + + assert_eq!( + &header[0..5], + b"Turso", + "Magic bytes should be 'Turso' for {}", + description + ); + assert_eq!( + header[5], 0x00, + "Version should be 0x00 for {}", + description + ); + assert_eq!( + header[6], expected_cipher_id, + "Cipher ID should be {} for {}", + expected_cipher_id, description + ); + + // the unused bytes should be zeroed + for (i, &byte) in header[7..16].iter().enumerate() { + assert_eq!( + byte, + 0, + "Unused byte at position {} 
should be 0 for {}", + i + 7, + description + ); + } + + println!("Verified {} header: cipher ID = {}", description, header[6]); + Ok(()) + }; + + let test_cases = [ + ( + "aes128gcm", + 1, + "AES-128-GCM", + "b1bbfda4f589dc9daaf004fe21111e00", + ), + ( + "aes256gcm", + 2, + "AES-256-GCM", + "b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + ), + ( + "aegis256", + 3, + "AEGIS-256", + "b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + ), + ( + "aegis256x2", + 4, + "AEGIS-256X2", + "b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + ), + ( + "aegis128l", + 6, + "AEGIS-128L", + "b1bbfda4f589dc9daaf004fe21111e00", + ), + ( + "aegis128x2", + 7, + "AEGIS-128X2", + "b1bbfda4f589dc9daaf004fe21111e00", + ), + ( + "aegis128x4", + 8, + "AEGIS-128X4", + "b1bbfda4f589dc9daaf004fe21111e00", + ), + ]; + + for (cipher_name, expected_id, description, hexkey) in test_cases { + let db_name = format!("test-header-{}-{}.db", cipher_name, rng().next_u32()); + let tmp_db = TempDatabase::new(&db_name, false); + let db_path = tmp_db.path.clone(); + + { + let conn = tmp_db.connect_limbo(); + run_query(&tmp_db, &conn, &format!("PRAGMA hexkey = '{}';", hexkey))?; + run_query( + &tmp_db, + &conn, + &format!("PRAGMA cipher = '{}';", cipher_name), + )?; + run_query( + &tmp_db, + &conn, + "CREATE TABLE test (id INTEGER PRIMARY KEY, value TEXT);", + )?; + do_flush(&conn, &tmp_db)?; + } + + verify_header(&db_path.to_str().unwrap(), expected_id, description)?; + } + Ok(()) +} From a2df313ad54f206444bcee62928d130ddbbfeff1 Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sat, 27 Sep 2025 18:11:27 +0530 Subject: [PATCH 3/5] Add documentation for the encryption module --- core/storage/encryption.rs | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/core/storage/encryption.rs b/core/storage/encryption.rs index 62f1e4f04..125744344 100644 --- a/core/storage/encryption.rs +++ b/core/storage/encryption.rs 
@@ -12,6 +12,58 @@ use aes_gcm::{ }; use turso_macros::match_ignore_ascii_case; +/// Encryption Scheme +/// We support two major algorithms: AEGIS, AES GCM. These algorithms were picked because they also +/// verify the ciphertext, so we don't need to implement verification ourselves. That is, if the page is corrupted +/// (or tampered with), then decryption will fail instead of silently returning garbage bytes. +/// +/// We perform encryption at the page level, i.e., each page is encrypted and decrypted individually. +/// We store the nonce and tag (or the verification bits) in the page itself. We also generate a +/// random nonce every time we encrypt a page. +/// +/// Example: Assume the page size is 4096 bytes and we use AEGIS 256. So we reserve the last 48 bytes +/// for the nonce (32 bytes) and tag (16 bytes). +/// +/// ```ignore +/// Unencrypted Page Encrypted Page +/// ┌───────────────┐ ┌───────────────┐ +/// │ │ │ │ +/// │ Page Content │ │ Encrypted │ +/// │ (4068 bytes) │ ────────► │ Content │ +/// │ │ │ (4068 bytes) │ +/// ├───────────────┤ ├───────────────┤ +/// │ Reserved │ │ Tag (32) │ +/// │ (48 bytes) │ ├───────────────┤ +/// │ [empty] │ │ Nonce (12) │ +/// └───────────────┘ └───────────────┘ +/// 4096 bytes 4096 bytes +/// ``` +/// +/// The above applies to all the pages except Page 1. Page 1 contains the SQLite header (the +/// first 100 bytes). Specifically, the bytes 16 to 24 contain metadata which is required to +/// initialise the connection, which happens before we can set up the encryption context. So, we +/// don't encrypt the header but instead use the header data as additional data (AD) for the +/// encryption of the rest of the page. This provides us protection against tampering and +/// corruption for the unencrypted portion. 
+/// +/// On disk, the encrypted page 1 contains special bytes replacing the SQLite's magic bytes (the +/// first 16 bytes): +/// +/// ```ignore +/// Turso Header (16 bytes) +/// ┌─────────┬───────┬────────┬──────────────────┐ +/// │ │ │ │ │ +/// │ Turso │Version│ Cipher │ Unused │ +/// │ (5) │ (1) │ (1) │ (9 bytes) │ +/// │ │ │ │ │ +/// └─────────┴───────┴────────┴──────────────────┘ +/// 0-4 5 6 7-15 +/// +/// Standard SQLite Header: "SQLite format 3\0" (16 bytes) +/// ↓ +/// Turso Encrypted Header: "Turso" + Version + Cipher ID + Unused +/// ``` + /// constants used for the Turso page header in the encrypted dbs. const TURSO_HEADER_PREFIX: &[u8] = b"Turso"; const TURSO_VERSION: u8 = 0x00; From c2453046fa796ef35b70979eeac06f2a636db4cb Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sat, 27 Sep 2025 18:16:51 +0530 Subject: [PATCH 4/5] clippy fixes --- core/storage/encryption.rs | 9 +++---- .../query_processing/encryption.rs | 25 ++++++------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/core/storage/encryption.rs b/core/storage/encryption.rs index 125744344..4ef9e63b9 100644 --- a/core/storage/encryption.rs +++ b/core/storage/encryption.rs @@ -63,7 +63,7 @@ use turso_macros::match_ignore_ascii_case; /// ↓ /// Turso Encrypted Header: "Turso" + Version + Cipher ID + Unused /// ``` - +/// /// constants used for the Turso page header in the encrypted dbs. 
const TURSO_HEADER_PREFIX: &[u8] = b"Turso"; const TURSO_VERSION: u8 = 0x00; @@ -552,8 +552,7 @@ impl EncryptionContext { let version = header[5]; if version != TURSO_VERSION { return Err(LimboError::InternalError(format!( - "Unsupported Turso header version: expected {}, got {}", - TURSO_VERSION, version + "Unsupported Turso header version: expected {TURSO_VERSION}, got {version}" ))); } @@ -984,9 +983,7 @@ mod tests { page[..SQLITE_HEADER.len()].copy_from_slice(SQLITE_HEADER); let mut rng = rand::thread_rng(); // 48 is the max reserved bytes we might need for metadata with any cipher - for i in SQLITE_HEADER.len()..DEFAULT_ENCRYPTED_PAGE_SIZE - 48 { - page[i] = rng.gen(); - } + rng.fill(&mut page[SQLITE_HEADER.len()..DEFAULT_ENCRYPTED_PAGE_SIZE - 48]); page } diff --git a/tests/integration/query_processing/encryption.rs b/tests/integration/query_processing/encryption.rs index f61c6f0bd..3fc7a8e63 100644 --- a/tests/integration/query_processing/encryption.rs +++ b/tests/integration/query_processing/encryption.rs @@ -338,8 +338,7 @@ fn test_corruption_associated_data_bytes() -> anyhow::Result<()> { assert!( should_panic.is_err(), - "should panic when accessing encrypted DB with corrupted associated data at position {}", - corrupt_pos + "should panic when accessing encrypted DB with corrupted associated data at position {corrupt_pos}", ); } } @@ -364,18 +363,12 @@ fn test_turso_header_structure() -> anyhow::Result<()> { assert_eq!( &header[0..5], b"Turso", - "Magic bytes should be 'Turso' for {}", - description - ); - assert_eq!( - header[5], 0x00, - "Version should be 0x00 for {}", - description + "Magic bytes should be 'Turso' for {description}" ); + assert_eq!(header[5], 0x00, "Version should be 0x00 for {description}"); assert_eq!( header[6], expected_cipher_id, - "Cipher ID should be {} for {}", - expected_cipher_id, description + "Cipher ID should be {expected_cipher_id} for {description}" ); // the unused bytes should be zeroed @@ -445,12 +438,8 @@ fn 
test_turso_header_structure() -> anyhow::Result<()> { { let conn = tmp_db.connect_limbo(); - run_query(&tmp_db, &conn, &format!("PRAGMA hexkey = '{}';", hexkey))?; - run_query( - &tmp_db, - &conn, - &format!("PRAGMA cipher = '{}';", cipher_name), - )?; + run_query(&tmp_db, &conn, &format!("PRAGMA hexkey = '{hexkey}';"))?; + run_query(&tmp_db, &conn, &format!("PRAGMA cipher = '{cipher_name}';"))?; run_query( &tmp_db, &conn, @@ -459,7 +448,7 @@ fn test_turso_header_structure() -> anyhow::Result<()> { do_flush(&conn, &tmp_db)?; } - verify_header(&db_path.to_str().unwrap(), expected_id, description)?; + verify_header(db_path.to_str().unwrap(), expected_id, description)?; } Ok(()) } From ec1bf8888c112cfeb16f7bfea34a8219beb1f20c Mon Sep 17 00:00:00 2001 From: Avinash Sajjanshetty Date: Sun, 28 Sep 2025 21:56:07 +0530 Subject: [PATCH 5/5] refactor to address review comments --- core/storage/encryption.rs | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/core/storage/encryption.rs b/core/storage/encryption.rs index 4ef9e63b9..3adb2eb14 100644 --- a/core/storage/encryption.rs +++ b/core/storage/encryption.rs @@ -29,8 +29,8 @@ use turso_macros::match_ignore_ascii_case; /// ┌───────────────┐ ┌───────────────┐ /// │ │ │ │ /// │ Page Content │ │ Encrypted │ -/// │ (4068 bytes) │ ────────► │ Content │ -/// │ │ │ (4068 bytes) │ +/// │ (4048 bytes) │ ────────► │ Content │ +/// │ │ │ (4048 bytes) │ /// ├───────────────┤ ├───────────────┤ /// │ Reserved │ │ Tag (32) │ /// │ (48 bytes) │ ├───────────────┤ @@ -67,6 +67,8 @@ use turso_macros::match_ignore_ascii_case; /// constants used for the Turso page header in the encrypted dbs. 
const TURSO_HEADER_PREFIX: &[u8] = b"Turso"; const TURSO_VERSION: u8 = 0x00; +const VERSION_OFFSET: usize = 5; +const CIPHER_OFFSET: usize = 6; const TURSO_HEADER_SIZE: usize = 16; const SQLITE_HEADER: &[u8] = b"SQLite format 3\0"; @@ -526,10 +528,10 @@ impl EncryptionContext { header[..TURSO_HEADER_PREFIX.len()].copy_from_slice(TURSO_HEADER_PREFIX); // version byte (1 byte) - header[5] = TURSO_VERSION; + header[VERSION_OFFSET] = TURSO_VERSION; // cipher identifier (1 byte) - header[6] = self.cipher_mode.cipher_id(); + header[CIPHER_OFFSET] = self.cipher_mode.cipher_id(); // remaining unused 9 bytes header @@ -549,14 +551,14 @@ impl EncryptionContext { )); } - let version = header[5]; + let version = header[VERSION_OFFSET]; if version != TURSO_VERSION { return Err(LimboError::InternalError(format!( "Unsupported Turso header version: expected {TURSO_VERSION}, got {version}" ))); } - let cipher_id = header[6]; + let cipher_id = header[CIPHER_OFFSET]; let header_cipher = CipherMode::from_cipher_id(cipher_id)?; if header_cipher != self.cipher_mode { return Err(LimboError::InternalError(format!( @@ -568,6 +570,14 @@ impl EncryptionContext { ))); } + if header[CIPHER_OFFSET + 1..TURSO_HEADER_SIZE] + .iter() + .any(|&b| b != 0) + { + return Err(LimboError::InternalError( + "Invalid Turso header: unused bytes must be zero".into(), + )); + } Ok(()) } @@ -676,7 +686,7 @@ impl EncryptionContext { // since this is page 1, this must have header turso_assert!( - &page[..SQLITE_HEADER.len()] == SQLITE_HEADER, + page.starts_with(SQLITE_HEADER), "Page 1 must start with SQLite header" ); @@ -686,6 +696,10 @@ impl EncryptionContext { #[cfg(debug_assertions)] { use crate::turso_assert; + // In debug builds, ensure that the reserved bytes are zeroed out. So even when we are + // reusing a page from buffer pool, we zero out in debug build so that we can be + // sure that b tree layer is not writing any data into the reserved space. 
+ // We avoid calling `memset` in release builds for performance reasons. let reserved_bytes_zeroed = reserved_bytes.iter().all(|&b| b == 0); turso_assert!( reserved_bytes_zeroed,