diff --git a/core/storage/encryption.rs b/core/storage/encryption.rs index c43a6d660..3adb2eb14 100644 --- a/core/storage/encryption.rs +++ b/core/storage/encryption.rs @@ -1,5 +1,5 @@ #![allow(unused_variables, dead_code)] -use crate::{LimboError, Result}; +use crate::{turso_assert, LimboError, Result}; use aegis::aegis128l::Aegis128L; use aegis::aegis128x2::Aegis128X2; use aegis::aegis128x4::Aegis128X4; @@ -12,6 +12,66 @@ use aes_gcm::{ }; use turso_macros::match_ignore_ascii_case; +/// Encryption Scheme +/// We support two major algorithms: AEGIS, AES GCM. These algorithms picked so that they also do +/// verification of the ciphertext, so we don't need to implement. That is if the page is corrupted +/// (or tampered), then we will know if we got garbage bytes post decryption. +/// +/// We perform encryption at the page level, i.e., each page is encrypted and decrypted individually. +/// We store the nonce and tag (or the verification bits) in the page itself. We also generate a +/// random nonce every time we encrypt a page. +/// +/// Example: Assume the page size is 4096 bytes and we use AEGIS 256. So we reserve the last 48 bytes +/// for the nonce (32 bytes) and tag (16 bytes). +/// +/// ```ignore +/// Unencrypted Page Encrypted Page +/// ┌───────────────┐ ┌───────────────┐ +/// │ │ │ │ +/// │ Page Content │ │ Encrypted │ +/// │ (4048 bytes) │ ────────► │ Content │ +/// │ │ │ (4048 bytes) │ +/// ├───────────────┤ ├───────────────┤ +/// │ Reserved │ │ Tag (32) │ +/// │ (48 bytes) │ ├───────────────┤ +/// │ [empty] │ │ Nonce (12) │ +/// └───────────────┘ └───────────────┘ +/// 4096 bytes 4096 bytes +/// ``` +/// +/// The above applies to all the pages except Page 1. The page 1 contains the SQLite header (the +/// first 100 bytes). Specifically, the bytes 16 to 24 contain metadata which is required to +/// initialise the connection, which happens before we can setup the encryption context. So, we +/// don't encrypt the header but instead use the header data as additional data (AD) for the +/// encryption of the rest of the page. This provides us protection against tampering and +/// corruption for the unencrypted portion. +/// +/// On disk, the encrypted page 1 contains special bytes replacing the SQLite's magic bytes (the +/// first 16 bytes): +/// +/// ```ignore +/// Turso Header (16 bytes) +/// ┌─────────┬───────┬────────┬──────────────────┐ +/// │ │ │ │ │ +/// │ Turso │Version│ Cipher │ Unused │ +/// │ (5) │ (1) │ (1) │ (9 bytes) │ +/// │ │ │ │ │ +/// └─────────┴───────┴────────┴──────────────────┘ +/// 0-4 5 6 7-15 +/// +/// Standard SQLite Header: "SQLite format 3\0" (16 bytes) +/// ↓ +/// Turso Encrypted Header: "Turso" + Version + Cipher ID + Unused +/// ``` +/// +/// constants used for the Turso page header in the encrypted dbs. +const TURSO_HEADER_PREFIX: &[u8] = b"Turso"; +const TURSO_VERSION: u8 = 0x00; +const VERSION_OFFSET: usize = 5; +const CIPHER_OFFSET: usize = 6; +const TURSO_HEADER_SIZE: usize = 16; +const SQLITE_HEADER: &[u8] = b"SQLite format 3\0"; + #[derive(Clone)] pub enum EncryptionKey { Key128([u8; 16]), @@ -189,9 +249,12 @@ macro_rules! define_aes_gcm_cipher { }) } - fn encrypt(&self, plaintext: &[u8], _ad: &[u8]) -> Result<(Vec, [u8; 12])> { + fn encrypt(&self, plaintext: &[u8], ad: &[u8]) -> Result<(Vec, [u8; 12])> { let nonce = <$cipher_type>::generate_nonce(&mut OsRng); - let ciphertext = self.cipher.encrypt(&nonce, plaintext).map_err(|e| { + let ciphertext = self.cipher.encrypt(&nonce, aes_gcm::aead::Payload { + msg: plaintext, + aad: ad, + }).map_err(|e| { LimboError::InternalError(format!("{} encryption failed: {e:?}", $name)) })?; let mut nonce_array = [0u8; 12]; @@ -199,10 +262,13 @@ macro_rules! define_aes_gcm_cipher { Ok((ciphertext, nonce_array)) } - fn decrypt(&self, ciphertext: &[u8], nonce: &[u8; 12], _ad: &[u8]) -> Result> { + fn decrypt(&self, ciphertext: &[u8], nonce: &[u8; 12], ad: &[u8]) -> Result> { let nonce = Nonce::from_slice(nonce); self.cipher - .decrypt(nonce, ciphertext) + .decrypt(nonce, aes_gcm::aead::Payload { + msg: ciphertext, + aad: ad, + }) .map_err(|_| -> LimboError { CipherError::DecryptionFailed { cipher: $name }.into() }) } } @@ -349,6 +415,37 @@ impl CipherMode { pub fn metadata_size(&self) -> usize { self.nonce_size() + self.tag_size() } + + /// Returns the cipher identifier byte for Turso header + pub fn cipher_id(&self) -> u8 { + match self { + CipherMode::Aes128Gcm => 1, + CipherMode::Aes256Gcm => 2, + CipherMode::Aegis256 => 3, + CipherMode::Aegis256X2 => 4, + CipherMode::Aegis256X4 => 5, + CipherMode::Aegis128L => 6, + CipherMode::Aegis128X2 => 7, + CipherMode::Aegis128X4 => 8, + } + } + + /// Creates a CipherMode from cipher identifier byte. This is used when read from Turso header. + pub fn from_cipher_id(id: u8) -> Result { + match id { + 1 => Ok(CipherMode::Aes128Gcm), + 2 => Ok(CipherMode::Aes256Gcm), + 3 => Ok(CipherMode::Aegis256), + 4 => Ok(CipherMode::Aegis256X2), + 5 => Ok(CipherMode::Aegis256X4), + 6 => Ok(CipherMode::Aegis128L), + 7 => Ok(CipherMode::Aegis128X2), + 8 => Ok(CipherMode::Aegis128X4), + _ => Err(LimboError::InvalidArgument(format!( + "Unknown cipher ID: {id}" + ))), + } + } } #[derive(Clone)] @@ -423,9 +520,73 @@ impl EncryptionContext { self.cipher_mode.metadata_size() as u8 } + /// Creates Turso header for encrypted page 1 + fn create_turso_header(&self) -> [u8; TURSO_HEADER_SIZE] { + let mut header = [0u8; TURSO_HEADER_SIZE]; + + // "Turso" prefix (5 bytes) + header[..TURSO_HEADER_PREFIX.len()].copy_from_slice(TURSO_HEADER_PREFIX); + + // version byte (1 byte) + header[VERSION_OFFSET] = TURSO_VERSION; + + // cipher identifier (1 byte) + header[CIPHER_OFFSET] = self.cipher_mode.cipher_id(); + + // remaining unused 9 bytes + header + } + + /// Validates and extracts cipher mode from Turso header + fn validate_turso_header(&self, header: &[u8]) -> Result<()> { + if header.len() < TURSO_HEADER_SIZE { + return Err(LimboError::InternalError( + "Header too short for encrypted Turso db".into(), + )); + } + + if &header[..TURSO_HEADER_PREFIX.len()] != TURSO_HEADER_PREFIX { + return Err(LimboError::InternalError( + "Invalid Turso header: prefix mismatch".into(), + )); + } + + let version = header[VERSION_OFFSET]; + if version != TURSO_VERSION { + return Err(LimboError::InternalError(format!( + "Unsupported Turso header version: expected {TURSO_VERSION}, got {version}" + ))); + } + + let cipher_id = header[CIPHER_OFFSET]; + let header_cipher = CipherMode::from_cipher_id(cipher_id)?; + if header_cipher != self.cipher_mode { + return Err(LimboError::InternalError(format!( + "Cipher mode mismatch: expected {:?} (ID {}), got {:?} (ID {})", + self.cipher_mode, + self.cipher_mode.cipher_id(), + header_cipher, + cipher_id + ))); + } + + if header[CIPHER_OFFSET + 1..TURSO_HEADER_SIZE] + .iter() + .any(|&b| b != 0) + { + return Err(LimboError::InternalError( + "Invalid Turso header: unused bytes must be zero".into(), + )); + } + Ok(()) + } + #[cfg(feature = "encryption")] pub fn encrypt_page(&self, page: &[u8], page_id: usize) -> Result> { use crate::storage::sqlite3_ondisk::DatabaseHeader; + if page_id == DatabaseHeader::PAGE_ID { + return self.encrypt_page_1(page); + } tracing::debug!("encrypting page {}", page_id); assert_eq!( page.len(), @@ -434,19 +595,12 @@ impl EncryptionContext { self.page_size ); - let encryption_start_offset = match page_id { - DatabaseHeader::PAGE_ID => DatabaseHeader::SIZE, - _ => 0, - }; let metadata_size = self.cipher_mode.metadata_size(); let reserved_bytes = &page[self.page_size - metadata_size..]; #[cfg(debug_assertions)] { use crate::turso_assert; - // In debug builds, ensure that the reserved bytes are zeroed out. So even when we are - // reusing a page from buffer pool, we zero out in debug build so that we can be - // sure that b tree layer is not writing any data into the reserved space. let reserved_bytes_zeroed = reserved_bytes.iter().all(|&b| b == 0); turso_assert!( reserved_bytes_zeroed, @@ -454,20 +608,18 @@ impl EncryptionContext { ); } - let payload = &page[encryption_start_offset..self.page_size - metadata_size]; + let payload = &page[..self.page_size - metadata_size]; let (encrypted, nonce) = self.encrypt_raw(payload)?; let nonce_size = self.cipher_mode.nonce_size(); assert_eq!( encrypted.len(), - self.page_size - nonce_size - encryption_start_offset, + self.page_size - nonce_size, "Encrypted page must be exactly {} bytes", - self.page_size - nonce_size - encryption_start_offset + self.page_size - nonce_size ); let mut result = Vec::with_capacity(self.page_size); - - result.extend_from_slice(&page[..encryption_start_offset]); result.extend_from_slice(&encrypted); result.extend_from_slice(&nonce); assert_eq!( @@ -482,6 +634,9 @@ impl EncryptionContext { #[cfg(feature = "encryption")] pub fn decrypt_page(&self, encrypted_page: &[u8], page_id: usize) -> Result> { use crate::storage::sqlite3_ondisk::DatabaseHeader; + if page_id == DatabaseHeader::PAGE_ID { + return self.decrypt_page_1(encrypted_page); + } tracing::debug!("decrypting page {}", page_id); assert_eq!( encrypted_page.len(), @@ -489,32 +644,147 @@ impl EncryptionContext { "Encrypted page data must be exactly {} bytes", self.page_size ); - // for page 1, the encrypted page starts after the database header - // for other pages, the encrypted page starts at the beginning - let encrypted_page_offset = match page_id { - DatabaseHeader::PAGE_ID => DatabaseHeader::SIZE, - _ => 0, - }; let nonce_size = self.cipher_mode.nonce_size(); let nonce_offset = encrypted_page.len() - nonce_size; - let payload = &encrypted_page[encrypted_page_offset..nonce_offset]; + let payload = &encrypted_page[..nonce_offset]; let nonce = &encrypted_page[nonce_offset..]; let decrypted_data = self.decrypt_raw(payload, nonce)?; + let metadata_size = self.cipher_mode.metadata_size(); + assert_eq!( + decrypted_data.len(), + self.page_size - metadata_size, + "Decrypted page data must be exactly {} bytes", + self.page_size - metadata_size + ); + + let mut result = Vec::with_capacity(self.page_size); + result.extend_from_slice(&decrypted_data); + result.resize(self.page_size, 0); + + assert_eq!( + result.len(), + self.page_size, + "Decrypted page data must be exactly {} bytes", + self.page_size + ); + Ok(result) + } + + #[cfg(feature = "encryption")] + fn encrypt_page_1(&self, page: &[u8]) -> Result> { + use crate::storage::sqlite3_ondisk::DatabaseHeader; + + tracing::debug!("encrypting page 1"); + assert_eq!( + page.len(), + self.page_size, + "Page data must be exactly {} bytes", + self.page_size + ); + + // since this is page 1, this must have header + turso_assert!( + page.starts_with(SQLITE_HEADER), + "Page 1 must start with SQLite header" + ); + + let metadata_size = self.cipher_mode.metadata_size(); + let reserved_bytes = &page[self.page_size - metadata_size..]; + + #[cfg(debug_assertions)] + { + use crate::turso_assert; + // In debug builds, ensure that the reserved bytes are zeroed out. So even when we are + // reusing a page from buffer pool, we zero out in debug build so that we can be + // sure that b tree layer is not writing any data into the reserved space. + // We avoid calling `memset` in release builds for performance reasons. + let reserved_bytes_zeroed = reserved_bytes.iter().all(|&b| b == 0); + turso_assert!( + reserved_bytes_zeroed, + "last reserved bytes must be empty/zero, but found non-zero bytes" + ); + } + + // page 1 encryption: + // 1. First 16 bytes are replaced with Turso magic bytes + // 2. Next 84 bytes (16-100) are kept as-is (not encrypted) + // 3. Remaining bytes (100-end) are encrypted + // 4. The header (the first 100 bytes) as associated data + let turso_header = self.create_turso_header(); + let mut new_header = Vec::with_capacity(DatabaseHeader::SIZE); + new_header.extend_from_slice(&turso_header); + new_header.extend_from_slice(&page[TURSO_HEADER_SIZE..DatabaseHeader::SIZE]); + + let payload = &page[DatabaseHeader::SIZE..self.page_size - metadata_size]; + let (encrypted, nonce) = self.encrypt_raw_with_ad(payload, &new_header)?; + + let nonce_size = self.cipher_mode.nonce_size(); + assert_eq!( + encrypted.len(), + self.page_size - nonce_size - DatabaseHeader::SIZE, + "Encrypted page must be exactly {} bytes", + self.page_size - nonce_size - DatabaseHeader::SIZE + ); + + let mut result = Vec::with_capacity(self.page_size); + + // 1. copy the header + result.extend_from_slice(&new_header); + // 2. copy the encrypted payload + result.extend_from_slice(&encrypted); + // 3. now add the nonce + result.extend_from_slice(&nonce); + + assert_eq!( + result.len(), + self.page_size, + "Encrypted page must be exactly {} bytes", + self.page_size + ); + Ok(result) + } + + #[cfg(feature = "encryption")] + fn decrypt_page_1(&self, encrypted_page: &[u8]) -> Result> { + use crate::storage::sqlite3_ondisk::DatabaseHeader; + + tracing::debug!("decrypting page 1"); + assert_eq!( + encrypted_page.len(), + self.page_size, + "Encrypted page data must be exactly {} bytes", + self.page_size + ); + + self.validate_turso_header(&encrypted_page[..TURSO_HEADER_SIZE])?; + + let nonce_size = self.cipher_mode.nonce_size(); + let nonce_offset = encrypted_page.len() - nonce_size; + let payload = &encrypted_page[DatabaseHeader::SIZE..nonce_offset]; + let nonce = &encrypted_page[nonce_offset..]; + + // it's important to use the header on disk (with Turso magic bytes) as associated data + // for protection against tampering the header + let header = &encrypted_page[..DatabaseHeader::SIZE]; + let decrypted_data = self.decrypt_raw_with_ad(payload, nonce, header)?; let metadata_size = self.cipher_mode.metadata_size(); assert_eq!( decrypted_data.len(), - self.page_size - metadata_size - encrypted_page_offset, + self.page_size - metadata_size - DatabaseHeader::SIZE, "Decrypted page data must be exactly {} bytes", - self.page_size - metadata_size - encrypted_page_offset + self.page_size - metadata_size - DatabaseHeader::SIZE ); + // reconstruct the page with the appropriate SQLite header let mut result = Vec::with_capacity(self.page_size); - result.extend_from_slice(&encrypted_page[..encrypted_page_offset]); + result.extend_from_slice(SQLITE_HEADER); + result.extend_from_slice(&encrypted_page[TURSO_HEADER_SIZE..DatabaseHeader::SIZE]); result.extend_from_slice(&decrypted_data); result.resize(self.page_size, 0); + assert_eq!( result.len(), self.page_size, @@ -527,10 +797,14 @@ impl EncryptionContext { /// encrypts raw data using the configured cipher, returns ciphertext and nonce fn encrypt_raw(&self, plaintext: &[u8]) -> Result<(Vec, Vec)> { const AD: &[u8] = b""; + self.encrypt_raw_with_ad(plaintext, AD) + } + /// encrypts raw data with associated data using the configured cipher + fn encrypt_raw_with_ad(&self, plaintext: &[u8], ad: &[u8]) -> Result<(Vec, Vec)> { macro_rules! encrypt_cipher { ($cipher:expr) => {{ - let (ciphertext, nonce) = $cipher.encrypt(plaintext, AD)?; + let (ciphertext, nonce) = $cipher.encrypt(plaintext, ad)?; Ok((ciphertext, nonce.to_vec())) }}; } @@ -549,7 +823,10 @@ impl EncryptionContext { fn decrypt_raw(&self, ciphertext: &[u8], nonce: &[u8]) -> Result> { const AD: &[u8] = b""; + self.decrypt_raw_with_ad(ciphertext, nonce, AD) + } + fn decrypt_raw_with_ad(&self, ciphertext: &[u8], nonce: &[u8], ad: &[u8]) -> Result> { macro_rules! decrypt_with_nonce { ($cipher:expr, $nonce_size:literal, $name:literal) => {{ let nonce_array: [u8; $nonce_size] = nonce.try_into().map_err(|_| { @@ -560,7 +837,7 @@ impl EncryptionContext { nonce.len() )) })?; - $cipher.decrypt(ciphertext, &nonce_array, AD) + $cipher.decrypt(ciphertext, &nonce_array, ad) }}; } @@ -715,6 +992,15 @@ mod tests { hex::encode(bytes) } + fn create_test_page_1() -> Vec { + let mut page = vec![0u8; DEFAULT_ENCRYPTED_PAGE_SIZE]; + page[..SQLITE_HEADER.len()].copy_from_slice(SQLITE_HEADER); + let mut rng = rand::thread_rng(); + // 48 is the max reserved bytes we might need for metadata with any cipher + rng.fill(&mut page[SQLITE_HEADER.len()..DEFAULT_ENCRYPTED_PAGE_SIZE - 48]); + page + } + test_aes_cipher_wrapper!( test_aes128gcm_cipher_wrapper, Aes128GcmCipher, @@ -731,6 +1017,122 @@ mod tests { "Hello, AES-128-GCM!" ); + #[test] + fn test_page_1_encrypt_decrypt_round_trip_with_ad() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + let page_data = create_test_page_1(); + let encrypted = ctx.encrypt_page(&page_data, 1).unwrap(); + assert_eq!(encrypted.len(), DEFAULT_ENCRYPTED_PAGE_SIZE); + + // check that header is readable directly from disk (not encrypted) + assert_eq!(&encrypted[..5], b"Turso"); + assert_eq!(encrypted[5], TURSO_VERSION); + assert_eq!(encrypted[6], CipherMode::Aegis256.cipher_id()); + + // header should be unencrypted, but data after DatabaseHeader::SIZE should be different + assert_eq!(&encrypted[16..100], &page_data[16..100]); // header portion + assert_ne!(&encrypted[100..200], &page_data[100..200]); // some encrypted portion + + // decrypt page 1 + let decrypted = ctx.decrypt_page(&encrypted, 1).unwrap(); + assert_eq!(decrypted.len(), DEFAULT_ENCRYPTED_PAGE_SIZE); + + // check that SQLite header was restored + assert_eq!(&decrypted[..SQLITE_HEADER.len()], SQLITE_HEADER); + assert_eq!(decrypted, page_data); + } + + #[test] + fn test_turso_header_validation() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + // test cipher_id conversion + assert_eq!(CipherMode::Aes128Gcm.cipher_id(), 1); + assert_eq!(CipherMode::Aes256Gcm.cipher_id(), 2); + assert_eq!(CipherMode::Aegis256.cipher_id(), 3); + assert_eq!(CipherMode::Aegis128L.cipher_id(), 6); + + // test from_cipher_id conversion + assert_eq!( + CipherMode::from_cipher_id(1).unwrap(), + CipherMode::Aes128Gcm + ); + assert_eq!(CipherMode::from_cipher_id(3).unwrap(), CipherMode::Aegis256); + assert!(CipherMode::from_cipher_id(99).is_err()); + + // test header creation + let header = ctx.create_turso_header(); + assert_eq!(&header[..5], b"Turso"); + assert_eq!(header[5], TURSO_VERSION); + assert_eq!(header[6], 3); // AEGIS-256 + assert_eq!(&header[7..], &[0u8; 9]); // unused bytes are zero + } + + #[test] + fn test_invalid_turso_header_fails_decrypt() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + let page_data = create_test_page_1(); + let encrypted = ctx.encrypt_page(&page_data, 1).unwrap(); + + // corrupt the header prefix + let mut corrupted = encrypted.clone(); + corrupted[0] = b'V'; // make `Turso` to `Vurso` + assert!(ctx.decrypt_page(&corrupted, 1).is_err()); + + // test with wrong cipher ID + let mut wrong_cipher = encrypted.clone(); + wrong_cipher[6] = 99; // invalid cipher ID + assert!(ctx.decrypt_page(&wrong_cipher, 1).is_err()); + } + + #[test] + fn test_associated_data_validation() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + let page_data = create_test_page_1(); + let encrypted = ctx.encrypt_page(&page_data, 1).unwrap(); + + // modify a byte in the preserved header portion (bytes 16-100) + let mut corrupted_ad = encrypted.clone(); + corrupted_ad[50] ^= 1; // flip one bit in the associated data portion + + // this should fail decryption because associated data doesn't match + let decrypt_result = ctx.decrypt_page(&corrupted_ad, 1); + assert!( + decrypt_result.is_err(), + "Decryption should fail with corrupted associated data" + ); + } + + #[test] + fn test_turso_header_corruption_detection() { + let key = EncryptionKey::from_hex_string(&generate_random_hex_key()).unwrap(); + let ctx = EncryptionContext::new(CipherMode::Aegis256, &key, DEFAULT_ENCRYPTED_PAGE_SIZE) + .unwrap(); + + let page_data = create_test_page_1(); + let encrypted = ctx.encrypt_page(&page_data, 1).unwrap(); + + let mut corrupted_turso_header = encrypted.clone(); + corrupted_turso_header[7] ^= 1; + + let decrypt_result = ctx.decrypt_page(&corrupted_turso_header, 1); + assert!( + decrypt_result.is_err(), + "Decryption should fail with corrupted Turso header" + ); + } + #[test] fn test_aes128gcm_encrypt_decrypt_round_trip() { let mut rng = rand::thread_rng(); diff --git a/tests/integration/query_processing/encryption.rs b/tests/integration/query_processing/encryption.rs index 2682d1104..3fc7a8e63 100644 --- a/tests/integration/query_processing/encryption.rs +++ b/tests/integration/query_processing/encryption.rs @@ -196,3 +196,259 @@ fn test_non_4k_page_size_encryption() -> anyhow::Result<()> { Ok(()) } + +#[test] +fn test_corruption_turso_magic_bytes() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let db_name = format!("test-corruption-magic-{}.db", rng().next_u32()); + let tmp_db = TempDatabase::new(&db_name, false); + let db_path = tmp_db.path.clone(); + + { + let conn = tmp_db.connect_limbo(); + run_query( + &tmp_db, + &conn, + "PRAGMA hexkey = 'b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327';", + )?; + run_query(&tmp_db, &conn, "PRAGMA cipher = 'aegis256';")?; + run_query( + &tmp_db, + &conn, + "CREATE TABLE test (id INTEGER PRIMARY KEY, value TEXT);", + )?; + run_query( + &tmp_db, + &conn, + "INSERT INTO test (value) VALUES ('Test corruption')", + )?; + run_query(&tmp_db, &conn, "PRAGMA wal_checkpoint(TRUNCATE);")?; + do_flush(&conn, &tmp_db)?; + } + + // corrupt the Turso magic bytes by changing "Turso" to "Vurso" (the db name as it was intended) + { + use std::fs::OpenOptions; + use std::io::{Seek, SeekFrom, Write}; + + let mut file = OpenOptions::new().write(true).open(&db_path)?; + + file.seek(SeekFrom::Start(0))?; + file.write_all(b"V")?; + } + + // try to connect to the corrupted database - this should fail + { + let uri = format!( + "file:{}?cipher=aegis256&hexkey=b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + db_path.to_str().unwrap() + ); + + let should_panic = panic::catch_unwind(panic::AssertUnwindSafe(|| { + let (_io, conn) = + turso_core::Connection::from_uri(&uri, true, false, false, false).unwrap(); + run_query_on_row(&tmp_db, &conn, "SELECT * FROM test", |_row: &Row| {}).unwrap(); + })); + + assert!( + should_panic.is_err(), + "should panic when accessing encrypted DB with corrupted Turso magic bytes" + ); + } + + Ok(()) +} + +#[test] +fn test_corruption_associated_data_bytes() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + let db_name = format!("test-corruption-ad-{}.db", rng().next_u32()); + let tmp_db = TempDatabase::new(&db_name, false); + let db_path = tmp_db.path.clone(); + + { + let conn = tmp_db.connect_limbo(); + run_query( + &tmp_db, + &conn, + "PRAGMA hexkey = 'b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327';", + )?; + run_query(&tmp_db, &conn, "PRAGMA cipher = 'aegis256';")?; + run_query( + &tmp_db, + &conn, + "CREATE TABLE test (id INTEGER PRIMARY KEY, value TEXT);", + )?; + run_query( + &tmp_db, + &conn, + "INSERT INTO test (value) VALUES ('Test AD corruption')", + )?; + run_query(&tmp_db, &conn, "PRAGMA wal_checkpoint(TRUNCATE);")?; + do_flush(&conn, &tmp_db)?; + } + + // test corruption at different positions in the header (the first 100 bytes) + let corruption_positions = [3, 7, 16, 30, 50, 70, 99]; + + for &corrupt_pos in &corruption_positions { + let test_db_name = format!( + "test-corruption-ad-pos-{}-{}.db", + corrupt_pos, + rng().next_u32() + ); + let test_tmp_db = TempDatabase::new(&test_db_name, false); + let test_db_path = test_tmp_db.path.clone(); + std::fs::copy(&db_path, &test_db_path)?; + + // corrupt one byte + { + use std::fs::OpenOptions; + use std::io::{Read, Seek, SeekFrom, Write}; + + let mut file = OpenOptions::new() + .read(true) + .write(true) + .open(&test_db_path)?; + + file.seek(SeekFrom::Start(corrupt_pos as u64))?; + let mut original_byte = [0u8; 1]; + file.read_exact(&mut original_byte)?; + + // corrupt it by flipping all bits + let corrupted_byte = [!original_byte[0]]; + + file.seek(SeekFrom::Start(corrupt_pos as u64))?; + file.write_all(&corrupted_byte)?; + } + + // this should fail + { + let uri = format!( + "file:{}?cipher=aegis256&hexkey=b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + test_db_path.to_str().unwrap() + ); + + let should_panic = panic::catch_unwind(panic::AssertUnwindSafe(|| { + let (_io, conn) = + turso_core::Connection::from_uri(&uri, true, false, false, false).unwrap(); + run_query_on_row(&test_tmp_db, &conn, "SELECT * FROM test", |_row: &Row| {}) + .unwrap(); + })); + + assert!( + should_panic.is_err(), + "should panic when accessing encrypted DB with corrupted associated data at position {corrupt_pos}", + ); + } + } + + Ok(()) +} + +#[test] +fn test_turso_header_structure() -> anyhow::Result<()> { + let _ = env_logger::try_init(); + + let verify_header = + |db_path: &str, expected_cipher_id: u8, description: &str| -> anyhow::Result<()> { + use std::fs::File; + use std::io::{Read, Seek, SeekFrom}; + + let mut file = File::open(db_path)?; + let mut header = [0u8; 16]; + file.seek(SeekFrom::Start(0))?; + file.read_exact(&mut header)?; + + assert_eq!( + &header[0..5], + b"Turso", + "Magic bytes should be 'Turso' for {description}" + ); + assert_eq!(header[5], 0x00, "Version should be 0x00 for {description}"); + assert_eq!( + header[6], expected_cipher_id, + "Cipher ID should be {expected_cipher_id} for {description}" + ); + + // the unused bytes should be zeroed + for (i, &byte) in header[7..16].iter().enumerate() { + assert_eq!( + byte, + 0, + "Unused byte at position {} should be 0 for {}", + i + 7, + description + ); + } + + println!("Verified {} header: cipher ID = {}", description, header[6]); + Ok(()) + }; + + let test_cases = [ + ( + "aes128gcm", + 1, + "AES-128-GCM", + "b1bbfda4f589dc9daaf004fe21111e00", + ), + ( + "aes256gcm", + 2, + "AES-256-GCM", + "b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + ), + ( + "aegis256", + 3, + "AEGIS-256", + "b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + ), + ( + "aegis256x2", + 4, + "AEGIS-256X2", + "b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327", + ), + ( + "aegis128l", + 6, + "AEGIS-128L", + "b1bbfda4f589dc9daaf004fe21111e00", + ), + ( + "aegis128x2", + 7, + "AEGIS-128X2", + "b1bbfda4f589dc9daaf004fe21111e00", + ), + ( + "aegis128x4", + 8, + "AEGIS-128X4", + "b1bbfda4f589dc9daaf004fe21111e00", + ), + ]; + + for (cipher_name, expected_id, description, hexkey) in test_cases { + let db_name = format!("test-header-{}-{}.db", cipher_name, rng().next_u32()); + let tmp_db = TempDatabase::new(&db_name, false); + let db_path = tmp_db.path.clone(); + + { + let conn = tmp_db.connect_limbo(); + run_query(&tmp_db, &conn, &format!("PRAGMA hexkey = '{hexkey}';"))?; + run_query(&tmp_db, &conn, &format!("PRAGMA cipher = '{cipher_name}';"))?; + run_query( + &tmp_db, + &conn, + "CREATE TABLE test (id INTEGER PRIMARY KEY, value TEXT);", + )?; + do_flush(&conn, &tmp_db)?; + } + + verify_header(db_path.to_str().unwrap(), expected_id, description)?; + } + Ok(()) +}