From 039fe22405c5f8e167cc36cfbcd17e82bbe60ffa Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 4 Aug 2025 18:58:09 -0400 Subject: [PATCH 01/10] Add copy_to to io::File trait to support copying DB files --- core/io/mod.rs | 37 +++++++++++++++++++++++++++++++++++++ core/lib.rs | 11 +++++++++++ core/storage/database.rs | 6 ++++++ 3 files changed, 54 insertions(+) diff --git a/core/io/mod.rs b/core/io/mod.rs index 3a08940b5..e209561d3 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -52,6 +52,43 @@ pub trait File: Send + Sync { } fn size(&self) -> Result; fn truncate(&self, len: usize, c: Completion) -> Result; + fn copy_to(&self, io: &dyn IO, path: &str) -> Result<()> { + // Open or create the destination file + let dest_file = io.open_file(path, OpenFlags::Create, false)?; + // Get the size of the source file + let file_size = self.size()? as usize; + if file_size == 0 { + return Ok(()); + } + + // use 1MB chunk size + const BUFFER_SIZE: usize = 1024 * 1024; + let mut pos = 0; + + while pos < file_size { + let chunk_size = (file_size - pos).min(BUFFER_SIZE); + // Read from source + let read_buffer = Arc::new(RefCell::new(Buffer::allocate(chunk_size, Rc::new(|_| {})))); + let read_completion = self.pread( + pos, + Completion::new_read(read_buffer.clone(), move |_, _| {}), + )?; + + // Wait for read to complete + io.wait_for_completion(read_completion)?; + + // Write to destination + let write_completion = + dest_file.pwrite(pos, read_buffer, Completion::new_write(|_| {}))?; + io.wait_for_completion(write_completion)?; + + pos += chunk_size; + } + let sync_completion = dest_file.sync(Completion::new_sync(|_| {}))?; + io.wait_for_completion(sync_completion)?; + + Ok(()) + } } #[derive(Debug, Copy, Clone, PartialEq)] diff --git a/core/lib.rs b/core/lib.rs index 903e1b552..bd0eb257c 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -1737,6 +1737,17 @@ impl Connection { pub fn get_pager(&self) -> Rc { self.pager.borrow().clone() } + + /// Copy the current Database and 
write out to a new file + pub fn copy_db(&self, file: &str) -> Result<()> { + let io = self._db.io.clone(); + let disabled = false; + // checkpoint so everything is in the DB file before copying + self.pager + .borrow_mut() + .wal_checkpoint(disabled, CheckpointMode::Truncate)?; + self.pager.borrow_mut().db_file.copy_to(&*io, file) + } } pub struct Statement { diff --git a/core/storage/database.rs b/core/storage/database.rs index 3687cacb8..1e82b1028 100644 --- a/core/storage/database.rs +++ b/core/storage/database.rs @@ -22,6 +22,7 @@ pub trait DatabaseStorage: Send + Sync { fn sync(&self, c: Completion) -> Result; fn size(&self) -> Result; fn truncate(&self, len: usize, c: Completion) -> Result; + fn copy_to(&self, io: &dyn crate::IO, path: &str) -> Result<()>; } #[cfg(feature = "fs")] @@ -95,6 +96,11 @@ impl DatabaseStorage for DatabaseFile { let c = self.file.truncate(len, c)?; Ok(c) } + + #[instrument(skip_all, level = Level::INFO)] + fn copy_to(&self, io: &dyn crate::IO, path: &str) -> Result<()> { + self.file.copy_to(io, path) + } } #[cfg(feature = "fs")] From 736f78de64f934fb6b72ccf74a24e8aab1b854b9 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 4 Aug 2025 18:58:28 -0400 Subject: [PATCH 02/10] Add .copy CLI command --- cli/app.rs | 5 +++++ cli/commands/args.rs | 5 +++++ cli/commands/mod.rs | 7 ++++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cli/app.rs b/cli/app.rs index d624ce3a7..210a95fce 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -713,6 +713,11 @@ impl Limbo { HeadersMode::Off => false, }; } + Command::Copy(args) => { + if let Err(e) = self.conn.copy_db(&args.output_file) { + let _ = self.writeln(e.to_string()); + } + } }, } } diff --git a/cli/commands/args.rs b/cli/commands/args.rs index ab6a96705..c2744b6bf 100644 --- a/cli/commands/args.rs +++ b/cli/commands/args.rs @@ -143,6 +143,11 @@ pub struct HeadersArgs { pub mode: HeadersMode, } +#[derive(Debug, Clone, Args)] +pub struct CopyArgs { + pub output_file: String, +} + 
#[derive(ValueEnum, Copy, Clone, Debug, PartialEq, Eq)] pub enum HeadersMode { On, diff --git a/cli/commands/mod.rs b/cli/commands/mod.rs index 8c2eec10c..08f12d63b 100644 --- a/cli/commands/mod.rs +++ b/cli/commands/mod.rs @@ -9,7 +9,10 @@ use args::{ use clap::Parser; use import::ImportArgs; -use crate::input::{AFTER_HELP_MSG, BEFORE_HELP_MSG}; +use crate::{ + commands::args::CopyArgs, + input::{AFTER_HELP_MSG, BEFORE_HELP_MSG}, +}; #[derive(Parser, Debug)] #[command( @@ -86,6 +89,8 @@ pub enum Command { /// Toggle column headers on/off in list mode #[command(name = "headers", display_name = ".headers")] Headers(HeadersArgs), + #[command(name = "copy", display_name = ".copy")] + Copy(CopyArgs), } const _HELP_TEMPLATE: &str = "{before-help}{name} From 837278c2d0e689dc8004926a654318399565bd8d Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 4 Aug 2025 19:00:15 -0400 Subject: [PATCH 03/10] Add .help output for copying db file cmd --- cli/input.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cli/input.rs b/cli/input.rs index 247798021..4cc42383a 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -239,6 +239,9 @@ pub const AFTER_HELP_MSG: &str = r#"Usage Examples: 17. To turn off column headers in list mode: .headers off +18. To copy the open database to another file: + .copy output_file.db + Note: - All SQL commands must end with a semicolon (;). - Special commands start with a dot (.) 
and are not required to end with a semicolon."#; From 7e42b97b939883ba0a74de9cb5023b30197be03b Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 4 Aug 2025 19:01:00 -0400 Subject: [PATCH 04/10] Add .help output for copying db file cmd --- cli/input.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/input.rs b/cli/input.rs index 4cc42383a..7e8582c3f 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -182,13 +182,13 @@ pub fn get_io(db_location: DbLocation, io_choice: &str) -> anyhow::Result Date: Mon, 4 Aug 2025 20:29:50 -0400 Subject: [PATCH 05/10] Rename .copy -> .clone --- cli/app.rs | 2 +- cli/commands/args.rs | 2 +- cli/commands/mod.rs | 6 +++--- cli/input.rs | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cli/app.rs b/cli/app.rs index 210a95fce..480e7e63f 100644 --- a/cli/app.rs +++ b/cli/app.rs @@ -713,7 +713,7 @@ impl Limbo { HeadersMode::Off => false, }; } - Command::Copy(args) => { + Command::Clone(args) => { if let Err(e) = self.conn.copy_db(&args.output_file) { let _ = self.writeln(e.to_string()); } diff --git a/cli/commands/args.rs b/cli/commands/args.rs index c2744b6bf..5b83c08b3 100644 --- a/cli/commands/args.rs +++ b/cli/commands/args.rs @@ -144,7 +144,7 @@ pub struct HeadersArgs { } #[derive(Debug, Clone, Args)] -pub struct CopyArgs { +pub struct CloneArgs { pub output_file: String, } diff --git a/cli/commands/mod.rs b/cli/commands/mod.rs index 08f12d63b..ce4ec920f 100644 --- a/cli/commands/mod.rs +++ b/cli/commands/mod.rs @@ -10,7 +10,7 @@ use clap::Parser; use import::ImportArgs; use crate::{ - commands::args::CopyArgs, + commands::args::CloneArgs, input::{AFTER_HELP_MSG, BEFORE_HELP_MSG}, }; @@ -89,8 +89,8 @@ pub enum Command { /// Toggle column headers on/off in list mode #[command(name = "headers", display_name = ".headers")] Headers(HeadersArgs), - #[command(name = "copy", display_name = ".copy")] - Copy(CopyArgs), + #[command(name = "clone", display_name = ".clone")] + Clone(CloneArgs), } 
const _HELP_TEMPLATE: &str = "{before-help}{name} diff --git a/cli/input.rs b/cli/input.rs index 7e8582c3f..b12628ebf 100644 --- a/cli/input.rs +++ b/cli/input.rs @@ -239,8 +239,8 @@ pub const AFTER_HELP_MSG: &str = r#"Usage Examples: 17. To turn off column headers in list mode: .headers off -18. To copy the open database to another file: - .copy output_file.db +18. To clone the open database to another file: + .clone output_file.db Note: - All SQL commands must end with a semicolon (;). From 04b40b4cf5cf8bd195f8eae56bd6582f06c2869c Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 4 Aug 2025 20:30:13 -0400 Subject: [PATCH 06/10] Impl copy_to for Database impl in JS bindings --- bindings/javascript/src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bindings/javascript/src/lib.rs b/bindings/javascript/src/lib.rs index 01a5b3d5e..cec30ef79 100644 --- a/bindings/javascript/src/lib.rs +++ b/bindings/javascript/src/lib.rs @@ -516,4 +516,7 @@ impl turso_core::DatabaseStorage for DatabaseFile { let c = self.file.truncate(len, c)?; Ok(c) } + fn copy_to(&self, io: &dyn turso_core::IO, path: &str) -> turso_core::Result<()> { + self.file.copy_to(io, path) + } } From e32d04ea972ba58995cb5b20ab407753548a0620 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 4 Aug 2025 20:30:49 -0400 Subject: [PATCH 07/10] Use ephemeral PlatformIO for clone method to support memory io --- core/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/lib.rs b/core/lib.rs index bd0eb257c..405ab96df 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -1739,8 +1739,10 @@ impl Connection { } /// Copy the current Database and write out to a new file + #[cfg(feature = "fs")] pub fn copy_db(&self, file: &str) -> Result<()> { - let io = self._db.io.clone(); + // use a new PlatformIO instance here to allow for copying in-memory databases + let io: Arc = Arc::new(PlatformIO::new()?); let disabled = false; // checkpoint so everything is in the DB file before copying 
self.pager From b13133167360f568107e95aa7a237f140a19e15c Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Mon, 4 Aug 2025 20:31:15 -0400 Subject: [PATCH 08/10] Add shell .py tests for .clone cli command --- testing/cli_tests/cli_test_cases.py | 45 +++++++++++++++++++++++++++++ testing/cli_tests/test_turso_cli.py | 9 +++--- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/testing/cli_tests/cli_test_cases.py b/testing/cli_tests/cli_test_cases.py index f0812d59f..85e8fbcec 100755 --- a/testing/cli_tests/cli_test_cases.py +++ b/testing/cli_tests/cli_test_cases.py @@ -312,6 +312,49 @@ def test_uri_readonly(): turso.quit() +def test_copy_db_file(): + testpath = "testing/test_copy.db" + if Path(testpath).exists(): + os.unlink(Path(testpath)) + time.sleep(0.2) # make sure closed + time.sleep(0.3) + turso = TestTursoShell(init_commands="", flags=f" {testpath}") + turso.execute_dot("create table testing(a,b,c);") + turso.run_test_fn(".schema", lambda x: "CREATE TABLE testing (a, b, c)" in x, "test-database-has-expected-schema") + for i in range(100): + turso.execute_dot(f"insert into testing (a,b,c) values ({i},{i + 1}, {i + 2});") + turso.run_test_fn("SELECT COUNT(*) FROM testing;", lambda x: "100" == x, "test-database-has-expected-count") + turso.execute_dot(f".clone {testpath}") + + turso.execute_dot(f".open {testpath}") + turso.run_test_fn(".schema", lambda x: "CREATE TABLE testing" in x, "test-copied-database-has-expected-schema") + turso.run_test_fn("SELECT COUNT(*) FROM testing;", lambda x: "100" == x, "test-copied-database-has-expected-count") + turso.quit() + + +def test_copy_memory_db_to_file(): + testpath = "testing/memory.db" + if Path(testpath).exists(): + os.unlink(Path(testpath)) + time.sleep(0.2) # make sure closed + + turso = TestTursoShell(init_commands="") + turso.execute_dot("create table testing(a,b,c);") + for i in range(100): + turso.execute_dot(f"insert into testing (a, b, c) values ({i},{i + 1}, {i + 2});") + turso.execute_dot(f".clone 
{testpath}") + turso.quit() + time.sleep(0.3) + sqlite = TestTursoShell(exec_name="sqlite3", flags=f" {testpath}") + sqlite.run_test_fn( + ".schema", lambda x: "CREATE TABLE testing (a, b, c)" in x, "test-copied-database-has-expected-schema" + ) + sqlite.run_test_fn( + "SELECT COUNT(*) FROM testing;", lambda x: "100" == x, "test-copied-database-has-expected-user-count" + ) + sqlite.quit() + + def main(): console.info("Running all turso CLI tests...") test_basic_queries() @@ -333,6 +376,8 @@ def main(): test_update_with_limit() test_update_with_limit_and_offset() test_uri_readonly() + test_copy_db_file() + test_copy_memory_db_to_file() console.info("All tests have passed") diff --git a/testing/cli_tests/test_turso_cli.py b/testing/cli_tests/test_turso_cli.py index 5083aefd4..25637de8f 100755 --- a/testing/cli_tests/test_turso_cli.py +++ b/testing/cli_tests/test_turso_cli.py @@ -135,9 +135,9 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) def run_test(self, name: str, sql: str, expected: str) -> None: console.test(f"Running test: {name}", _stack_offset=2) actual = self.shell.execute(sql) - assert ( - actual == expected - ), f"Test failed: {name}\nSQL: {sql}\nExpected:\n{repr(expected)}\nActual:\n{repr(actual)}" + assert actual == expected, ( + f"Test failed: {name}\nSQL: {sql}\nExpected:\n{repr(expected)}\nActual:\n{repr(actual)}" + ) def run_debug(self, sql: str): console.debug(f"debugging: {sql}", _stack_offset=2) @@ -160,9 +160,10 @@ INSERT INTO t VALUES (zeroblob(1024 - 1), zeroblob(1024 - 2), zeroblob(1024 - 3) path = os.path.join("testing", "testing_clone.db") if os.path.exists(path): os.remove(path) - time.sleep(0.1) # Ensure the file is removed before cloning + time.sleep(0.2) # Ensure the file is removed before cloning cmd = "sqlite3 testing/testing.db '.clone testing/testing_clone.db'" subprocess.run(cmd, shell=True, capture_output=True, text=True) + time.sleep(0.2) # Ensure lock released if not 
os.path.exists("testing/testing_clone.db"): raise RuntimeError("Failed to clone testing.db to testing/testing_clone.db") From 98f4e5cd2db959c76749bc8bfda4d5097d92e7d7 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Tue, 5 Aug 2025 12:49:24 -0400 Subject: [PATCH 09/10] Add comment/TODO about method we use to copy the db file --- core/lib.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/lib.rs b/core/lib.rs index 405ab96df..8f1f4ba6c 100644 --- a/core/lib.rs +++ b/core/lib.rs @@ -1738,8 +1738,13 @@ impl Connection { self.pager.borrow().clone() } - /// Copy the current Database and write out to a new file #[cfg(feature = "fs")] + /// Copy the current Database and write out to a new file. + /// TODO: sqlite3 instead essentially does the equivalent of + /// `.dump` and creates a new .db file from that. + /// + /// Because we are instead making a copy of the File, as a side-effect we are + /// also having to checkpoint the database. pub fn copy_db(&self, file: &str) -> Result<()> { // use a new PlatformIO instance here to allow for copying in-memory databases let io: Arc = Arc::new(PlatformIO::new()?); From bcadcb20147dca06d47699ae43ee70fd3b0699d7 Mon Sep 17 00:00:00 2001 From: PThorpe92 Date: Thu, 7 Aug 2025 17:07:53 -0400 Subject: [PATCH 10/10] Remove RefCell from copy_to method in io trait --- core/io/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/io/mod.rs b/core/io/mod.rs index e209561d3..52b9be2e2 100644 --- a/core/io/mod.rs +++ b/core/io/mod.rs @@ -68,7 +68,7 @@ pub trait File: Send + Sync { while pos < file_size { let chunk_size = (file_size - pos).min(BUFFER_SIZE); // Read from source - let read_buffer = Arc::new(RefCell::new(Buffer::allocate(chunk_size, Rc::new(|_| {})))); + let read_buffer = Arc::new(Buffer::allocate(chunk_size, Rc::new(|_| {}))); let read_completion = self.pread( pos, Completion::new_read(read_buffer.clone(), move |_, _| {}),