diff --git a/Makefile b/Makefile
index 1fa102991..fdb498371 100644
--- a/Makefile
+++ b/Makefile
@@ -140,6 +140,10 @@ bench-vfs: uv-sync-test
 	cargo build --release
 	RUST_LOG=$(RUST_LOG) uv run --project limbo_test bench-vfs "$(SQL)" "$(N)"
 
+# Compare one Turso VFS backend against sqlite3, e.g.:
+#   make bench-sqlite VFS=syscall SQL="select * from users;" N=100
+bench-sqlite: uv-sync-test
+	cargo build --release
+	RUST_LOG=$(RUST_LOG) uv run --project limbo_test bench-sqlite "$(VFS)" "$(SQL)" "$(N)"
+.PHONY: bench-sqlite
+
 clickbench:
 	./perf/clickbench/benchmark.sh
 .PHONY: clickbench
diff --git a/testing/cli_tests/sqlite_bench.py b/testing/cli_tests/sqlite_bench.py
new file mode 100644
index 000000000..3506f1c7c
--- /dev/null
+++ b/testing/cli_tests/sqlite_bench.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+
+# vfs vs sqlite3 benchmarking/comparison
+import argparse
+import os
+import platform
+import statistics
+import subprocess
+from faker import Faker
+from pathlib import Path
+from time import perf_counter, sleep
+from typing import Dict, List
+
+from cli_tests.console import error, info, test
+from cli_tests.test_turso_cli import TestTursoShell
+
+# Path to the release build of the tursodb binary that gets benchmarked.
+LIMBO_BIN = Path("./target/release/tursodb")
+# Scratch database file; it is re-created before each benchmark run.
+DB_FILE = Path("testing/temp.db")
+
+# Name of the sqlite3 executable used for the baseline runs.
+SQLITE_BIN = "sqlite3"
+
+# VFS backends that may be selected on the command line.
+vfs_list = ["syscall", "sqlite"]
+if platform.system() == "Linux":
+    # io_uring is only offered when running on Linux
+    vfs_list.append("io_uring")
+
+
+def append_time(times, start, perf_counter):
+    """
+    Record the time elapsed since `start` at the end of `times`.
+
+    `perf_counter` is the clock callable that is sampled for the current time.
+    Always returns True, so the call can close an `and` chain inside a check
+    expression without changing its outcome.
+    """
+    elapsed = perf_counter() - start
+    times.append(elapsed)
+    return True
+
+
+# Shared Faker instance; supplies the sample user values for the assorted insert query.
+fake = Faker()
+
+
+def bench_one(vfs: str, sql: str, iterations: int, assorted: bool, use_sqlite3=False) -> List[float]:
+    """
+    Launch a single process (Tursodb with the requested VFS or sqlite3), run `sql`
+    `iterations` times, return a list of elapsed wall-clock times.
+
+    Args:
+        vfs: VFS backend passed to Tursodb via `--vfs`; only used for the label
+            and flags when `use_sqlite3` is false.
+        sql: statement that is executed in every iteration.
+        iterations: number of times the query set is executed.
+        assorted: when true, a fixed set of additional select/insert statements
+            is executed after `sql` in every iteration.
+        use_sqlite3: run the statements through the sqlite3 executable instead
+            of Tursodb.
+
+    Returns:
+        The elapsed times in seconds, one entry per executed statement.
+    """
+    if use_sqlite3:
+        exec_name, flags, test_name = SQLITE_BIN, str(DB_FILE), "sqlite3"
+    else:
+        exec_name, flags, test_name = str(LIMBO_BIN), f"-m list --vfs {vfs} {DB_FILE}", f"limbo ({vfs})"
+    shell = TestTursoShell(
+        exec_name=exec_name,
+        flags=flags,
+        init_commands="",
+    )
+
+    queries = [sql]
+    if assorted:
+        # The fake values are generated once per call, so every iteration
+        # inserts the same user row.
+        queries.extend(
+            [
+                "select * from users;",
+                "insert into products (name,price) values (randomblob(1024*64), randomblob(1024*64));",
+                "select first_name, last_name, age from users limit 1000;",
+                "insert into users (first_name, last_name, email, phone_number, address, city, state, zipcode,age) values "
+                + f"('{fake.first_name()}', '{fake.last_name()}', '{fake.email()}', '{fake.phone_number()}', '{fake.street_address()}', "
+                + f"'{fake.city()}', '{fake.state_abbr()}', '{fake.zipcode()}', 62);",
+            ]
+        )
+
+    times: List[float] = []
+    try:
+        for i in range(1, iterations + 1):
+            for query in queries:
+                start = perf_counter()
+                # The check callback doubles as the stopwatch: it records the
+                # elapsed time as soon as the shell hands back the output.
+                shell.run_test_fn(query, lambda x: x is not None and append_time(times, start, perf_counter))
+                test(f"  {test_name} | run {i:>3}: {times[-1]:.6f}s")
+    finally:
+        # Always shut the launched process down, even when a query check raises.
+        shell.quit()
+    return times
+
+
+def setup_temp_db() -> None:
+    """
+    Re-create the scratch database by cloning `testing/testing.db` into `DB_FILE`.
+
+    Raises:
+        subprocess.CalledProcessError: if the sqlite3 clone command exits non-zero.
+    """
+    # make sure we start fresh, otherwise we could end up with
+    # one having to checkpoint the others from the previous run
+    cleanup_temp_db()
+    cmd = [SQLITE_BIN, "testing/testing.db", f".clone {DB_FILE}"]
+    # check=True already raises on a non-zero exit status, so no separate
+    # check_returncode() call is needed.
+    subprocess.run(cmd, check=True)
+    sleep(0.3)  # make sure it's finished
+
+
+def cleanup_temp_db() -> None:
+    """Delete the scratch database and its `-wal` / `-shm` companion files, if present."""
+    candidates = [
+        DB_FILE,
+        DB_FILE.with_suffix(".db-wal"),
+        DB_FILE.with_suffix(".db-shm"),
+    ]
+    for stale in candidates:
+        if not stale.exists():
+            continue
+        os.remove(stale)
+
+
+def _print_times(label: str, times: List[float]) -> None:
+    """Print every sample for `label`, unless there are too many to be readable."""
+    if len(times) < 1000:
+        info(f"All times ({label}):", " ".join(f"{t:.6f}" for t in times))
+    else:
+        info("All times truncated...")
+
+
+def _summarize(times: List[float]) -> Dict[str, float]:
+    """Return average, median, standard deviation, minimum and maximum of `times`."""
+    return {
+        "avg": statistics.mean(times),
+        "median": statistics.median(times),
+        # stdev needs at least two samples
+        "stdev": statistics.stdev(times) if len(times) > 1 else 0,
+        "min": min(times),
+        "max": max(times),
+    }
+
+
+def _print_summary(title: str, stats: Dict[str, float]) -> None:
+    """Print one result section headed by `title` from a `_summarize` result."""
+    info(title)
+    info(f"  Average : {stats['avg']:.6f} s")
+    info(f"  Median  : {stats['median']:.6f} s")
+    info(f"  Std Dev : {stats['stdev']:.6f} s")
+    info(f"  Min     : {stats['min']:.6f} s")
+    info(f"  Max     : {stats['max']:.6f} s")
+
+
+def main() -> None:
+    """
+    Command-line entry point: benchmark one Turso VFS against sqlite3.
+
+    Parses `vfs`, `sql` and an optional `iterations` count (default 100) plus
+    the `--assorted` flag, runs the statements first through sqlite3 and then
+    through Tursodb on a freshly cloned scratch database each time, and prints
+    per-engine statistics followed by a relative comparison.
+
+    Exits through `parser.error` when `iterations` is not positive.
+    """
+    parser = argparse.ArgumentParser(
+        description="Benchmark a specific Turso VFS against sqlite3.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=f"Available VFS options: {', '.join(vfs_list)}",
+    )
+    parser.add_argument("vfs", choices=vfs_list, help="VFS backend to benchmark against sqlite3")
+    parser.add_argument("sql", help="SQL statement to execute (quote it)")
+    # nargs="?" makes the positional optional; without it the default is never used
+    parser.add_argument("iterations", type=int, nargs="?", default=100, help="number of repetitions")
+    parser.add_argument("--assorted", action="store_true", help="use additional assorted queries")
+    args = parser.parse_args()
+
+    vfs, sql, iterations, assorted = args.vfs, args.sql, args.iterations, args.assorted
+    if iterations <= 0:
+        error("iterations must be a positive integer")
+        parser.error("Invalid Arguments")
+
+    info(f"VFS        : {vfs}")
+    info(f"SQL        : {sql}")
+    info(f"Iterations : {iterations}")
+    info(f"Assorted   : {assorted}")
+    info(f"Database   : {DB_FILE.resolve()}")
+    info("-" * 60)
+
+    try:
+        # Benchmark sqlite3
+        setup_temp_db()
+        test("\n### SQLite3 (baseline) ###")
+        sqlite_times = bench_one(vfs, sql, iterations, assorted, use_sqlite3=True)
+        _print_times("sqlite3", sqlite_times)
+        sqlite_stats = _summarize(sqlite_times)
+
+        # Benchmark Turso with specified IO backend, again on a fresh clone
+        setup_temp_db()
+        test(f"\n### Turso with I/O: {vfs} ###")
+        limbo_times = bench_one(vfs, sql, iterations, assorted, use_sqlite3=False)
+        _print_times(f"limbo {vfs}", limbo_times)
+        limbo_stats = _summarize(limbo_times)
+    finally:
+        # Remove the scratch database even when one of the runs fails.
+        cleanup_temp_db()
+
+    sqlite_avg, limbo_avg = sqlite_stats["avg"], limbo_stats["avg"]
+    sqlite_median, limbo_median = sqlite_stats["median"], limbo_stats["median"]
+
+    # Results summary
+    info("\n" + "=" * 60)
+    info("BENCHMARK RESULTS")
+    info("=" * 60)
+    _print_summary("\nSQLite3 (baseline):", sqlite_stats)
+    _print_summary(f"\nTurso ({vfs}):", limbo_stats)
+    info("\n" + "-" * 60)
+    info("COMPARISON")
+    info("-" * 60)
+    # Performance comparison: positive means Turso took longer on average
+    pct_diff = (limbo_avg - sqlite_avg) / sqlite_avg * 100.0
+    faster_slower = "slower" if pct_diff > 0 else "faster"
+    info(f"Turso ({vfs}) is {abs(pct_diff):.1f}% {faster_slower} than SQLite3")
+    info(f"  SQLite3 avg: {sqlite_avg:.6f} s")
+    info(f"  Turso avg  : {limbo_avg:.6f} s")
+    info(f"  Difference : {limbo_avg - sqlite_avg:+.6f} s")
+    # Median comparison
+    median_pct_diff = (limbo_median - sqlite_median) / sqlite_median * 100.0
+    median_faster_slower = "slower" if median_pct_diff > 0 else "faster"
+    info(f"\nMedian comparison: Turso is {abs(median_pct_diff):.1f}% {median_faster_slower}")
+    info(f"  SQLite3 median: {sqlite_median:.6f} s")
+    info(f"  Turso median  : {limbo_median:.6f} s")
+    info("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/testing/pyproject.toml b/testing/pyproject.toml
index 41b3608cc..f2db9bdb2 100644
--- a/testing/pyproject.toml
+++ b/testing/pyproject.toml
@@ -16,6 +16,7 @@ test-extensions = "cli_tests.extensions:main"
 test-update = "cli_tests.update:main"
 test-memory = "cli_tests.memory:main"
 bench-vfs = "cli_tests.vfs_bench:main"
+bench-sqlite = "cli_tests.sqlite_bench:main"
 test-constraint = "cli_tests.constraint:main"
 test-collate = "cli_tests.collate:main"
 test-mvcc = "cli_tests.mvcc:main"