Merge branch 'tursodatabase:main' into issue/2077

rajajisai
2025-08-01 15:20:36 -04:00
committed by GitHub
116 changed files with 6707 additions and 5464 deletions

.dockerignore Normal file

@@ -0,0 +1 @@
*target


@@ -50,7 +50,7 @@ jobs:
- uses: actions/checkout@v3
- name: Clippy
run: |
cargo clippy --workspace --all-features --all-targets --exclude limbo-wasm -- -A unused-variables --deny=warnings
cargo clippy --workspace --all-features --all-targets -- -A unused-variables --deny=warnings
simulator:
runs-on: blacksmith-4vcpu-ubuntu-2404
@@ -81,10 +81,6 @@ jobs:
- name: Test
run: make test
timeout-minutes: 20
# - uses: "./.github/shared/install_sqlite"
# - name: Test with index enabled
# run: SQLITE_EXEC="scripts/limbo-sqlite3-index-experimental" make test
# timeout-minutes: 20
test-sqlite:
runs-on: blacksmith-4vcpu-ubuntu-2404
steps:


@@ -415,8 +415,8 @@ Modifiers:
| Opcode | Status | Comment |
|----------------|--------|---------|
| Add | Yes | |
| AddImm | No | |
| Affinity | No | |
| AddImm | Yes | |
| Affinity | Yes | |
| AggFinal | Yes | |
| AggStep | Yes | |
| AggStep | Yes | |
@@ -427,6 +427,7 @@ Modifiers:
| BitOr | Yes | |
| Blob | Yes | |
| BeginSubrtn | Yes | |
| Cast | Yes | |
| Checkpoint | Yes | |
| Clear | No | |
| Close | Yes | |
@@ -473,7 +474,6 @@ Modifiers:
| Init | Yes | |
| InitCoroutine | Yes | |
| Insert | Yes | |
| InsertInt | No | |
| Int64 | Yes | |
| Integer | Yes | |
| IntegrityCk | Yes | |
@@ -550,15 +550,10 @@ Modifiers:
| SorterNext | Yes | |
| SorterOpen | Yes | |
| SorterSort | Yes | |
| String | No | |
| String | NotNeeded | SQLite uses String for sized strings and String8 for null-terminated. All our strings are sized |
| String8 | Yes | |
| Subtract | Yes | |
| TableLock | No | |
| ToBlob | No | |
| ToInt | No | |
| ToNumeric | No | |
| ToReal | No | |
| ToText | No | |
| Trace | No | |
| Transaction | Yes | |
| VBegin | No | |
@@ -572,7 +567,6 @@ Modifiers:
| VUpdate | Yes | |
| Vacuum | No | |
| Variable | Yes | |
| VerifyCookie | No | |
| Yield | Yes | |
| ZeroOrNull | Yes | |

Cargo.lock generated

@@ -382,9 +382,23 @@ checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytemuck"
version = "1.22.0"
version = "1.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540"
checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
dependencies = [
"bytemuck_derive",
]
[[package]]
name = "bytemuck_derive"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ecc273b49b3205b83d648f0690daa588925572cc5063745bfe547fe7ec8e1a1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "byteorder"
@@ -656,7 +670,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "core_tester"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"anyhow",
"assert_cmd",
@@ -668,8 +682,10 @@ dependencies = [
"rusqlite",
"tempfile",
"test-log",
"tokio",
"tracing",
"tracing-subscriber",
"turso",
"turso_core",
"zerocopy 0.8.26",
]
@@ -1600,7 +1616,7 @@ dependencies = [
"hyper",
"libc",
"pin-project-lite",
"socket2",
"socket2 0.5.10",
"tokio",
"tower-service",
"tracing",
@@ -2100,14 +2116,14 @@ dependencies = [
[[package]]
name = "limbo-go"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"turso_core",
]
[[package]]
name = "limbo_completion"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"mimalloc",
"turso_ext",
@@ -2115,7 +2131,7 @@ dependencies = [
[[package]]
name = "limbo_crypto"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"blake3",
"data-encoding",
@@ -2128,7 +2144,7 @@ dependencies = [
[[package]]
name = "limbo_csv"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"csv",
"mimalloc",
@@ -2138,7 +2154,7 @@ dependencies = [
[[package]]
name = "limbo_ipaddr"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"ipnetwork",
"mimalloc",
@@ -2147,7 +2163,7 @@ dependencies = [
[[package]]
name = "limbo_percentile"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"mimalloc",
"turso_ext",
@@ -2155,7 +2171,7 @@ dependencies = [
[[package]]
name = "limbo_regexp"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"mimalloc",
"regex",
@@ -2164,7 +2180,7 @@ dependencies = [
[[package]]
name = "limbo_sim"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"anarchist-readable-name-generator-lib",
"anyhow",
@@ -2191,7 +2207,7 @@ dependencies = [
[[package]]
name = "limbo_sqlite3"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"env_logger 0.11.7",
"libc",
@@ -2204,7 +2220,7 @@ dependencies = [
[[package]]
name = "limbo_sqlite_test_ext"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"cc",
]
@@ -2676,6 +2692,15 @@ version = "4.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1036865bb9422d3300cf723f657c2851d0e9ab12567854b1f4eba3d77decf564"
[[package]]
name = "pack1"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6e7cd9bd638dc2c831519a0caa1c006cab771a92b1303403a8322773c5b72d6"
dependencies = [
"bytemuck",
]
[[package]]
name = "parking_lot"
version = "0.12.3"
@@ -2908,7 +2933,7 @@ dependencies = [
[[package]]
name = "py-turso"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"anyhow",
"pyo3",
@@ -3619,6 +3644,16 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "socket2"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807"
dependencies = [
"libc",
"windows-sys 0.59.0",
]
[[package]]
name = "sorted-vec"
version = "0.8.6"
@@ -3966,9 +4001,9 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.46.1"
version = "1.47.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
checksum = "43864ed400b6043a4757a25c7a64a8efde741aed79a056a2fb348a406701bb35"
dependencies = [
"backtrace",
"bytes",
@@ -3979,9 +4014,9 @@ dependencies = [
"pin-project-lite",
"signal-hook-registry",
"slab",
"socket2",
"socket2 0.6.0",
"tokio-macros",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -4134,7 +4169,7 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "turso"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"rand 0.8.5",
"rand_chacha 0.3.1",
@@ -4146,7 +4181,7 @@ dependencies = [
[[package]]
name = "turso-java"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"jni",
"thiserror 2.0.12",
@@ -4155,7 +4190,7 @@ dependencies = [
[[package]]
name = "turso-sync"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"ctor",
"futures",
@@ -4182,7 +4217,7 @@ dependencies = [
[[package]]
name = "turso_cli"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"anyhow",
"cfg-if",
@@ -4214,11 +4249,12 @@ dependencies = [
[[package]]
name = "turso_core"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"antithesis_sdk",
"bitflags 2.9.0",
"built",
"bytemuck",
"cfg_block",
"chrono",
"criterion",
@@ -4236,6 +4272,7 @@ dependencies = [
"memory-stats",
"miette",
"mimalloc",
"pack1",
"parking_lot",
"paste",
"polling",
@@ -4267,7 +4304,7 @@ dependencies = [
[[package]]
name = "turso_dart"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"flutter_rust_bridge",
"turso_core",
@@ -4275,7 +4312,7 @@ dependencies = [
[[package]]
name = "turso_ext"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"chrono",
"getrandom 0.3.2",
@@ -4284,7 +4321,7 @@ dependencies = [
[[package]]
name = "turso_ext_tests"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"env_logger 0.11.7",
"lazy_static",
@@ -4295,7 +4332,7 @@ dependencies = [
[[package]]
name = "turso_macros"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"proc-macro2",
"quote",
@@ -4304,7 +4341,7 @@ dependencies = [
[[package]]
name = "turso_node"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"napi",
"napi-build",
@@ -4315,7 +4352,7 @@ dependencies = [
[[package]]
name = "turso_sqlite3_parser"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"bitflags 2.9.0",
"cc",
@@ -4333,7 +4370,7 @@ dependencies = [
[[package]]
name = "turso_stress"
version = "0.1.3"
version = "0.1.4-pre.1"
dependencies = [
"anarchist-readable-name-generator-lib",
"antithesis_sdk",


@@ -31,26 +31,26 @@ members = [
exclude = ["perf/latency/limbo"]
[workspace.package]
version = "0.1.3"
version = "0.1.4-pre.1"
authors = ["the Limbo authors"]
edition = "2021"
license = "MIT"
repository = "https://github.com/tursodatabase/turso"
[workspace.dependencies]
turso = { path = "bindings/rust", version = "0.1.3" }
limbo_completion = { path = "extensions/completion", version = "0.1.3" }
turso_core = { path = "core", version = "0.1.3" }
limbo_crypto = { path = "extensions/crypto", version = "0.1.3" }
limbo_csv = { path = "extensions/csv", version = "0.1.3" }
turso_ext = { path = "extensions/core", version = "0.1.3" }
turso_ext_tests = { path = "extensions/tests", version = "0.1.3" }
limbo_ipaddr = { path = "extensions/ipaddr", version = "0.1.3" }
turso_macros = { path = "macros", version = "0.1.3" }
limbo_percentile = { path = "extensions/percentile", version = "0.1.3" }
limbo_regexp = { path = "extensions/regexp", version = "0.1.3" }
turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.1.3" }
limbo_uuid = { path = "extensions/uuid", version = "0.1.3" }
turso = { path = "bindings/rust", version = "0.1.4-pre.1" }
limbo_completion = { path = "extensions/completion", version = "0.1.4-pre.1" }
turso_core = { path = "core", version = "0.1.4-pre.1" }
limbo_crypto = { path = "extensions/crypto", version = "0.1.4-pre.1" }
limbo_csv = { path = "extensions/csv", version = "0.1.4-pre.1" }
turso_ext = { path = "extensions/core", version = "0.1.4-pre.1" }
turso_ext_tests = { path = "extensions/tests", version = "0.1.4-pre.1" }
limbo_ipaddr = { path = "extensions/ipaddr", version = "0.1.4-pre.1" }
turso_macros = { path = "macros", version = "0.1.4-pre.1" }
limbo_percentile = { path = "extensions/percentile", version = "0.1.4-pre.1" }
limbo_regexp = { path = "extensions/regexp", version = "0.1.4-pre.1" }
turso_sqlite3_parser = { path = "vendored/sqlite3-parser", version = "0.1.4-pre.1" }
limbo_uuid = { path = "extensions/uuid", version = "0.1.4-pre.1" }
strum = { version = "0.26", features = ["derive"] }
strum_macros = "0.26"
serde = "1.0"

Dockerfile.cli Normal file

@@ -0,0 +1,20 @@
FROM rust:1.88.0 as builder
WORKDIR /app
# Copy the actual source code
COPY . .
# Build the CLI binary
RUN cargo build --release --package turso_cli
# Runtime stage
FROM rust:1.88.0-slim
WORKDIR /app
# Copy the built binary
COPY --from=builder /app/target/release/tursodb /usr/local/bin/
# Set the entrypoint
ENTRYPOINT ["tursodb"]


@@ -150,3 +150,46 @@ bench-exclude-tpc-h:
cargo bench $$benchmarks; \
fi
.PHONY: bench-exclude-tpc-h
docker-cli-build:
docker build -f Dockerfile.cli -t turso-cli .
docker-cli-run:
docker run -it -v ./:/app turso-cli
merge-pr:
ifndef PR
$(error PR is required. Usage: make merge-pr PR=123)
endif
@echo "Setting up environment for PR merge..."
@if [ -z "$(GITHUB_REPOSITORY)" ]; then \
REPO=$$(git remote get-url origin | sed -E 's|.*github\.com[:/]([^/]+/[^/]+?)(\.git)?$$|\1|'); \
if [ -z "$$REPO" ]; then \
echo "Error: Could not detect repository from git remote"; \
exit 1; \
fi; \
export GITHUB_REPOSITORY="$$REPO"; \
else \
export GITHUB_REPOSITORY="$(GITHUB_REPOSITORY)"; \
fi; \
echo "Repository: $$REPO"; \
AUTH=$$(gh auth status); \
if [ -z "$$AUTH" ]; then \
echo "auth: $$AUTH"; \
echo "GitHub CLI not authenticated. Starting login process..."; \
gh auth login --scopes repo,workflow; \
else \
if ! echo "$$AUTH" | grep -q "workflow"; then \
echo "Warning: 'workflow' scope not detected. You may need to re-authenticate if merging PRs with workflow changes."; \
echo "Run: gh auth refresh -s repo,workflow"; \
fi; \
fi; \
if [ "$(LOCAL)" = "1" ]; then \
echo "merging PR #$(PR) locally"; \
uv run scripts/merge-pr.py $(PR) --local; \
else \
echo "merging PR #$(PR) on GitHub"; \
uv run scripts/merge-pr.py $(PR); \
fi
.PHONY: merge-pr


@@ -88,6 +88,13 @@ You can also build and run the latest development version with:
cargo run
```
If you like Docker, we've got you covered. Simply run this in the root folder:
```bash
make docker-cli-build && \
make docker-cli-run
```
### MCP Server Mode
The Turso CLI includes a built-in [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) server that allows AI assistants to interact with your databases. Start the MCP server with:


@@ -12,7 +12,7 @@ crate-type = ["cdylib"]
[dependencies]
turso_core = { workspace = true }
napi = { version = "3.1.3", default-features = false }
napi = { version = "3.1.3", default-features = false, features = ["napi6"] }
napi-derive = { version = "3.1.1", default-features = true }
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }


@@ -1,3 +0,0 @@
CREATE TABLE users (name TEXT, age INTEGER);
INSERT INTO users (name, age) VALUES ('Bob', 24);
INSERT INTO users (name, age) VALUES ('Alice', 42);


@@ -1,445 +0,0 @@
import crypto from "crypto";
import fs from "node:fs";
import { fileURLToPath } from "url";
import path from "node:path";
import DualTest from "./dual-test.mjs";
const inMemoryTest = new DualTest(":memory:");
const foobarTest = new DualTest("foobar.db");
inMemoryTest.both("Open in-memory database", async (t) => {
const db = t.context.db;
t.is(db.memory, true);
});
inMemoryTest.both("Property .name of in-memory database", async (t) => {
const db = t.context.db;
t.is(db.name, t.context.path);
});
foobarTest.both("Property .name of database", async (t) => {
const db = t.context.db;
t.is(db.name, t.context.path);
});
new DualTest("foobar.db", { readonly: true }).both(
"Property .readonly of database if set",
async (t) => {
const db = t.context.db;
t.is(db.readonly, true);
},
);
const genDatabaseFilename = () => {
return `test-${crypto.randomBytes(8).toString("hex")}.db`;
};
new DualTest().both(
"opening a read-only database fails if the file doesn't exist",
async (t) => {
t.throws(
() => t.context.connect(genDatabaseFilename(), { readonly: true }),
{
any: true,
code: "SQLITE_CANTOPEN",
},
);
},
);
foobarTest.both("Property .readonly of database if not set", async (t) => {
const db = t.context.db;
t.is(db.readonly, false);
});
foobarTest.both("Property .open of database", async (t) => {
const db = t.context.db;
t.is(db.open, true);
});
inMemoryTest.both("Statement.get() returns data", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT 1");
const result = stmt.get();
t.is(result["1"], 1);
const result2 = stmt.get();
t.is(result2["1"], 1);
});
inMemoryTest.both(
"Statement.get() returns undefined when no data",
async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT 1 WHERE 1 = 2");
const result = stmt.get();
t.is(result, undefined);
},
);
inMemoryTest.both(
"Statement.run() returns correct result object",
async (t) => {
const db = t.context.db;
db.prepare("CREATE TABLE users (name TEXT)").run();
const rows = db.prepare("INSERT INTO users (name) VALUES (?)").run("Alice");
t.deepEqual(rows, { changes: 1, lastInsertRowid: 1 });
},
);
inMemoryTest.onlySqlitePasses(
"Statment.iterate() should correctly return an iterable object",
async (t) => {
const db = t.context.db;
db.prepare(
"CREATE TABLE users (name TEXT, age INTEGER, nationality TEXT)",
).run();
db.prepare(
"INSERT INTO users (name, age, nationality) VALUES (?, ?, ?)",
).run(["Alice", 42], "UK");
db.prepare(
"INSERT INTO users (name, age, nationality) VALUES (?, ?, ?)",
).run("Bob", 24, "USA");
let rows = db.prepare("SELECT * FROM users").iterate();
for (const row of rows) {
t.truthy(row.name);
t.truthy(row.nationality);
t.true(typeof row.age === "number");
}
},
);
inMemoryTest.both(
"Empty prepared statement should throw the correct error",
async (t) => {
const db = t.context.db;
t.throws(
() => {
db.prepare("");
},
{
instanceOf: RangeError,
message: "The supplied SQL string contains no statements",
},
);
},
);
inMemoryTest.both("Test pragma()", async (t) => {
const db = t.context.db;
t.deepEqual(typeof db.pragma("cache_size")[0].cache_size, "number");
t.deepEqual(typeof db.pragma("cache_size", { simple: true }), "number");
});
inMemoryTest.both("pragma query", async (t) => {
const db = t.context.db;
let page_size = db.pragma("page_size");
let expectedValue = [{ page_size: 4096 }];
t.deepEqual(page_size, expectedValue);
});
inMemoryTest.both("pragma table_list", async (t) => {
const db = t.context.db;
let param = "sqlite_schema";
let actual = db.pragma(`table_info(${param})`);
let expectedValue = [
{ cid: 0, name: "type", type: "TEXT", notnull: 0, dflt_value: null, pk: 0 },
{ cid: 1, name: "name", type: "TEXT", notnull: 0, dflt_value: null, pk: 0 },
{
cid: 2,
name: "tbl_name",
type: "TEXT",
notnull: 0,
dflt_value: null,
pk: 0,
},
{
cid: 3,
name: "rootpage",
type: "INT",
notnull: 0,
dflt_value: null,
pk: 0,
},
{ cid: 4, name: "sql", type: "TEXT", notnull: 0, dflt_value: null, pk: 0 },
];
t.deepEqual(actual, expectedValue);
});
inMemoryTest.both("simple pragma table_list", async (t) => {
const db = t.context.db;
let param = "sqlite_schema";
let actual = db.pragma(`table_info(${param})`, { simple: true });
let expectedValue = 0;
t.deepEqual(actual, expectedValue);
});
inMemoryTest.onlySqlitePasses(
"Statement shouldn't bind twice with bind()",
async (t) => {
const db = t.context.db;
db.prepare("CREATE TABLE users (name TEXT, age INTEGER)").run();
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Alice", 42);
let stmt = db.prepare("SELECT * FROM users WHERE name = ?").bind("Alice");
let row = stmt.get();
t.truthy(row.name);
t.true(typeof row.age === "number");
t.throws(
() => {
stmt.bind("Bob");
},
{
instanceOf: TypeError,
message:
"The bind() method can only be invoked once per statement object",
},
);
},
);
inMemoryTest.both(
"Test pluck(): Rows should only have the values of the first column",
async (t) => {
const db = t.context.db;
db.prepare("CREATE TABLE users (name TEXT, age INTEGER)").run();
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Alice", 42);
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Bob", 24);
let stmt = db.prepare("SELECT * FROM users").pluck();
for (const row of stmt.all()) {
t.truthy(row);
t.assert(typeof row === "string");
}
},
);
inMemoryTest.both(
"Test raw(): Rows should be returned as arrays",
async (t) => {
const db = t.context.db;
db.prepare("CREATE TABLE users (name TEXT, age INTEGER)").run();
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Alice", 42);
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Bob", 24);
let stmt = db.prepare("SELECT * FROM users").raw();
for (const row of stmt.all()) {
t.true(Array.isArray(row));
t.true(typeof row[0] === "string");
t.true(typeof row[1] === "number");
}
stmt = db.prepare("SELECT * FROM users WHERE name = ?").raw();
const row = stmt.get("Alice");
t.true(Array.isArray(row));
t.is(row.length, 2);
t.is(row[0], "Alice");
t.is(row[1], 42);
const noRow = stmt.get("Charlie");
t.is(noRow, undefined);
stmt = db.prepare("SELECT * FROM users").raw();
const rows = stmt.all();
t.true(Array.isArray(rows));
t.is(rows.length, 2);
t.deepEqual(rows[0], ["Alice", 42]);
t.deepEqual(rows[1], ["Bob", 24]);
},
);
inMemoryTest.onlySqlitePasses(
"Test expand(): Columns should be namespaced",
async (t) => {
const expandedResults = [
{
users: {
name: "Alice",
type: "premium",
},
addresses: {
userName: "Alice",
type: "home",
street: "Alice's street",
},
},
{
users: {
name: "Bob",
type: "basic",
},
addresses: {
userName: "Bob",
type: "work",
street: "Bob's street",
},
},
];
let regularResults = [
{
name: "Alice",
street: "Alice's street",
type: "home",
userName: "Alice",
},
{
name: "Bob",
street: "Bob's street",
type: "work",
userName: "Bob",
},
];
const db = t.context.db;
db.prepare("CREATE TABLE users (name TEXT, type TEXT)").run();
db.prepare(
"CREATE TABLE addresses (userName TEXT, street TEXT, type TEXT)",
).run();
db.prepare("INSERT INTO users (name, type) VALUES (?, ?)").run(
"Alice",
"premium",
);
db.prepare("INSERT INTO users (name, type) VALUES (?, ?)").run(
"Bob",
"basic",
);
db.prepare(
"INSERT INTO addresses (userName, street, type) VALUES (?, ?, ?)",
).run("Alice", "Alice's street", "home");
db.prepare(
"INSERT INTO addresses (userName, street, type) VALUES (?, ?, ?)",
).run("Bob", "Bob's street", "work");
let allRows = db
.prepare(
"SELECT * FROM users u JOIN addresses a ON (u.name = a.userName)",
)
.expand(true)
.all();
t.deepEqual(allRows, expandedResults);
allRows = db
.prepare(
"SELECT * FROM users u JOIN addresses a ON (u.name = a.userName)",
)
.expand()
.all();
t.deepEqual(allRows, expandedResults);
allRows = db
.prepare(
"SELECT * FROM users u JOIN addresses a ON (u.name = a.userName)",
)
.expand(false)
.all();
t.deepEqual(allRows, regularResults);
},
);
inMemoryTest.both(
"Presentation modes should be mutually exclusive",
async (t) => {
const db = t.context.db;
db.prepare("CREATE TABLE users (name TEXT, age INTEGER)").run();
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Alice", 42);
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Bob", 24);
// test raw()
let stmt = db.prepare("SELECT * FROM users").pluck().raw();
for (const row of stmt.all()) {
t.true(Array.isArray(row));
t.true(typeof row[0] === "string");
t.true(typeof row[1] === "number");
}
stmt = db.prepare("SELECT * FROM users WHERE name = ?").raw();
const row = stmt.get("Alice");
t.true(Array.isArray(row));
t.is(row.length, 2);
t.is(row[0], "Alice");
t.is(row[1], 42);
const noRow = stmt.get("Charlie");
t.is(noRow, undefined);
stmt = db.prepare("SELECT * FROM users").raw();
let rows = stmt.all();
t.true(Array.isArray(rows));
t.is(rows.length, 2);
t.deepEqual(rows[0], ["Alice", 42]);
t.deepEqual(rows[1], ["Bob", 24]);
// test pluck()
stmt = db.prepare("SELECT * FROM users").raw().pluck();
for (const name of stmt.all()) {
t.truthy(name);
t.assert(typeof name === "string");
}
},
);
inMemoryTest.onlySqlitePasses(
"Presentation mode 'expand' should be mutually exclusive",
async (t) => {
// this test can be appended to the previous one when 'expand' is implemented in Turso
const db = t.context.db;
db.prepare("CREATE TABLE users (name TEXT, age INTEGER)").run();
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Alice", 42);
db.prepare("INSERT INTO users (name, age) VALUES (?, ?)").run("Bob", 24);
let stmt = db.prepare("SELECT * FROM users").pluck().raw();
// test expand()
stmt = db.prepare("SELECT * FROM users").raw().pluck().expand();
const rows = stmt.all();
t.true(Array.isArray(rows));
t.is(rows.length, 2);
t.deepEqual(rows[0], { users: { name: "Alice", age: 42 } });
t.deepEqual(rows[1], { users: { name: "Bob", age: 24 } });
},
);
inMemoryTest.both(
"Test exec(): Should correctly load multiple statements from file",
async (t) => {
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const db = t.context.db;
const file = fs.readFileSync(
path.resolve(__dirname, "./artifacts/basic-test.sql"),
"utf8",
);
db.exec(file);
let rows = db.prepare("SELECT * FROM users").all();
for (const row of rows) {
t.truthy(row.name);
t.true(typeof row.age === "number");
}
},
);
inMemoryTest.both(
"Test Statement.database gets the database object",
async (t) => {
const db = t.context.db;
let stmt = db.prepare("SELECT 1");
t.is(stmt.database, db);
},
);
inMemoryTest.both("Test Statement.source", async (t) => {
const db = t.context.db;
let sql = "CREATE TABLE t (id int)";
let stmt = db.prepare(sql);
t.is(stmt.source, sql);
});


@@ -1,82 +0,0 @@
import avaTest from "ava";
import turso from "../sync.js";
import sqlite from "better-sqlite3";
class DualTest {
#libs = { turso, sqlite };
#beforeEaches = [];
#pathFn;
#options;
constructor(path_opt, options = {}) {
if (typeof path_opt === 'function') {
this.#pathFn = path_opt;
} else {
this.#pathFn = () => path_opt ?? "hello.db";
}
this.#options = options;
}
beforeEach(fn) {
this.#beforeEaches.push(fn);
}
only(name, impl, ...rest) {
avaTest.serial.only('[TESTING TURSO] ' + name, this.#wrap('turso', impl), ...rest);
avaTest.serial.only('[TESTING BETTER-SQLITE3] ' + name, this.#wrap('sqlite', impl), ...rest);
}
onlySqlitePasses(name, impl, ...rest) {
avaTest.serial.failing('[TESTING TURSO] ' + name, this.#wrap('turso', impl), ...rest);
avaTest.serial('[TESTING BETTER-SQLITE3] ' + name, this.#wrap('sqlite', impl), ...rest);
}
both(name, impl, ...rest) {
avaTest.serial('[TESTING TURSO] ' + name, this.#wrap('turso', impl), ...rest);
avaTest.serial('[TESTING BETTER-SQLITE3] ' + name, this.#wrap('sqlite', impl), ...rest);
}
skip(name, impl, ...rest) {
avaTest.serial.skip('[TESTING TURSO] ' + name, this.#wrap('turso', impl), ...rest);
avaTest.serial.skip('[TESTING BETTER-SQLITE3] ' + name, this.#wrap('sqlite', impl), ...rest);
}
async #runBeforeEach(t) {
for (const beforeEach of this.#beforeEaches) {
await beforeEach(t);
}
}
#wrap(provider, fn) {
return async (t, ...rest) => {
const path = this.#pathFn();
const Lib = this.#libs[provider];
const db = this.#connect(Lib, path, this.#options)
t.context = {
...t,
connect: this.#curry(this.#connect)(Lib),
db,
errorType: Lib.SqliteError,
path,
provider,
};
t.teardown(() => db.close());
await this.#runBeforeEach(t);
await fn(t, ...rest);
};
}
#connect(constructor, path, options) {
return new constructor(path, options);
}
#curry(fn) {
return (first) => (...rest) => fn(first, ...rest);
}
}
export default DualTest;


@@ -1,530 +0,0 @@
import crypto from "crypto";
import fs from "fs";
import DualTest from "./dual-test.mjs";
const dualTest = new DualTest();
new DualTest(":memory:").both("Open in-memory database", async (t) => {
const db = t.context.db;
t.is(db.memory, true);
});
dualTest.beforeEach(async (t) => {
const db = t.context.db;
db.exec(`
DROP TABLE IF EXISTS users;
CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT, email TEXT)
`);
db.exec(
"INSERT INTO users (id, name, email) VALUES (1, 'Alice', 'alice@example.org')",
);
db.exec(
"INSERT INTO users (id, name, email) VALUES (2, 'Bob', 'bob@example.com')",
);
});
dualTest.onlySqlitePasses("Statement.prepare() error", async (t) => {
const db = t.context.db;
t.throws(
() => {
return db.prepare("SYNTAX ERROR");
},
{
any: true,
instanceOf: t.context.errorType,
message: 'near "SYNTAX": syntax error',
},
);
});
dualTest.both("Statement.run() returning rows", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT 1");
const info = stmt.run();
t.is(info.changes, 0);
});
dualTest.both("Statement.run() [positional]", async (t) => {
const db = t.context.db;
const stmt = db.prepare("INSERT INTO users(name, email) VALUES (?, ?)");
const info = stmt.run(["Carol", "carol@example.net"]);
t.is(info.changes, 1);
t.is(info.lastInsertRowid, 3);
// Verify that the data is inserted
const stmt2 = db.prepare("SELECT * FROM users WHERE id = 3");
t.is(stmt2.get().name, "Carol");
t.is(stmt2.get().email, "carol@example.net");
});
dualTest.both("Statement.run() [named]", async (t) => {
const db = t.context.db;
const stmt = db.prepare(
"INSERT INTO users(name, email) VALUES (@name, @email);",
);
const info = stmt.run({ name: "Carol", email: "carol@example.net" });
t.is(info.changes, 1);
t.is(info.lastInsertRowid, 3);
});
dualTest.both("Statement.get() returns no rows", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users WHERE id = 0");
t.is(stmt.get(), undefined);
});
dualTest.both("Statement.get() [no parameters]", async (t) => {
const db = t.context.db;
var stmt = 0;
stmt = db.prepare("SELECT * FROM users");
t.is(stmt.get().name, "Alice");
t.deepEqual(stmt.raw().get(), [1, "Alice", "alice@example.org"]);
});
dualTest.both("Statement.get() [positional]", async (t) => {
const db = t.context.db;
var stmt = 0;
stmt = db.prepare("SELECT * FROM users WHERE id = ?");
t.is(stmt.get(0), undefined);
t.is(stmt.get([0]), undefined);
t.is(stmt.get(1).name, "Alice");
t.is(stmt.get(2).name, "Bob");
stmt = db.prepare("SELECT * FROM users WHERE id = ?1");
t.is(stmt.get({ 1: 0 }), undefined);
t.is(stmt.get({ 1: 1 }).name, "Alice");
t.is(stmt.get({ 1: 2 }).name, "Bob");
});
dualTest.both("Statement.get() [named]", async (t) => {
const db = t.context.db;
var stmt = undefined;
stmt = db.prepare("SELECT :b, :a");
t.deepEqual(stmt.raw().get({ a: "a", b: "b" }), ["b", "a"]);
stmt = db.prepare("SELECT * FROM users WHERE id = :id");
t.is(stmt.get({ id: 0 }), undefined);
t.is(stmt.get({ id: 1 }).name, "Alice");
t.is(stmt.get({ id: 2 }).name, "Bob");
stmt = db.prepare("SELECT * FROM users WHERE id = @id");
t.is(stmt.get({ id: 0 }), undefined);
t.is(stmt.get({ id: 1 }).name, "Alice");
t.is(stmt.get({ id: 2 }).name, "Bob");
stmt = db.prepare("SELECT * FROM users WHERE id = $id");
t.is(stmt.get({ id: 0 }), undefined);
t.is(stmt.get({ id: 1 }).name, "Alice");
t.is(stmt.get({ id: 2 }).name, "Bob");
});
dualTest.both("Statement.get() [raw]", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users WHERE id = ?");
t.deepEqual(stmt.raw().get(1), [1, "Alice", "alice@example.org"]);
});
dualTest.onlySqlitePasses("Statement.iterate() [empty]", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users WHERE id = 0");
t.is(stmt.iterate().next().done, true);
t.is(stmt.iterate([]).next().done, true);
t.is(stmt.iterate({}).next().done, true);
});
dualTest.onlySqlitePasses("Statement.iterate()", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users");
const expected = [1, 2];
var idx = 0;
for (const row of stmt.iterate()) {
t.is(row.id, expected[idx++]);
}
});
dualTest.both("Statement.all()", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users");
const expected = [
{ id: 1, name: "Alice", email: "alice@example.org" },
{ id: 2, name: "Bob", email: "bob@example.com" },
];
t.deepEqual(stmt.all(), expected);
});
dualTest.both("Statement.all() [raw]", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users");
const expected = [
[1, "Alice", "alice@example.org"],
[2, "Bob", "bob@example.com"],
];
t.deepEqual(stmt.raw().all(), expected);
});
dualTest.both("Statement.all() [pluck]", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users");
const expected = [1, 2];
t.deepEqual(stmt.pluck().all(), expected);
});
dualTest.both(
"Statement.raw() [passing false should disable raw mode]",
async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users");
const expected = [
{ id: 1, name: "Alice", email: "alice@example.org" },
{ id: 2, name: "Bob", email: "bob@example.com" },
];
t.deepEqual(stmt.raw(false).all(), expected);
},
);
dualTest.both(
"Statement.pluck() [passing false should disable pluck mode]",
async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users");
const expected = [
{ id: 1, name: "Alice", email: "alice@example.org" },
{ id: 2, name: "Bob", email: "bob@example.com" },
];
t.deepEqual(stmt.pluck(false).all(), expected);
},
);
dualTest.onlySqlitePasses(
"Statement.all() [default safe integers]",
async (t) => {
const db = t.context.db;
db.defaultSafeIntegers();
const stmt = db.prepare("SELECT * FROM users");
const expected = [
[1n, "Alice", "alice@example.org"],
[2n, "Bob", "bob@example.com"],
];
t.deepEqual(stmt.raw().all(), expected);
},
);
dualTest.onlySqlitePasses(
"Statement.all() [statement safe integers]",
async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT * FROM users");
stmt.safeIntegers();
const expected = [
[1n, "Alice", "alice@example.org"],
[2n, "Bob", "bob@example.com"],
];
t.deepEqual(stmt.raw().all(), expected);
},
);
dualTest.onlySqlitePasses("Statement.raw() [failure]", async (t) => {
const db = t.context.db;
const stmt = db.prepare(
"INSERT INTO users (id, name, email) VALUES (?, ?, ?)",
);
await t.throws(
() => {
stmt.raw();
},
{
message: "The raw() method is only for statements that return data",
},
);
});
dualTest.onlySqlitePasses(
"Statement.run() with array bind parameter",
async (t) => {
const db = t.context.db;
db.exec(`
DROP TABLE IF EXISTS t;
CREATE TABLE t (value BLOB);
`);
const array = [1, 2, 3];
const insertStmt = db.prepare("INSERT INTO t (value) VALUES (?)");
await t.throws(
() => {
insertStmt.run([array]);
},
{
message:
"SQLite3 can only bind numbers, strings, bigints, buffers, and null",
},
);
},
);
dualTest.onlySqlitePasses(
"Statement.run() with Float32Array bind parameter",
async (t) => {
const db = t.context.db;
db.exec(`
DROP TABLE IF EXISTS t;
CREATE TABLE t (value BLOB);
`);
const array = new Float32Array([1, 2, 3]);
const insertStmt = db.prepare("INSERT INTO t (value) VALUES (?)");
insertStmt.run([array]);
const selectStmt = db.prepare("SELECT value FROM t");
t.deepEqual(selectStmt.raw().get()[0], Buffer.from(array.buffer));
},
);
/// This test is not supported by better-sqlite3, but is supported by libsql.
/// Therefore, when implementing it in Turso, only enable the test for Turso.
dualTest.skip(
"Statement.run() for vector feature with Float32Array bind parameter",
async (t) => {
const db = t.context.db;
db.exec(`
DROP TABLE IF EXISTS t;
CREATE TABLE t (embedding FLOAT32(8));
CREATE INDEX t_idx ON t ( libsql_vector_idx(embedding) );
`);
const insertStmt = db.prepare("INSERT INTO t VALUES (?)");
insertStmt.run([new Float32Array([1, 1, 1, 1, 1, 1, 1, 1])]);
insertStmt.run([new Float32Array([-1, -1, -1, -1, -1, -1, -1, -1])]);
const selectStmt = db.prepare(
"SELECT embedding FROM vector_top_k('t_idx', vector('[2,2,2,2,2,2,2,2]'), 1) n JOIN t ON n.rowid = t.rowid",
);
t.deepEqual(
selectStmt.raw().get()[0],
Buffer.from(new Float32Array([1, 1, 1, 1, 1, 1, 1, 1]).buffer),
);
// we need to explicitly delete this table because later when sqlite-based (not LibSQL) tests will delete table 't' they will leave 't_idx_shadow' table untouched
db.exec(`DROP TABLE t`);
},
);
dualTest.onlySqlitePasses("Statement.columns()", async (t) => {
const db = t.context.db;
var stmt = undefined;
stmt = db.prepare("SELECT 1");
t.deepEqual(stmt.columns(), [
{
column: null,
database: null,
name: "1",
table: null,
type: null,
},
]);
stmt = db.prepare("SELECT * FROM users WHERE id = ?");
t.deepEqual(stmt.columns(), [
{
column: "id",
database: "main",
name: "id",
table: "users",
type: "INTEGER",
},
{
column: "name",
database: "main",
name: "name",
table: "users",
type: "TEXT",
},
{
column: "email",
database: "main",
name: "email",
table: "users",
type: "TEXT",
},
]);
});
dualTest.onlySqlitePasses("Database.transaction()", async (t) => {
const db = t.context.db;
const insert = db.prepare(
"INSERT INTO users(name, email) VALUES (:name, :email)",
);
const insertMany = db.transaction((users) => {
t.is(db.inTransaction, true);
for (const user of users) insert.run(user);
});
t.is(db.inTransaction, false);
insertMany([
{ name: "Joey", email: "joey@example.org" },
{ name: "Sally", email: "sally@example.org" },
{ name: "Junior", email: "junior@example.org" },
]);
t.is(db.inTransaction, false);
const stmt = db.prepare("SELECT * FROM users WHERE id = ?");
t.is(stmt.get(3).name, "Joey");
t.is(stmt.get(4).name, "Sally");
t.is(stmt.get(5).name, "Junior");
});
dualTest.onlySqlitePasses("Database.transaction().immediate()", async (t) => {
const db = t.context.db;
const insert = db.prepare(
"INSERT INTO users(name, email) VALUES (:name, :email)",
);
const insertMany = db.transaction((users) => {
t.is(db.inTransaction, true);
for (const user of users) insert.run(user);
});
t.is(db.inTransaction, false);
insertMany.immediate([
{ name: "Joey", email: "joey@example.org" },
{ name: "Sally", email: "sally@example.org" },
{ name: "Junior", email: "junior@example.org" },
]);
t.is(db.inTransaction, false);
});
dualTest.onlySqlitePasses("values", async (t) => {
const db = t.context.db;
const stmt = db.prepare("SELECT ?").raw();
t.deepEqual(stmt.get(1), [1]);
t.deepEqual(stmt.get(Number.MIN_VALUE), [Number.MIN_VALUE]);
t.deepEqual(stmt.get(Number.MAX_VALUE), [Number.MAX_VALUE]);
t.deepEqual(stmt.get(Number.MAX_SAFE_INTEGER), [Number.MAX_SAFE_INTEGER]);
t.deepEqual(stmt.get(9007199254740991n), [9007199254740991]);
});
dualTest.both("Database.pragma()", async (t) => {
const db = t.context.db;
db.pragma("cache_size = 2000");
t.deepEqual(db.pragma("cache_size"), [{ cache_size: 2000 }]);
});
dualTest.both("errors", async (t) => {
const db = t.context.db;
const syntaxError = await t.throws(
() => {
db.exec("SYNTAX ERROR");
},
{
any: true,
instanceOf: t.context.errorType,
message: /near "SYNTAX": syntax error/,
code: "SQLITE_ERROR",
},
);
const noTableError = await t.throws(
() => {
db.exec("SELECT * FROM missing_table");
},
{
any: true,
instanceOf: t.context.errorType,
message:
/(Parse error: Table missing_table not found|no such table: missing_table)/,
code: "SQLITE_ERROR",
},
);
if (t.context.provider === "libsql") {
t.is(noTableError.rawCode, 1);
t.is(syntaxError.rawCode, 1);
}
});
dualTest.onlySqlitePasses("Database.prepare() after close()", async (t) => {
const db = t.context.db;
db.close();
t.throws(
() => {
db.prepare("SELECT 1");
},
{
instanceOf: TypeError,
message: "The database connection is not open",
},
);
});
dualTest.onlySqlitePasses("Database.exec() after close()", async (t) => {
const db = t.context.db;
db.close();
t.throws(
() => {
db.exec("SELECT 1");
},
{
instanceOf: TypeError,
message: "The database connection is not open",
},
);
});
/// Generate a unique database filename
const genDatabaseFilename = () => {
return `test-${crypto.randomBytes(8).toString("hex")}.db`;
};
new DualTest(genDatabaseFilename).onlySqlitePasses(
"Timeout option",
async (t) => {
t.teardown(() => fs.unlinkSync(t.context.path));
const timeout = 1000;
const { db: conn1 } = t.context;
conn1.exec("CREATE TABLE t(x)");
conn1.exec("BEGIN IMMEDIATE");
conn1.exec("INSERT INTO t VALUES (1)");
const options = { timeout };
const conn2 = t.context.connect(t.context.path, options);
const start = Date.now();
try {
conn2.exec("INSERT INTO t VALUES (1)");
} catch (e) {
t.is(e.code, "SQLITE_BUSY");
const end = Date.now();
const elapsed = end - start;
// Allow some tolerance for the timeout.
t.is(elapsed > timeout / 2, true);
}
conn1.close();
conn2.close();
},
);


@@ -0,0 +1,70 @@
// Bind parameters to a statement.
//
// This function is used to bind parameters to a statement. It supports both
// named and positional parameters, and nested arrays.
//
// The `stmt` parameter is a statement object.
// The `params` parameter is an array of parameters.
//
// The function returns void.
function bindParams(stmt, params) {
const len = params?.length;
if (len === 0) {
return;
}
if (len === 1) {
const param = params[0];
if (isPlainObject(param)) {
bindNamedParams(stmt, param);
return;
}
bindValue(stmt, 1, param);
return;
}
bindPositionalParams(stmt, params);
}
// Check if object is plain (no prototype chain)
function isPlainObject(obj) {
if (!obj || typeof obj !== 'object') return false;
const proto = Object.getPrototypeOf(obj);
return proto === Object.prototype || proto === null;
}
// Handle named parameters
function bindNamedParams(stmt, paramObj) {
const paramCount = stmt.parameterCount();
for (let i = 1; i <= paramCount; i++) {
const paramName = stmt.parameterName(i);
if (paramName) {
const key = paramName.substring(1); // Remove ':', '@' or '$' prefix
const value = paramObj[key];
if (value !== undefined) {
bindValue(stmt, i, value);
}
}
}
}
// Handle positional parameters (including nested arrays)
function bindPositionalParams(stmt, params) {
let bindIndex = 1;
for (let i = 0; i < params.length; i++) {
const param = params[i];
if (Array.isArray(param)) {
for (let j = 0; j < param.length; j++) {
bindValue(stmt, bindIndex++, param[j]);
}
} else {
bindValue(stmt, bindIndex++, param);
}
}
}
function bindValue(stmt, index, value) {
stmt.bindAt(index, value);
}
module.exports = { bindParams };
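
For reference, a minimal sketch of how this helper is driven. It assumes the NAPI-generated `Database`/`Statement` classes declared in `index.d.ts` are reachable from the package's `index.js`; the call shapes follow from the code above and are not lines taken from this commit.

```js
// Sketch only: Database/Statement are assumed to be the NAPI classes from index.js.
const { Database } = require("./index.js");
const { bindParams } = require("./bind.js");

const db = new Database(":memory:");

// Positional parameters: each value lands at the next 1-based index.
const positional = db.prepare("SELECT ? AS a, ? AS b");
bindParams(positional, ["Alice", 42]);

// A single plain object is treated as named parameters; :name, @name and $name
// placeholders are matched by stripping the one-character prefix.
const named = db.prepare("SELECT :name AS name, $age AS age");
bindParams(named, [{ name: "Alice", age: 42 }]);

// Nested arrays are flattened in order, so this binds "Alice", 42, "UK".
const nested = db.prepare("SELECT ?, ?, ?");
bindParams(nested, [["Alice", 42], "UK"]);
```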


@@ -1,46 +1,110 @@
/* auto-generated by NAPI-RS */
/* eslint-disable */
/** A database connection. */
export declare class Database {
memory: boolean
readonly: boolean
open: boolean
name: string
constructor(path: string, options?: OpenDatabaseOptions | undefined | null)
/**
* Creates a new database instance.
*
* # Arguments
* * `path` - The path to the database file.
*/
constructor(path: string)
/** Returns whether the database is in memory-only mode. */
get memory(): boolean
/**
* Executes a batch of SQL statements.
*
* # Arguments
*
* * `sql` - The SQL statements to execute.
*
* # Returns
*/
batch(sql: string): void
/**
* Prepares a statement for execution.
*
* # Arguments
*
* * `sql` - The SQL statement to prepare.
*
* # Returns
*
* A `Statement` instance.
*/
prepare(sql: string): Statement
pragma(pragmaName: string, options?: PragmaOptions | undefined | null): unknown
backup(): void
serialize(): void
function(): void
aggregate(): void
table(): void
loadExtension(path: string): void
exec(sql: string): void
/**
* Returns the rowid of the last row inserted.
*
* # Returns
*
* The rowid of the last row inserted.
*/
lastInsertRowid(): number
/**
* Returns the number of changes made by the last statement.
*
* # Returns
*
* The number of changes made by the last statement.
*/
changes(): number
/**
* Returns the total number of changes made by all statements.
*
* # Returns
*
* The total number of changes made by all statements.
*/
totalChanges(): number
/**
* Closes the database connection.
*
* # Returns
*
* `Ok(())` if the database is closed successfully.
*/
close(): void
/** Runs the I/O loop synchronously. */
ioLoopSync(): void
/** Runs the I/O loop asynchronously, returning a Promise. */
ioLoopAsync(): Promise<void>
}
/** A prepared statement. */
export declare class Statement {
source: string
get(args?: Array<unknown> | undefined | null): unknown
run(args?: Array<unknown> | undefined | null): RunResult
all(args?: Array<unknown> | undefined | null): unknown
pluck(pluck?: boolean | undefined | null): void
static expand(): void
reset(): void
/** Returns the number of parameters in the statement. */
parameterCount(): number
/**
* Returns the name of a parameter at a specific 1-based index.
*
* # Arguments
*
* * `index` - The 1-based parameter index.
*/
parameterName(index: number): string | null
/**
* Binds a parameter at a specific 1-based index with explicit type.
*
* # Arguments
*
* * `index` - The 1-based parameter index.
* * `value_type` - The type constant (0=null, 1=int, 2=float, 3=text, 4=blob).
* * `value` - The value to bind.
*/
bindAt(index: number, value: unknown): void
/**
* Step the statement and return result code:
* 1 = Row available, 2 = Done, 3 = I/O needed
*/
step(): number
/** Get the current row data according to the presentation mode */
row(): unknown
/** Sets the presentation mode to raw. */
raw(raw?: boolean | undefined | null): void
static columns(): void
bind(args?: Array<unknown> | undefined | null): Statement
}
export interface OpenDatabaseOptions {
readonly?: boolean
fileMustExist?: boolean
timeout?: number
}
export interface PragmaOptions {
simple: boolean
}
export interface RunResult {
changes: number
lastInsertRowid: number
/** Sets the presentation mode to pluck. */
pluck(pluck?: boolean | undefined | null): void
/** Finalizes the statement. */
finalize(): void
}
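
To make the stepping contract above concrete, here is a sketch of driving a prepared statement through this low-level API. It assumes the package's default export is the `Database` class declared here (the perf scripts added later in this commit import it the same way) and that `step()`, `row()` and `ioLoopAsync()` behave exactly as their documented return codes say; none of it is code from this commit.

```js
// Sketch only: assumes the default export is the Database class declared above
// and that step() returns 1 = row available, 2 = done, 3 = I/O needed.
import Database from "@tursodatabase/turso";

const db = new Database(":memory:");
db.exec("CREATE TABLE users (id INTEGER, name TEXT)");
db.exec("INSERT INTO users VALUES (1, 'Alice')");

const stmt = db.prepare("SELECT id, name FROM users WHERE id = ?");
stmt.bindAt(1, 1); // parameter indexes are 1-based

const rows = [];
for (;;) {
  const rc = stmt.step();
  if (rc === 1) rows.push(stmt.row());       // row available
  else if (rc === 3) await db.ioLoopAsync(); // let pending I/O complete
  else break;                                // done
}
stmt.reset();
console.log(rows); // e.g. [{ id: 1, name: 'Alice' }] in the default mode
```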


@@ -1,12 +1,12 @@
{
"name": "@tursodatabase/turso",
"version": "0.1.3",
"version": "0.1.4-pre.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@tursodatabase/turso",
"version": "0.1.3",
"version": "0.1.4-pre.1",
"license": "MIT",
"dependencies": {
"@napi-rs/wasm-runtime": "^1.0.1"


@@ -1,6 +1,6 @@
{
"name": "@tursodatabase/turso",
"version": "0.1.3",
"version": "0.1.4-pre.1",
"repository": {
"type": "git",
"url": "https://github.com/tursodatabase/turso"
@@ -12,6 +12,7 @@
"./sync": "./sync.js"
},
"files": [
"bind.js",
"browser.js",
"index.js",
"promise.js",
@@ -46,7 +47,7 @@
"build": "napi build --platform --release",
"build:debug": "napi build --platform",
"prepublishOnly": "napi prepublish -t npm",
"test": "ava -s",
"test": "true",
"universal": "napi universalize",
"version": "napi version"
},

bindings/javascript/perf/package-lock.json generated Normal file

@@ -0,0 +1,487 @@
{
"name": "turso-perf",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "turso-perf",
"dependencies": {
"@tursodatabase/turso": "..",
"better-sqlite3": "^9.5.0",
"mitata": "^0.1.11"
}
},
"..": {
"name": "@tursodatabase/turso",
"version": "0.1.3",
"license": "MIT",
"devDependencies": {
"@napi-rs/cli": "^3.0.4",
"@napi-rs/wasm-runtime": "^1.0.1",
"ava": "^6.0.1",
"better-sqlite3": "^11.9.1"
},
"engines": {
"node": ">= 10"
}
},
"node_modules/@tursodatabase/turso": {
"resolved": "..",
"link": true
},
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT"
},
"node_modules/better-sqlite3": {
"version": "9.6.0",
"resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-9.6.0.tgz",
"integrity": "sha512-yR5HATnqeYNVnkaUTf4bOP2dJSnyhP4puJN/QPRyx4YkBEEUxib422n2XzPqDEHjQQqazoYoADdAm5vE15+dAQ==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"bindings": "^1.5.0",
"prebuild-install": "^7.1.1"
}
},
"node_modules/bindings": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz",
"integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==",
"license": "MIT",
"dependencies": {
"file-uri-to-path": "1.0.0"
}
},
"node_modules/bl": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
"integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
"license": "MIT",
"dependencies": {
"buffer": "^5.5.0",
"inherits": "^2.0.4",
"readable-stream": "^3.4.0"
}
},
"node_modules/buffer": {
"version": "5.7.1",
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT",
"dependencies": {
"base64-js": "^1.3.1",
"ieee754": "^1.1.13"
}
},
"node_modules/chownr": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
"integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==",
"license": "ISC"
},
"node_modules/decompress-response": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
"license": "MIT",
"dependencies": {
"mimic-response": "^3.1.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/deep-extend": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
"license": "MIT",
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/detect-libc": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.4.tgz",
"integrity": "sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==",
"license": "Apache-2.0",
"engines": {
"node": ">=8"
}
},
"node_modules/end-of-stream": {
"version": "1.4.5",
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz",
"integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
"license": "MIT",
"dependencies": {
"once": "^1.4.0"
}
},
"node_modules/expand-template": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
"integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
"license": "(MIT OR WTFPL)",
"engines": {
"node": ">=6"
}
},
"node_modules/file-uri-to-path": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
"integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==",
"license": "MIT"
},
"node_modules/fs-constants": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==",
"license": "MIT"
},
"node_modules/github-from-package": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
"integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==",
"license": "MIT"
},
"node_modules/ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "BSD-3-Clause"
},
"node_modules/inherits": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
"license": "ISC"
},
"node_modules/ini": {
"version": "1.3.8",
"resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
"license": "ISC"
},
"node_modules/mimic-response": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
"license": "MIT",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/minimist": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/mitata": {
"version": "0.1.14",
"resolved": "https://registry.npmjs.org/mitata/-/mitata-0.1.14.tgz",
"integrity": "sha512-8kRs0l636eT4jj68PFXOR2D5xl4m56T478g16SzUPOYgkzQU+xaw62guAQxzBPm+SXb15GQi1cCpDxJfkr4CSA==",
"license": "MIT"
},
"node_modules/mkdirp-classic": {
"version": "0.5.3",
"resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==",
"license": "MIT"
},
"node_modules/napi-build-utils": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz",
"integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==",
"license": "MIT"
},
"node_modules/node-abi": {
"version": "3.75.0",
"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.75.0.tgz",
"integrity": "sha512-OhYaY5sDsIka7H7AtijtI9jwGYLyl29eQn/W623DiN/MIv5sUqc4g7BIDThX+gb7di9f6xK02nkp8sdfFWZLTg==",
"license": "MIT",
"dependencies": {
"semver": "^7.3.5"
},
"engines": {
"node": ">=10"
}
},
"node_modules/once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
"license": "ISC",
"dependencies": {
"wrappy": "1"
}
},
"node_modules/prebuild-install": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz",
"integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==",
"license": "MIT",
"dependencies": {
"detect-libc": "^2.0.0",
"expand-template": "^2.0.3",
"github-from-package": "0.0.0",
"minimist": "^1.2.3",
"mkdirp-classic": "^0.5.3",
"napi-build-utils": "^2.0.0",
"node-abi": "^3.3.0",
"pump": "^3.0.0",
"rc": "^1.2.7",
"simple-get": "^4.0.0",
"tar-fs": "^2.0.0",
"tunnel-agent": "^0.6.0"
},
"bin": {
"prebuild-install": "bin.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/pump": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz",
"integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==",
"license": "MIT",
"dependencies": {
"end-of-stream": "^1.1.0",
"once": "^1.3.1"
}
},
"node_modules/rc": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
"integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
"license": "(BSD-2-Clause OR MIT OR Apache-2.0)",
"dependencies": {
"deep-extend": "^0.6.0",
"ini": "~1.3.0",
"minimist": "^1.2.0",
"strip-json-comments": "~2.0.1"
},
"bin": {
"rc": "cli.js"
}
},
"node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
"license": "MIT",
"dependencies": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT"
},
"node_modules/semver": {
"version": "7.7.2",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz",
"integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==",
"license": "ISC",
"bin": {
"semver": "bin/semver.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/simple-concat": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz",
"integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT"
},
"node_modules/simple-get": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz",
"integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT",
"dependencies": {
"decompress-response": "^6.0.0",
"once": "^1.3.1",
"simple-concat": "^1.0.0"
}
},
"node_modules/string_decoder": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
"license": "MIT",
"dependencies": {
"safe-buffer": "~5.2.0"
}
},
"node_modules/strip-json-comments": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
"integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/tar-fs": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.3.tgz",
"integrity": "sha512-090nwYJDmlhwFwEW3QQl+vaNnxsO2yVsd45eTKRBzSzu+hlb1w2K9inVq5b0ngXuLVqQ4ApvsUHHnu/zQNkWAg==",
"license": "MIT",
"dependencies": {
"chownr": "^1.1.1",
"mkdirp-classic": "^0.5.2",
"pump": "^3.0.0",
"tar-stream": "^2.1.4"
}
},
"node_modules/tar-stream": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
"license": "MIT",
"dependencies": {
"bl": "^4.0.3",
"end-of-stream": "^1.4.1",
"fs-constants": "^1.0.0",
"inherits": "^2.0.3",
"readable-stream": "^3.1.1"
},
"engines": {
"node": ">=6"
}
},
"node_modules/tunnel-agent": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
"integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
"license": "Apache-2.0",
"dependencies": {
"safe-buffer": "^5.0.1"
},
"engines": {
"node": "*"
}
},
"node_modules/util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
"license": "MIT"
},
"node_modules/wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"license": "ISC"
}
}
}

View File

@@ -0,0 +1,10 @@
{
"name": "turso-perf",
"type": "module",
"private": true,
"dependencies": {
"better-sqlite3": "^9.5.0",
"@tursodatabase/turso": "..",
"mitata": "^0.1.11"
}
}

View File

@@ -0,0 +1,34 @@
import { run, bench, group, baseline } from 'mitata';
import Database from 'better-sqlite3';
const db = new Database(':memory:');
db.exec("CREATE TABLE users (id INTEGER, name TEXT, email TEXT)");
db.exec("INSERT INTO users (id, name, email) VALUES (1, 'Alice', 'alice@example.org')");
const stmtSelect = db.prepare("SELECT * FROM users WHERE id = ?");
const rawStmtSelect = db.prepare("SELECT * FROM users WHERE id = ?").raw();
const stmtInsert = db.prepare("INSERT INTO users (id, name, email) VALUES (?, ?, ?)");
bench('Statement.get() with bind parameters [expanded]', () => {
stmtSelect.get(1);
});
bench('Statement.get() with bind parameters [raw]', () => {
rawStmtSelect.get(1);
});
bench('Statement.run() with bind parameters', () => {
stmtInsert.run([1, 'foobar', 'foobar@example.com']);
});
await run({
units: false,
silent: false,
avg: true,
json: false,
colors: true,
min_max: true,
percentiles: true,
});

View File

@@ -0,0 +1,34 @@
import { run, bench, group, baseline } from 'mitata';
import Database from '@tursodatabase/turso';
const db = new Database(':memory:');
db.exec("CREATE TABLE users (id INTEGER, name TEXT, email TEXT)");
db.exec("INSERT INTO users (id, name, email) VALUES (1, 'Alice', 'alice@example.org')");
const stmtSelect = db.prepare("SELECT * FROM users WHERE id = ?");
const rawStmtSelect = db.prepare("SELECT * FROM users WHERE id = ?").raw();
const stmtInsert = db.prepare("INSERT INTO users (id, name, email) VALUES (?, ?, ?)");
bench('Statement.get() with bind parameters [expanded]', () => {
stmtSelect.get(1);
});
bench('Statement.get() with bind parameters [raw]', () => {
rawStmtSelect.get(1);
});
bench('Statement.run() with bind parameters', () => {
stmtInsert.run([1, 'foobar', 'foobar@example.com']);
});
await run({
units: false,
silent: false,
avg: true,
json: false,
colors: true,
min_max: true,
percentiles: true,
});

View File

@@ -1,9 +1,15 @@
"use strict";
const { Database: NativeDB } = require("./index.js");
const { bindParams } = require("./bind.js");
const SqliteError = require("./sqlite-error.js");
// Step result constants
const STEP_ROW = 1;
const STEP_DONE = 2;
const STEP_IO = 3;
const convertibleErrorTypes = { TypeError };
const CONVERTIBLE_ERROR_PREFIX = "[TURSO_CONVERT_TYPE]";
@@ -138,12 +144,12 @@ class Database {
if (typeof options !== "object")
throw new TypeError("Expected second argument to be an options object");
const simple = options["simple"];
const pragma = `PRAGMA ${source}`;
return simple
? this.db.pragma(source, { simple: true })
: this.db.pragma(source);
const stmt = this.prepare(pragma);
const results = stmt.all();
return results;
}
backup(filename, options) {
@@ -181,7 +187,7 @@ class Database {
*/
exec(sql) {
try {
this.db.exec(sql);
this.db.batch(sql);
} catch (err) {
throw convertError(err);
}
@@ -250,8 +256,31 @@ class Statement {
/**
* Executes the SQL statement and returns an info object.
*/
run(...bindParameters) {
return this.stmt.run(bindParameters.flat());
async run(...bindParameters) {
const totalChangesBefore = this.db.db.totalChanges();
this.stmt.reset();
bindParams(this.stmt, bindParameters);
while (true) {
const stepResult = this.stmt.step();
if (stepResult === STEP_IO) {
await this.db.db.ioLoopAsync();
continue;
}
if (stepResult === STEP_DONE) {
break;
}
if (stepResult === STEP_ROW) {
// For run(), we don't need the row data, just continue
continue;
}
}
const lastInsertRowid = this.db.db.lastInsertRowid();
const changes = this.db.db.totalChanges() === totalChangesBefore ? 0 : this.db.db.changes();
return { changes, lastInsertRowid };
}
/**
@@ -259,8 +288,23 @@ class Statement {
*
* @param bindParameters - The bind parameters for executing the statement.
*/
get(...bindParameters) {
return this.stmt.get(bindParameters.flat());
async get(...bindParameters) {
this.stmt.reset();
bindParams(this.stmt, bindParameters);
while (true) {
const stepResult = this.stmt.step();
if (stepResult === STEP_IO) {
await this.db.db.ioLoopAsync();
continue;
}
if (stepResult === STEP_DONE) {
return undefined;
}
if (stepResult === STEP_ROW) {
return this.stmt.row();
}
}
}
/**
@@ -277,8 +321,25 @@ class Statement {
*
* @param bindParameters - The bind parameters for executing the statement.
*/
all(...bindParameters) {
return this.stmt.all(bindParameters.flat());
async all(...bindParameters) {
this.stmt.reset();
bindParams(this.stmt, bindParameters);
const rows = [];
while (true) {
const stepResult = this.stmt.step();
if (stepResult === STEP_IO) {
await this.db.db.ioLoopAsync();
continue;
}
if (stepResult === STEP_DONE) {
break;
}
if (stepResult === STEP_ROW) {
rows.push(this.stmt.row());
}
}
return rows;
}
/**
@@ -304,7 +365,8 @@ class Statement {
*/
bind(...bindParameters) {
try {
return new Statement(this.stmt.bind(bindParameters.flat()), this.db);
bindParams(this.stmt, bindParameters);
return this;
} catch (err) {
throw convertError(err);
}

File diff suppressed because it is too large

View File

@@ -1,9 +1,15 @@
"use strict";
const { Database: NativeDB } = require("./index.js");
const { bindParams } = require("./bind.js");
const SqliteError = require("./sqlite-error.js");
// Step result constants
const STEP_ROW = 1;
const STEP_DONE = 2;
const STEP_IO = 3;
const convertibleErrorTypes = { TypeError };
const CONVERTIBLE_ERROR_PREFIX = "[TURSO_CONVERT_TYPE]";
@@ -138,12 +144,12 @@ class Database {
if (typeof options !== "object")
throw new TypeError("Expected second argument to be an options object");
const simple = options["simple"];
const pragma = `PRAGMA ${source}`;
return simple
? this.db.pragma(source, { simple: true })
: this.db.pragma(source);
const stmt = this.prepare(pragma);
const results = stmt.all();
return results;
}
backup(filename, options) {
@@ -181,7 +187,7 @@ class Database {
*/
exec(sql) {
try {
this.db.exec(sql);
this.db.batch(sql);
} catch (err) {
throw convertError(err);
}
@@ -251,7 +257,29 @@ class Statement {
* Executes the SQL statement and returns an info object.
*/
run(...bindParameters) {
return this.stmt.run(bindParameters.flat());
const totalChangesBefore = this.db.db.totalChanges();
this.stmt.reset();
bindParams(this.stmt, bindParameters);
for (;;) {
const stepResult = this.stmt.step();
if (stepResult === STEP_IO) {
this.db.db.ioLoopSync();
continue;
}
if (stepResult === STEP_DONE) {
break;
}
if (stepResult === STEP_ROW) {
// For run(), we don't need the row data, just continue
continue;
}
}
const lastInsertRowid = this.db.db.lastInsertRowid();
const changes = this.db.db.totalChanges() === totalChangesBefore ? 0 : this.db.db.changes();
return { changes, lastInsertRowid };
}
/**
@@ -260,7 +288,21 @@ class Statement {
* @param bindParameters - The bind parameters for executing the statement.
*/
get(...bindParameters) {
return this.stmt.get(bindParameters.flat());
this.stmt.reset();
bindParams(this.stmt, bindParameters);
for (;;) {
const stepResult = this.stmt.step();
if (stepResult === STEP_IO) {
this.db.db.ioLoopSync();
continue;
}
if (stepResult === STEP_DONE) {
return undefined;
}
if (stepResult === STEP_ROW) {
return this.stmt.row();
}
}
}
/**
@@ -278,7 +320,23 @@ class Statement {
* @param bindParameters - The bind parameters for executing the statement.
*/
all(...bindParameters) {
return this.stmt.all(bindParameters.flat());
this.stmt.reset();
bindParams(this.stmt, bindParameters);
const rows = [];
for (;;) {
const stepResult = this.stmt.step();
if (stepResult === STEP_IO) {
this.db.db.ioLoopSync();
continue;
}
if (stepResult === STEP_DONE) {
break;
}
if (stepResult === STEP_ROW) {
rows.push(this.stmt.row());
}
}
return rows;
}
/**
@@ -304,7 +362,8 @@ class Statement {
*/
bind(...bindParameters) {
try {
return new Statement(this.stmt.bind(bindParameters.flat()), this.db);
bindParams(this.stmt, bindParameters);
return this;
} catch (err) {
throw convertError(err);
}

View File

@@ -317,7 +317,7 @@ impl Drop for Connection {
#[allow(clippy::arc_with_non_send_sync)]
#[pyfunction(signature = (path, experimental_indexes=None))]
pub fn connect(path: &str, experimental_indexes: Option<bool>) -> Result<Connection> {
let experimental_indexes = experimental_indexes.unwrap_or(false);
let experimental_indexes = experimental_indexes.unwrap_or(true);
match turso_core::Connection::from_uri(path, experimental_indexes, false) {
Ok((io, conn)) => Ok(Connection { conn, _io: io }),
Err(e) => Err(PyErr::new::<ProgrammingError, _>(format!(

View File

@@ -10,7 +10,7 @@ repository.workspace = true
description = "Turso Rust API"
[features]
default = []
default = ["experimental_indexes"]
experimental_indexes = []
antithesis = ["turso_core/antithesis"]

View File

@@ -281,7 +281,7 @@ impl Connection {
.inner
.lock()
.map_err(|e| Error::MutexError(e.to_string()))?;
let res = conn.cacheflush()?;
let _res = conn.cacheflush()?;
Ok(())
}
@@ -477,23 +477,26 @@ impl Rows {
.inner
.lock()
.map_err(|e| Error::MutexError(e.to_string()))?;
match stmt.step() {
Ok(turso_core::StepResult::Row) => {
match stmt.step()? {
turso_core::StepResult::Row => {
let row = stmt.row().unwrap();
return Ok(Some(Row {
values: row.get_values().map(|v| v.to_owned()).collect(),
}));
}
Ok(turso_core::StepResult::Done) => return Ok(None),
Ok(turso_core::StepResult::IO) => {
turso_core::StepResult::Done => return Ok(None),
turso_core::StepResult::IO => {
if let Err(e) = stmt.run_once() {
return Err(e.into());
}
continue;
}
Ok(turso_core::StepResult::Busy) => return Ok(None),
Ok(turso_core::StepResult::Interrupt) => return Ok(None),
_ => return Ok(None),
turso_core::StepResult::Busy => {
return Err(Error::SqlExecutionFailure("database is locked".to_string()))
}
turso_core::StepResult::Interrupt => {
return Err(Error::SqlExecutionFailure("interrupted".to_string()))
}
}
}
}
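With this change, Busy and Interrupt are reported to the caller instead of being collapsed into `Ok(None)`. A minimal caller sketch (hypothetical `rows` and `handle`; the exact `next()` signature is not shown in this hunk, so the call is written synchronously for brevity):
loop {
    match rows.next() {
        Ok(Some(row)) => handle(row),
        Ok(None) => break, // result set genuinely exhausted
        // previously Busy/Interrupt also ended the loop as Ok(None);
        // now they surface as SqlExecutionFailure errors the caller can retry or report
        Err(e) => return Err(e),
    }
}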

View File

@@ -1,461 +0,0 @@
#[cfg(all(feature = "web", feature = "nodejs"))]
compile_error!("Features 'web' and 'nodejs' cannot be enabled at the same time");
use js_sys::{Array, Object};
use std::cell::RefCell;
use std::sync::Arc;
use turso_core::{Clock, Instant, OpenFlags, Result};
use wasm_bindgen::prelude::*;
#[allow(dead_code)]
#[wasm_bindgen]
pub struct Database {
db: Arc<turso_core::Database>,
conn: Arc<turso_core::Connection>,
}
#[allow(clippy::arc_with_non_send_sync)]
#[wasm_bindgen]
impl Database {
#[wasm_bindgen(constructor)]
pub fn new(path: &str) -> Database {
let io: Arc<dyn turso_core::IO> = Arc::new(PlatformIO { vfs: VFS::new() });
let file = io.open_file(path, OpenFlags::Create, false).unwrap();
let db_file = Arc::new(DatabaseFile::new(file));
let db = turso_core::Database::open(io, path, db_file, false, false).unwrap();
let conn = db.connect().unwrap();
Database { db, conn }
}
#[wasm_bindgen]
pub fn exec(&self, _sql: &str) {
self.conn.execute(_sql).unwrap();
}
#[wasm_bindgen]
pub fn prepare(&self, _sql: &str) -> Statement {
let stmt = self.conn.prepare(_sql).unwrap();
Statement::new(RefCell::new(stmt), false)
}
}
#[wasm_bindgen]
pub struct RowIterator {
inner: RefCell<turso_core::Statement>,
}
#[wasm_bindgen]
impl RowIterator {
fn new(inner: RefCell<turso_core::Statement>) -> Self {
Self { inner }
}
#[wasm_bindgen]
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> JsValue {
let mut stmt = self.inner.borrow_mut();
match stmt.step() {
Ok(turso_core::StepResult::Row) => {
let row = stmt.row().unwrap();
let row_array = Array::new();
for value in row.get_values() {
let value = to_js_value(value);
row_array.push(&value);
}
JsValue::from(row_array)
}
Ok(turso_core::StepResult::IO) => JsValue::UNDEFINED,
Ok(turso_core::StepResult::Done) | Ok(turso_core::StepResult::Interrupt) => {
JsValue::UNDEFINED
}
Ok(turso_core::StepResult::Busy) => JsValue::UNDEFINED,
Err(e) => panic!("Error: {e:?}"),
}
}
}
#[wasm_bindgen]
pub struct Statement {
inner: RefCell<turso_core::Statement>,
raw: bool,
}
#[wasm_bindgen]
impl Statement {
fn new(inner: RefCell<turso_core::Statement>, raw: bool) -> Self {
Self { inner, raw }
}
#[wasm_bindgen]
pub fn raw(mut self, toggle: Option<bool>) -> Self {
self.raw = toggle.unwrap_or(true);
self
}
pub fn get(&self) -> JsValue {
let mut stmt = self.inner.borrow_mut();
match stmt.step() {
Ok(turso_core::StepResult::Row) => {
let row = stmt.row().unwrap();
let row_array = js_sys::Array::new();
for value in row.get_values() {
let value = to_js_value(value);
row_array.push(&value);
}
JsValue::from(row_array)
}
Ok(turso_core::StepResult::IO)
| Ok(turso_core::StepResult::Done)
| Ok(turso_core::StepResult::Interrupt)
| Ok(turso_core::StepResult::Busy) => JsValue::UNDEFINED,
Err(e) => panic!("Error: {e:?}"),
}
}
pub fn all(&self) -> js_sys::Array {
let array = js_sys::Array::new();
loop {
let mut stmt = self.inner.borrow_mut();
match stmt.step() {
Ok(turso_core::StepResult::Row) => {
let row = stmt.row().unwrap();
let row_array = js_sys::Array::new();
for value in row.get_values() {
let value = to_js_value(value);
row_array.push(&value);
}
array.push(&row_array);
}
Ok(turso_core::StepResult::IO) => {}
Ok(turso_core::StepResult::Interrupt) => break,
Ok(turso_core::StepResult::Done) => break,
Ok(turso_core::StepResult::Busy) => break,
Err(e) => panic!("Error: {e:?}"),
}
}
array
}
#[wasm_bindgen]
pub fn iterate(self) -> JsValue {
let iterator = RowIterator::new(self.inner);
let iterator_obj = Object::new();
// Define the next method that will be called by JavaScript
let next_fn = js_sys::Function::new_with_args(
"",
"const value = this.iterator.next();
const done = value === undefined;
return {
value,
done
};",
);
js_sys::Reflect::set(&iterator_obj, &JsValue::from_str("next"), &next_fn).unwrap();
js_sys::Reflect::set(
&iterator_obj,
&JsValue::from_str("iterator"),
&JsValue::from(iterator),
)
.unwrap();
let symbol_iterator = js_sys::Function::new_no_args("return this;");
js_sys::Reflect::set(&iterator_obj, &js_sys::Symbol::iterator(), &symbol_iterator).unwrap();
JsValue::from(iterator_obj)
}
}
fn to_js_value(value: &turso_core::Value) -> JsValue {
match value {
turso_core::Value::Null => JsValue::null(),
turso_core::Value::Integer(i) => {
let i = *i;
if i >= i32::MIN as i64 && i <= i32::MAX as i64 {
JsValue::from(i as i32)
} else {
JsValue::from(i)
}
}
turso_core::Value::Float(f) => JsValue::from(*f),
turso_core::Value::Text(t) => JsValue::from_str(t.as_str()),
turso_core::Value::Blob(b) => js_sys::Uint8Array::from(b.as_slice()).into(),
}
}
pub struct File {
vfs: VFS,
fd: i32,
}
unsafe impl Send for File {}
unsafe impl Sync for File {}
#[allow(dead_code)]
impl File {
fn new(vfs: VFS, fd: i32) -> Self {
Self { vfs, fd }
}
}
impl turso_core::File for File {
fn lock_file(&self, _exclusive: bool) -> Result<()> {
// TODO
Ok(())
}
fn unlock_file(&self) -> Result<()> {
// TODO
Ok(())
}
fn pread(
&self,
pos: usize,
c: turso_core::Completion,
) -> Result<turso_core::Completion> {
let r = match c.completion_type {
turso_core::CompletionType::Read(ref r) => r,
_ => unreachable!(),
};
let nr = {
let mut buf = r.buf_mut();
let buf: &mut [u8] = buf.as_mut_slice();
self.vfs.pread(self.fd, buf, pos)
};
r.complete(nr);
#[allow(clippy::arc_with_non_send_sync)]
Ok(c)
}
fn pwrite(
&self,
pos: usize,
buffer: Arc<std::cell::RefCell<turso_core::Buffer>>,
c: turso_core::Completion,
) -> Result<turso_core::Completion> {
let w = match c.completion_type {
turso_core::CompletionType::Write(ref w) => w,
_ => unreachable!(),
};
let buf = buffer.borrow();
let buf: &[u8] = buf.as_slice();
self.vfs.pwrite(self.fd, buf, pos);
w.complete(buf.len() as i32);
#[allow(clippy::arc_with_non_send_sync)]
Ok(c)
}
fn sync(&self, c: turso_core::Completion) -> Result<turso_core::Completion> {
self.vfs.sync(self.fd);
c.complete(0);
#[allow(clippy::arc_with_non_send_sync)]
Ok(c)
}
fn size(&self) -> Result<u64> {
Ok(self.vfs.size(self.fd))
}
fn truncate(
&self,
len: usize,
c: turso_core::Completion,
) -> Result<turso_core::Completion> {
self.vfs.truncate(self.fd, len);
c.complete(0);
#[allow(clippy::arc_with_non_send_sync)]
Ok(c)
}
}
pub struct PlatformIO {
vfs: VFS,
}
unsafe impl Send for PlatformIO {}
unsafe impl Sync for PlatformIO {}
impl Clock for PlatformIO {
fn now(&self) -> Instant {
let date = Date::new();
let ms_since_epoch = date.getTime();
Instant {
secs: (ms_since_epoch / 1000.0) as i64,
micros: ((ms_since_epoch % 1000.0) * 1000.0) as u32,
}
}
}
impl turso_core::IO for PlatformIO {
fn open_file(
&self,
path: &str,
_flags: OpenFlags,
_direct: bool,
) -> Result<Arc<dyn turso_core::File>> {
let fd = self.vfs.open(path, "a+");
Ok(Arc::new(File {
vfs: VFS::new(),
fd,
}))
}
fn wait_for_completion(&self, c: turso_core::Completion) -> Result<()> {
while !c.is_completed() {
self.run_once()?;
}
Ok(())
}
fn run_once(&self) -> Result<()> {
Ok(())
}
fn generate_random_number(&self) -> i64 {
let mut buf = [0u8; 8];
getrandom::getrandom(&mut buf).unwrap();
i64::from_ne_bytes(buf)
}
fn get_memory_io(&self) -> Arc<turso_core::MemoryIO> {
Arc::new(turso_core::MemoryIO::new())
}
}
#[wasm_bindgen]
extern "C" {
type Date;
#[wasm_bindgen(constructor)]
fn new() -> Date;
#[wasm_bindgen(method, getter)]
fn toISOString(this: &Date) -> String;
#[wasm_bindgen(method)]
fn getTime(this: &Date) -> f64;
}
pub struct DatabaseFile {
file: Arc<dyn turso_core::File>,
}
unsafe impl Send for DatabaseFile {}
unsafe impl Sync for DatabaseFile {}
impl DatabaseFile {
pub fn new(file: Arc<dyn turso_core::File>) -> Self {
Self { file }
}
}
impl turso_core::DatabaseStorage for DatabaseFile {
fn read_page(&self, page_idx: usize, c: turso_core::Completion) -> Result<()> {
let r = match c.completion_type {
turso_core::CompletionType::Read(ref r) => r,
_ => unreachable!(),
};
let size = r.buf().len();
assert!(page_idx > 0);
if !(512..=65536).contains(&size) || size & (size - 1) != 0 {
return Err(turso_core::LimboError::NotADB);
}
let pos = (page_idx - 1) * size;
self.file.pread(pos, c.into())?;
Ok(())
}
fn write_page(
&self,
page_idx: usize,
buffer: Arc<std::cell::RefCell<turso_core::Buffer>>,
c: turso_core::Completion,
) -> Result<()> {
let size = buffer.borrow().len();
let pos = (page_idx - 1) * size;
self.file.pwrite(pos, buffer, c.into())?;
Ok(())
}
fn sync(&self, c: turso_core::Completion) -> Result<()> {
let _ = self.file.sync(c.into())?;
Ok(())
}
fn size(&self) -> Result<u64> {
self.file.size()
}
fn truncate(&self, len: usize, c: turso_core::Completion) -> Result<()> {
self.file.truncate(len, c)?;
Ok(())
}
}
#[cfg(all(feature = "web", not(feature = "nodejs")))]
#[wasm_bindgen(module = "/web/src/web-vfs.js")]
extern "C" {
type VFS;
#[wasm_bindgen(constructor)]
fn new() -> VFS;
#[wasm_bindgen(method)]
fn open(this: &VFS, path: &str, flags: &str) -> i32;
#[wasm_bindgen(method)]
fn close(this: &VFS, fd: i32) -> bool;
#[wasm_bindgen(method)]
fn pwrite(this: &VFS, fd: i32, buffer: &[u8], offset: usize) -> i32;
#[wasm_bindgen(method)]
fn pread(this: &VFS, fd: i32, buffer: &mut [u8], offset: usize) -> i32;
#[wasm_bindgen(method)]
fn size(this: &VFS, fd: i32) -> u64;
#[wasm_bindgen(method)]
fn truncate(this: &VFS, fd: i32, len: usize);
#[wasm_bindgen(method)]
fn sync(this: &VFS, fd: i32);
}
#[cfg(all(feature = "nodejs", not(feature = "web")))]
#[wasm_bindgen(module = "/node/src/vfs.cjs")]
extern "C" {
type VFS;
#[wasm_bindgen(constructor)]
fn new() -> VFS;
#[wasm_bindgen(method)]
fn open(this: &VFS, path: &str, flags: &str) -> i32;
#[wasm_bindgen(method)]
fn close(this: &VFS, fd: i32) -> bool;
#[wasm_bindgen(method)]
fn pwrite(this: &VFS, fd: i32, buffer: &[u8], offset: usize) -> i32;
#[wasm_bindgen(method)]
fn pread(this: &VFS, fd: i32, buffer: &mut [u8], offset: usize) -> i32;
#[wasm_bindgen(method)]
fn size(this: &VFS, fd: i32) -> u64;
#[wasm_bindgen(method)]
fn truncate(this: &VFS, fd: i32, len: usize);
#[wasm_bindgen(method)]
fn sync(this: &VFS, fd: i32);
}
#[wasm_bindgen(start)]
pub fn init() {
console_error_panic_hook::set_once();
}

View File

@@ -61,7 +61,7 @@ pub struct Opts {
#[clap(long, help = "Enable experimental MVCC feature")]
pub experimental_mvcc: bool,
#[clap(long, help = "Enable experimental indexing feature")]
pub experimental_indexes: bool,
pub experimental_indexes: Option<bool>,
#[clap(short = 't', long, help = "specify output file for log traces")]
pub tracing_output: Option<String>,
#[clap(long, help = "Start MCP server instead of interactive shell")]
@@ -119,8 +119,9 @@ impl Limbo {
.database
.as_ref()
.map_or(":memory:".to_string(), |p| p.to_string_lossy().to_string());
let indexes_enabled = opts.experimental_indexes.unwrap_or(true);
let (io, conn) = if db_file.contains([':', '?', '&', '#']) {
Connection::from_uri(&db_file, opts.experimental_indexes, opts.experimental_mvcc)?
Connection::from_uri(&db_file, indexes_enabled, opts.experimental_mvcc)?
} else {
let flags = if opts.readonly {
OpenFlags::ReadOnly
@@ -131,7 +132,7 @@ impl Limbo {
&db_file,
opts.vfs.as_ref(),
flags,
opts.experimental_indexes,
indexes_enabled,
opts.experimental_mvcc,
)?;
let conn = db.connect()?;

View File

@@ -19,7 +19,7 @@ default = ["fs", "uuid", "time", "json", "series"]
fs = ["turso_ext/vfs"]
json = []
uuid = ["dep:uuid"]
io_uring = ["dep:io-uring", "rustix/io_uring", "dep:libc"]
io_uring = ["dep:io-uring", "rustix/io_uring"]
time = []
fuzz = []
omit_autovacuum = []
@@ -29,10 +29,12 @@ series = []
[target.'cfg(target_os = "linux")'.dependencies]
io-uring = { version = "0.7.5", optional = true }
libc = { version = "0.2.172" }
[target.'cfg(target_family = "unix")'.dependencies]
polling = "3.7.4"
rustix = { version = "1.0.5", features = ["fs"] }
libc = { version = "0.2.172" }
[target.'cfg(not(target_family = "wasm"))'.dependencies]
mimalloc = { version = "0.1.46", default-features = false }
@@ -44,7 +46,6 @@ turso_ext = { workspace = true, features = ["core_only"] }
cfg_block = "0.1.1"
fallible-iterator = "0.3.0"
hex = "0.4.3"
libc = { version = "0.2.172", optional = true }
turso_sqlite3_parser = { workspace = true }
thiserror = "1.0.61"
getrandom = { version = "0.2.15" }
@@ -70,6 +71,8 @@ serde = { workspace = true, optional = true, features = ["derive"] }
paste = "1.0.15"
uuid = { version = "1.11.0", features = ["v4", "v7"], optional = true }
tempfile = "3.8.0"
pack1 = { version = "1.0.0", features = ["bytemuck"] }
bytemuck = "1.23.1"
[build-dependencies]
chrono = { version = "0.4.38", default-features = false }

View File

@@ -37,14 +37,16 @@ fn bench_open(criterion: &mut Criterion) {
let io = Arc::new(PlatformIO::new().unwrap());
let db =
Database::open_file(io.clone(), "../testing/schema_5k.db", false, false).unwrap();
black_box(db.connect().unwrap());
let conn = db.connect().unwrap();
conn.execute("SELECT * FROM table_0").unwrap();
});
});
if enable_rusqlite {
group.bench_function(BenchmarkId::new("sqlite_schema", ""), |b| {
b.iter(|| {
black_box(rusqlite::Connection::open("../testing/schema_5k.db").unwrap());
let conn = rusqlite::Connection::open("../testing/schema_5k.db").unwrap();
conn.execute("SELECT * FROM table_0", ()).unwrap();
});
});
}

View File

@@ -1,13 +1,29 @@
use std::sync::Arc;
use criterion::async_executor::FuturesExecutor;
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use pprof::criterion::{Output, PProfProfiler};
use turso_core::mvcc::clock::LocalClock;
use turso_core::mvcc::database::{MvStore, Row, RowID};
use turso_core::types::{ImmutableRecord, Text};
use turso_core::{Connection, Database, MemoryIO, Value};
fn bench_db() -> MvStore<LocalClock> {
let clock = LocalClock::default();
let storage = turso_core::mvcc::persistent_storage::Storage::new_noop();
MvStore::new(clock, storage)
struct BenchDb {
_db: Arc<Database>,
conn: Arc<Connection>,
mvcc_store: Arc<MvStore<LocalClock>>,
}
fn bench_db() -> BenchDb {
let io = Arc::new(MemoryIO::new());
let db = Database::open_file(io.clone(), ":memory:", true, true).unwrap();
let conn = db.connect().unwrap();
let mvcc_store = db.get_mv_store().unwrap().clone();
BenchDb {
_db: db,
conn,
mvcc_store,
}
}
fn bench(c: &mut Criterion) {
@@ -16,107 +32,131 @@ fn bench(c: &mut Criterion) {
let db = bench_db();
group.bench_function("begin_tx + rollback_tx", |b| {
let db = bench_db();
b.to_async(FuturesExecutor).iter(|| async {
let tx_id = db.begin_tx();
db.rollback_tx(tx_id)
let conn = db.conn.clone();
let tx_id = db.mvcc_store.begin_tx(conn.get_pager().clone());
db.mvcc_store.rollback_tx(tx_id, conn.get_pager().clone())
})
});
let db = bench_db();
group.bench_function("begin_tx + commit_tx", |b| {
b.to_async(FuturesExecutor).iter(|| async {
let tx_id = db.begin_tx();
db.commit_tx(tx_id)
let conn = &db.conn;
let tx_id = db.mvcc_store.begin_tx(conn.get_pager().clone());
db.mvcc_store
.commit_tx(tx_id, conn.get_pager().clone(), conn)
})
});
let db = bench_db();
group.bench_function("begin_tx-read-commit_tx", |b| {
b.to_async(FuturesExecutor).iter(|| async {
let tx_id = db.begin_tx();
db.read(
tx_id,
RowID {
table_id: 1,
row_id: 1,
},
)
.unwrap();
db.commit_tx(tx_id)
let conn = &db.conn;
let tx_id = db.mvcc_store.begin_tx(conn.get_pager().clone());
db.mvcc_store
.read(
tx_id,
RowID {
table_id: 1,
row_id: 1,
},
)
.unwrap();
db.mvcc_store
.commit_tx(tx_id, conn.get_pager().clone(), conn)
})
});
let db = bench_db();
let record = ImmutableRecord::from_values(&vec![Value::Text(Text::new("World"))], 1);
let record_data = record.as_blob();
group.bench_function("begin_tx-update-commit_tx", |b| {
b.to_async(FuturesExecutor).iter(|| async {
let tx_id = db.begin_tx();
db.update(
tx_id,
Row {
id: RowID {
table_id: 1,
row_id: 1,
let conn = &db.conn;
let tx_id = db.mvcc_store.begin_tx(conn.get_pager().clone());
db.mvcc_store
.update(
tx_id,
Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: record_data.clone(),
column_count: 1,
},
data: "World".to_string().into_bytes(),
},
)
.unwrap();
db.commit_tx(tx_id)
conn.get_pager().clone(),
)
.unwrap();
db.mvcc_store
.commit_tx(tx_id, conn.get_pager().clone(), conn)
.unwrap();
})
});
let db = bench_db();
let tx = db.begin_tx();
db.insert(
tx,
Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
},
)
.unwrap();
group.bench_function("read", |b| {
b.to_async(FuturesExecutor).iter(|| async {
db.read(
tx,
RowID {
let tx_id = db.mvcc_store.begin_tx(db.conn.get_pager().clone());
db.mvcc_store
.insert(
tx_id,
Row {
id: RowID {
table_id: 1,
row_id: 1,
},
)
.unwrap();
data: record_data.clone(),
column_count: 1,
},
)
.unwrap();
group.bench_function("read", |b| {
b.to_async(FuturesExecutor).iter(|| async {
db.mvcc_store
.read(
tx_id,
RowID {
table_id: 1,
row_id: 1,
},
)
.unwrap();
})
});
let db = bench_db();
let tx = db.begin_tx();
db.insert(
tx,
Row {
id: RowID {
table_id: 1,
row_id: 1,
let tx_id = db.mvcc_store.begin_tx(db.conn.get_pager().clone());
let conn = &db.conn;
db.mvcc_store
.insert(
tx_id,
Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: record_data.clone(),
column_count: 1,
},
data: "Hello".to_string().into_bytes(),
},
)
.unwrap();
)
.unwrap();
group.bench_function("update", |b| {
b.to_async(FuturesExecutor).iter(|| async {
db.update(
tx,
Row {
id: RowID {
table_id: 1,
row_id: 1,
db.mvcc_store
.update(
tx_id,
Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: record_data.clone(),
column_count: 1,
},
data: "World".to_string().into_bytes(),
},
)
.unwrap();
conn.get_pager().clone(),
)
.unwrap();
})
});
}

View File

@@ -55,12 +55,20 @@ pub enum LimboError {
IntegerOverflow,
#[error("Schema is locked for write")]
SchemaLocked,
#[error("Runtime error: database table is locked")]
TableLocked,
#[error("Error: Resource is read-only")]
ReadOnly,
#[error("Database is busy")]
Busy,
#[error("Conflict: {0}")]
Conflict(String),
#[error("Transaction terminated")]
TxTerminated,
#[error("Write-write conflict")]
WriteWriteConflict,
#[error("No such transaction ID: {0}")]
NoSuchTransactionID(String),
}
#[macro_export]

View File

@@ -2,38 +2,43 @@
use super::{common, Completion, CompletionInner, File, OpenFlags, IO};
use crate::io::clock::{Clock, Instant};
use crate::storage::wal::CKPT_BATCH_PAGES;
use crate::{turso_assert, LimboError, MemoryIO, Result};
use rustix::fs::{self, FlockOperation, OFlags};
use std::cell::RefCell;
use std::collections::VecDeque;
use std::fmt;
use std::io::ErrorKind;
use std::os::fd::AsFd;
use std::os::unix::io::AsRawFd;
use std::rc::Rc;
use std::sync::Arc;
use thiserror::Error;
use std::{
cell::RefCell,
collections::{HashMap, VecDeque},
io::ErrorKind,
ops::Deref,
os::{fd::AsFd, unix::io::AsRawFd},
rc::Rc,
sync::Arc,
};
use tracing::{debug, trace};
/// Size of the io_uring submission and completion queues
const ENTRIES: u32 = 512;
/// Idle timeout for the sqpoll kernel thread before it needs
/// to be woken back up by a call IORING_ENTER_SQ_WAKEUP flag.
/// (handled by the io_uring crate in `submit_and_wait`)
const SQPOLL_IDLE: u32 = 1000;
/// Number of file descriptors we preallocate for io_uring.
/// NOTE: we may need to increase this when `attach` is fully implemented.
const FILES: u32 = 8;
#[derive(Debug, Error)]
enum UringIOError {
IOUringCQError(i32),
}
/// Number of Vec<Box<[iovec]>> we preallocate on initialization
const IOVEC_POOL_SIZE: usize = 64;
impl fmt::Display for UringIOError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
UringIOError::IOUringCQError(code) => write!(
f,
"IOUring completion queue error occurred with code {code}",
),
}
}
}
/// Maximum number of iovec entries per writev operation.
/// IOV_MAX is typically 1024, but we limit it to a smaller number
const MAX_IOVEC_ENTRIES: usize = CKPT_BATCH_PAGES;
/// Maximum number of I/O operations to wait for in a single run;
/// waiting for > 1 can reduce the number of `io_uring_enter` syscalls we
/// make, but can increase single-operation latency.
const MAX_WAIT: usize = 4;
pub struct UringIO {
inner: Rc<RefCell<InnerUringIO>>,
@@ -45,6 +50,8 @@ unsafe impl Sync for UringIO {}
struct WrappedIOUring {
ring: io_uring::IoUring,
pending_ops: usize,
writev_states: HashMap<u64, WritevState>,
iov_pool: IovecPool,
}
struct InnerUringIO {
@@ -52,6 +59,39 @@ struct InnerUringIO {
free_files: VecDeque<u32>,
}
/// preallocated vec of iovec arrays to avoid allocations during writev operations
struct IovecPool {
pool: Vec<Box<[libc::iovec; MAX_IOVEC_ENTRIES]>>,
}
impl IovecPool {
fn new() -> Self {
let pool = (0..IOVEC_POOL_SIZE)
.map(|_| {
Box::new(
[libc::iovec {
iov_base: std::ptr::null_mut(),
iov_len: 0,
}; MAX_IOVEC_ENTRIES],
)
})
.collect();
Self { pool }
}
#[inline(always)]
fn acquire(&mut self) -> Option<Box<[libc::iovec; MAX_IOVEC_ENTRIES]>> {
self.pool.pop()
}
#[inline(always)]
fn release(&mut self, iovec: Box<[libc::iovec; MAX_IOVEC_ENTRIES]>) {
if self.pool.len() < IOVEC_POOL_SIZE {
self.pool.push(iovec);
}
}
}
impl UringIO {
pub fn new() -> Result<Self> {
let ring = match io_uring::IoUring::builder()
@@ -69,6 +109,8 @@ impl UringIO {
ring: WrappedIOUring {
ring,
pending_ops: 0,
writev_states: HashMap::new(),
iov_pool: IovecPool::new(),
},
free_files: (0..FILES).collect(),
};
@@ -79,6 +121,126 @@ impl UringIO {
}
}
/// io_uring crate decides not to export their `UseFixed` trait, so we
/// are forced to use a macro here to handle either fixed or raw file descriptors.
macro_rules! with_fd {
($file:expr, |$fd:ident| $body:expr) => {
match $file.id() {
Some(id) => {
let $fd = io_uring::types::Fixed(id);
$body
}
None => {
let $fd = io_uring::types::Fd($file.as_raw_fd());
$body
}
}
};
}
/// wrapper type to represent a possibly registered file descriptor,
/// only used in WritevState, and piggy-backs on the available methods from
/// `UringFile`, so we don't have to store the file on `WritevState`.
enum Fd {
Fixed(u32),
RawFd(i32),
}
impl Fd {
/// to match the behavior of the File, we need to implement the same methods
fn id(&self) -> Option<u32> {
match self {
Fd::Fixed(id) => Some(*id),
Fd::RawFd(_) => None,
}
}
/// ONLY to be called by the macro, in the case where id() is None
fn as_raw_fd(&self) -> i32 {
match self {
Fd::RawFd(fd) => *fd,
_ => panic!("Cannot call as_raw_fd on a Fixed Fd"),
}
}
}
/// State to track an ongoing writev operation in
/// the case of a partial write.
struct WritevState {
/// File descriptor/id of the file we are writing to
file_id: Fd,
/// absolute file offset for next submit
file_pos: usize,
/// current buffer index in `bufs`
current_buffer_idx: usize,
/// intra-buffer offset
current_buffer_offset: usize,
/// total bytes written so far
total_written: usize,
/// cache the sum of all buffer lengths for the total expected write
total_len: usize,
/// buffers to write
bufs: Vec<Arc<RefCell<crate::Buffer>>>,
/// we keep the last iovec allocation alive until final CQE
last_iov_allocation: Option<Box<[libc::iovec; MAX_IOVEC_ENTRIES]>>,
}
impl WritevState {
fn new(file: &UringFile, pos: usize, bufs: Vec<Arc<RefCell<crate::Buffer>>>) -> Self {
let file_id = file
.id()
.map(Fd::Fixed)
.unwrap_or_else(|| Fd::RawFd(file.as_raw_fd()));
let total_len = bufs.iter().map(|b| b.borrow().len()).sum();
Self {
file_id,
file_pos: pos,
current_buffer_idx: 0,
current_buffer_offset: 0,
total_written: 0,
bufs,
last_iov_allocation: None,
total_len,
}
}
#[inline(always)]
fn remaining(&self) -> usize {
self.total_len - self.total_written
}
/// Advance (idx, off, pos) after written bytes
#[inline(always)]
fn advance(&mut self, written: usize) {
let mut remaining = written;
while remaining > 0 {
let current_buf_len = {
let r = self.bufs[self.current_buffer_idx].borrow();
r.len()
};
let left = current_buf_len - self.current_buffer_offset;
if remaining < left {
self.current_buffer_offset += remaining;
self.file_pos += remaining;
remaining = 0;
} else {
remaining -= left;
self.file_pos += left;
self.current_buffer_idx += 1;
self.current_buffer_offset = 0;
}
}
self.total_written += written;
}
#[inline(always)]
/// Free the allocation that keeps the iovec array alive while writev is ongoing
fn free_last_iov(&mut self, pool: &mut IovecPool) {
if let Some(allocation) = self.last_iov_allocation.take() {
pool.release(allocation);
}
}
}
impl InnerUringIO {
fn register_file(&mut self, fd: i32) -> Result<u32> {
if let Some(slot) = self.free_files.pop_front() {
@@ -106,33 +268,119 @@ impl WrappedIOUring {
fn submit_entry(&mut self, entry: &io_uring::squeue::Entry) {
trace!("submit_entry({:?})", entry);
unsafe {
self.ring
.submission()
.push(entry)
.expect("submission queue is full");
let mut sub = self.ring.submission_shared();
match sub.push(entry) {
Ok(_) => self.pending_ops += 1,
Err(e) => {
tracing::error!("Failed to submit entry: {e}");
self.ring.submit().expect("failed to submit entry");
sub.push(entry).expect("failed to push entry after submit");
self.pending_ops += 1;
}
}
}
self.pending_ops += 1;
}
fn wait_for_completion(&mut self) -> Result<()> {
self.ring.submit_and_wait(1)?;
fn submit_and_wait(&mut self) -> Result<()> {
if self.empty() {
return Ok(());
}
let wants = std::cmp::min(self.pending_ops, MAX_WAIT);
tracing::trace!("submit_and_wait for {wants} pending operations to complete");
self.ring.submit_and_wait(wants)?;
Ok(())
}
fn get_completion(&mut self) -> Option<io_uring::cqueue::Entry> {
// NOTE: This works because CompletionQueue's next function pops the head of the queue. This is not normal behaviour of iterators
let entry = self.ring.completion().next();
if entry.is_some() {
trace!("get_completion({:?})", entry);
// consumed an entry from completion queue, update pending_ops
self.pending_ops -= 1;
}
entry
}
fn empty(&self) -> bool {
self.pending_ops == 0
}
/// Submit or resubmit a writev operation
fn submit_writev(&mut self, key: u64, mut st: WritevState) {
st.free_last_iov(&mut self.iov_pool);
let mut iov_allocation = self.iov_pool.acquire().unwrap_or_else(|| {
// Fallback: allocate a new one if pool is exhausted
Box::new(
[libc::iovec {
iov_base: std::ptr::null_mut(),
iov_len: 0,
}; MAX_IOVEC_ENTRIES],
)
});
let mut iov_count = 0;
for (idx, buffer) in st
.bufs
.iter()
.enumerate()
.skip(st.current_buffer_idx)
.take(MAX_IOVEC_ENTRIES)
{
let buf = buffer.borrow();
let buf_slice = buf.as_slice();
// ensure we are providing a pointer to the proper offset in the buffer
let slice = if idx == st.current_buffer_idx {
&buf_slice[st.current_buffer_offset..]
} else {
buf_slice
};
if slice.is_empty() {
continue;
}
iov_allocation[iov_count] = libc::iovec {
iov_base: slice.as_ptr() as *mut _,
iov_len: slice.len(),
};
iov_count += 1;
}
// Store the pointers and get the pointer to the iovec array that we pass
// to the writev operation, and keep the array itself alive
let ptr = iov_allocation.as_ptr() as *mut libc::iovec;
st.last_iov_allocation = Some(iov_allocation);
let entry = with_fd!(st.file_id, |fd| {
io_uring::opcode::Writev::new(fd, ptr, iov_count as u32)
.offset(st.file_pos as u64)
.build()
.user_data(key)
});
// track the current state in case we get a partial write
self.writev_states.insert(key, st);
self.submit_entry(&entry);
}
fn handle_writev_completion(&mut self, mut state: WritevState, user_data: u64, result: i32) {
if result < 0 {
let err = std::io::Error::from_raw_os_error(result);
tracing::error!("writev failed (user_data: {}): {}", user_data, err);
state.free_last_iov(&mut self.iov_pool);
completion_from_key(user_data).complete(result);
return;
}
let written = result as usize;
state.advance(written);
match state.remaining() {
0 => {
tracing::info!(
"writev operation completed: wrote {} bytes",
state.total_written
);
// write complete, return iovec to pool
state.free_last_iov(&mut self.iov_pool);
completion_from_key(user_data).complete(state.total_written as i32);
}
remaining => {
tracing::trace!(
"resubmitting writev operation for user_data {}: wrote {} bytes, remaining {}",
user_data,
written,
remaining
);
// partial write, submit next
self.submit_writev(user_data, state);
}
}
}
}
impl IO for UringIO {
@@ -179,26 +427,28 @@ impl IO for UringIO {
trace!("run_once()");
let mut inner = self.inner.borrow_mut();
let ring = &mut inner.ring;
if ring.empty() {
return Ok(());
}
ring.wait_for_completion()?;
while let Some(cqe) = ring.get_completion() {
ring.submit_and_wait()?;
loop {
let Some(cqe) = ring.ring.completion().next() else {
return Ok(());
};
ring.pending_ops -= 1;
let user_data = cqe.user_data();
let result = cqe.result();
if result < 0 {
return Err(LimboError::UringIOError(format!(
"{} cqe: {:?}",
UringIOError::IOUringCQError(result),
cqe
)));
turso_assert!(
user_data != 0,
"user_data must not be zero, we dont submit linked timeouts or cancelations that would cause this"
);
if let Some(state) = ring.writev_states.remove(&user_data) {
// if we have ongoing writev state, handle it separately and don't call completion
ring.handle_writev_completion(state, user_data, result);
continue;
}
let ud = cqe.user_data();
turso_assert!(ud > 0, "there are no linked timeouts or cancellations, all cqe user_data should be valid arc pointers");
completion_from_key(ud).complete(result);
completion_from_key(user_data).complete(result)
}
Ok(())
}
fn generate_random_number(&self) -> i64 {
@@ -242,24 +492,22 @@ pub struct UringFile {
id: Option<u32>,
}
impl Deref for UringFile {
type Target = std::fs::File;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl UringFile {
fn id(&self) -> Option<u32> {
self.id
}
}
unsafe impl Send for UringFile {}
unsafe impl Sync for UringFile {}
macro_rules! with_fd {
($file:expr, |$fd:ident| $body:expr) => {
match $file.id {
Some(id) => {
let $fd = io_uring::types::Fixed(id);
$body
}
None => {
let $fd = io_uring::types::Fd($file.file.as_raw_fd());
$body
}
}
};
}
impl File for UringFile {
fn lock_file(&self, exclusive: bool) -> Result<()> {
let fd = self.file.as_fd();
@@ -350,6 +598,24 @@ impl File for UringFile {
Ok(c)
}
fn pwritev(
&self,
pos: usize,
bufs: Vec<Arc<RefCell<crate::Buffer>>>,
c: Completion,
) -> Result<Completion> {
// for a single buffer use pwrite directly
if bufs.len().eq(&1) {
return self.pwrite(pos, bufs[0].clone(), c.clone());
}
tracing::trace!("pwritev(pos = {}, bufs.len() = {})", pos, bufs.len());
let mut io = self.io.borrow_mut();
// create state to track ongoing writev operation
let state = WritevState::new(self, pos, bufs);
io.ring.submit_writev(get_key(c.clone()), state);
Ok(c)
}
fn size(&self) -> Result<u64> {
Ok(self.file.metadata()?.len())
}
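To make the partial-write bookkeeping above concrete, here is a small worked sketch of `WritevState::advance` (hypothetical setup: `page_a` and `page_b` are two 4096-byte buffers, `file` is an open `UringFile`):
// Two 4096-byte buffers queued at file offset 0; the kernel reports a short
// write of 5000 bytes on the first CQE.
let mut st = WritevState::new(&file, 0, vec![page_a.clone(), page_b.clone()]);
st.advance(5000);
assert_eq!(st.current_buffer_idx, 1);      // buffer 0 (4096 bytes) fully consumed
assert_eq!(st.current_buffer_offset, 904); // 5000 - 4096 bytes into buffer 1
assert_eq!(st.file_pos, 5000);             // next submission resumes at this offset
assert_eq!(st.remaining(), 3192);          // 8192 - 5000 still to write, so
                                           // submit_writev re-queues from bufs[1][904..]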

View File

@@ -187,6 +187,49 @@ impl File for MemoryFile {
Ok(c)
}
fn pwritev(
&self,
pos: usize,
buffers: Vec<Arc<RefCell<Buffer>>>,
c: Completion,
) -> Result<Completion> {
let mut offset = pos;
let mut total_written = 0;
for buffer in buffers {
let buf = buffer.borrow();
let buf_len = buf.len();
if buf_len == 0 {
continue;
}
let mut remaining = buf_len;
let mut buf_offset = 0;
let data = &buf.as_slice();
while remaining > 0 {
let page_no = offset / PAGE_SIZE;
let page_offset = offset % PAGE_SIZE;
let bytes_to_write = remaining.min(PAGE_SIZE - page_offset);
{
let page = self.get_or_allocate_page(page_no);
page[page_offset..page_offset + bytes_to_write]
.copy_from_slice(&data[buf_offset..buf_offset + bytes_to_write]);
}
offset += bytes_to_write;
buf_offset += bytes_to_write;
remaining -= bytes_to_write;
}
total_written += buf_len;
}
c.complete(total_written as i32);
self.size
.set(core::cmp::max(pos + total_written, self.size.get()));
Ok(c)
}
fn size(&self) -> Result<u64> {
Ok(self.size.get() as u64)
}

View File

@@ -18,6 +18,46 @@ pub trait File: Send + Sync {
fn pwrite(&self, pos: usize, buffer: Arc<RefCell<Buffer>>, c: Completion)
-> Result<Completion>;
fn sync(&self, c: Completion) -> Result<Completion>;
fn pwritev(
&self,
pos: usize,
buffers: Vec<Arc<RefCell<Buffer>>>,
c: Completion,
) -> Result<Completion> {
use std::sync::atomic::{AtomicUsize, Ordering};
if buffers.is_empty() {
c.complete(0);
return Ok(c);
}
// naive default implementation; can be overridden by backends where it makes sense to do so
let mut pos = pos;
let outstanding = Arc::new(AtomicUsize::new(buffers.len()));
let total_written = Arc::new(AtomicUsize::new(0));
for buf in buffers {
let len = buf.borrow().len();
let child_c = {
let c_main = c.clone();
let outstanding = outstanding.clone();
let total_written = total_written.clone();
Completion::new_write(move |n| {
// accumulate bytes actually reported by the backend
total_written.fetch_add(n as usize, Ordering::Relaxed);
if outstanding.fetch_sub(1, Ordering::AcqRel) == 1 {
// last one finished
c_main.complete(total_written.load(Ordering::Acquire) as i32);
}
})
};
if let Err(e) = self.pwrite(pos, buf.clone(), child_c) {
// best-effort: mark as done so caller won't wait forever
c.complete(-1);
return Err(e);
}
pos += len;
}
Ok(c)
}
fn size(&self) -> Result<u64>;
fn truncate(&self, len: usize, c: Completion) -> Result<Completion>;
}
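The default implementation above decomposes a vectored write into independent `pwrite` calls that share one counting completion, so every backend gets `pwritev` for free. A minimal usage sketch, assuming the crate-local `Buffer`, `Completion`, `File`, `IO`, and `Result` types are in scope and that the buffers come from the buffer pool:
use std::cell::RefCell;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
// Write a batch of page buffers starting at `pos` and block until they land.
fn flush_pages(io: &dyn IO, file: &dyn File, pos: usize, bufs: Vec<Arc<RefCell<Buffer>>>) -> Result<usize> {
    let written = Arc::new(AtomicUsize::new(0));
    let c = {
        let written = written.clone();
        Completion::new_write(move |n| {
            // the trait default (or a writev-capable backend) reports total bytes here
            written.store(n as usize, Ordering::Release);
        })
    };
    let c = file.pwritev(pos, bufs, c)?;
    io.wait_for_completion(c)?; // drives the event loop until the completion fires
    Ok(written.load(Ordering::Acquire))
}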
@@ -304,10 +344,10 @@ cfg_block! {
pub use unix::UnixIO as PlatformIO;
}
#[cfg(target_os = "windows")] {
#[cfg(target_os = "windows")] {
mod windows;
pub use windows::WindowsIO as PlatformIO;
pub use PlatformIO as SyscallIO;
pub use PlatformIO as SyscallIO;
}
#[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows", target_os = "android", target_os = "ios")))] {

View File

@@ -1,15 +1,15 @@
use super::{Completion, File, MemoryIO, OpenFlags, IO};
use crate::error::LimboError;
use crate::io::clock::{Clock, Instant};
use crate::io::common;
use crate::Result;
use super::{Completion, File, MemoryIO, OpenFlags, IO};
use crate::io::clock::{Clock, Instant};
use polling::{Event, Events, Poller};
use rustix::{
fd::{AsFd, AsRawFd},
fs::{self, FlockOperation, OFlags, OpenOptionsExt},
io::Errno,
};
use std::os::fd::RawFd;
use std::{
cell::{RefCell, UnsafeCell},
mem::MaybeUninit,
@@ -40,11 +40,6 @@ impl OwnedCallbacks {
self.as_mut().inline_count == 0
}
fn get(&self, fd: usize) -> Option<&CompletionCallback> {
let callbacks = unsafe { &mut *self.0.get() };
callbacks.get(fd)
}
fn remove(&self, fd: usize) -> Option<CompletionCallback> {
let callbacks = unsafe { &mut *self.0.get() };
callbacks.remove(fd)
@@ -135,16 +130,6 @@ impl Callbacks {
}
}
fn get(&self, fd: usize) -> Option<&CompletionCallback> {
if let Some(pos) = self.find_inline(fd) {
let (_, callback) = unsafe { self.inline_entries[pos].assume_init_ref() };
return Some(callback);
} else if let Some(pos) = self.heap_entries.iter().position(|&(k, _)| k == fd) {
return Some(&self.heap_entries[pos].1);
}
None
}
fn remove(&mut self, fd: usize) -> Option<CompletionCallback> {
if let Some(pos) = self.find_inline(fd) {
let (_, callback) = unsafe { self.inline_entries[pos].assume_init_read() };
@@ -213,6 +198,35 @@ impl Clock for UnixIO {
}
}
fn try_pwritev_raw(
fd: RawFd,
off: u64,
bufs: &[Arc<RefCell<crate::Buffer>>],
start_idx: usize,
start_off: usize,
) -> std::io::Result<usize> {
const MAX_IOV: usize = 1024;
let iov_len = std::cmp::min(bufs.len() - start_idx, MAX_IOV);
let mut iov = Vec::with_capacity(iov_len);
for (i, b) in bufs.iter().enumerate().skip(start_idx).take(iov_len) {
let r = b.borrow(); // borrow just to get pointer/len
let s = r.as_slice();
let s = if i == start_idx { &s[start_off..] } else { s };
iov.push(libc::iovec {
iov_base: s.as_ptr() as *mut _,
iov_len: s.len(),
});
}
let n = unsafe { libc::pwritev(fd, iov.as_ptr(), iov.len() as i32, off as i64) };
if n < 0 {
Err(std::io::Error::last_os_error())
} else {
Ok(n as usize)
}
}
impl IO for UnixIO {
fn open_file(&self, path: &str, flags: OpenFlags, _direct: bool) -> Result<Arc<dyn File>> {
trace!("open_file(path = {})", path);
@@ -243,46 +257,129 @@ impl IO for UnixIO {
if self.callbacks.is_empty() {
return Ok(());
}
self.events.clear();
trace!("run_once() waits for events");
self.poller.wait(self.events.as_mut(), None)?;
for event in self.events.iter() {
if let Some(cf) = self.callbacks.get(event.key) {
let result = match cf {
CompletionCallback::Read(ref file, ref c, pos) => {
let file = file.lock().unwrap();
let r = c.as_read();
let mut buf = r.buf_mut();
rustix::io::pread(file.as_fd(), buf.as_mut_slice(), *pos as u64)
}
CompletionCallback::Write(ref file, _, ref buf, pos) => {
let file = file.lock().unwrap();
let buf = buf.borrow();
rustix::io::pwrite(file.as_fd(), buf.as_slice(), *pos as u64)
}
};
match result {
Ok(n) => {
let cf = self
.callbacks
.remove(event.key)
.expect("callback should exist");
match cf {
CompletionCallback::Read(_, c, _) => c.complete(0),
CompletionCallback::Write(_, c, _, _) => c.complete(n as i32),
}
}
Err(Errno::AGAIN) => (),
Err(e) => {
self.callbacks.remove(event.key);
let key = event.key;
let cb = match self.callbacks.remove(key) {
Some(cb) => cb,
None => continue, // could have been completed/removed already
};
trace!("run_once() error: {}", e);
return Err(e.into());
match cb {
CompletionCallback::Read(ref file, c, pos) => {
let f = file
.lock()
.map_err(|e| LimboError::LockingError(e.to_string()))?;
let r = c.as_read();
let mut buf = r.buf_mut();
match rustix::io::pread(f.as_fd(), buf.as_mut_slice(), pos as u64) {
Ok(n) => c.complete(n as i32),
Err(Errno::AGAIN) => {
// re-arm
unsafe { self.poller.as_mut().add(&f.as_fd(), Event::readable(key))? };
self.callbacks.as_mut().insert(
key,
CompletionCallback::Read(file.clone(), c.clone(), pos),
);
}
Err(e) => return Err(e.into()),
}
}
CompletionCallback::Write(ref file, c, buf, pos) => {
let f = file
.lock()
.map_err(|e| LimboError::LockingError(e.to_string()))?;
let b = buf.borrow();
match rustix::io::pwrite(f.as_fd(), b.as_slice(), pos as u64) {
Ok(n) => c.complete(n as i32),
Err(Errno::AGAIN) => {
unsafe { self.poller.as_mut().add(&f.as_fd(), Event::writable(key))? };
self.callbacks.as_mut().insert(
key,
CompletionCallback::Write(file.clone(), c, buf.clone(), pos),
);
}
Err(e) => return Err(e.into()),
}
}
CompletionCallback::Writev(file, c, bufs, mut pos, mut idx, mut off) => {
let f = file
.lock()
.map_err(|e| LimboError::LockingError(e.to_string()))?;
// keep trying until WouldBlock or we're done with this event
match try_pwritev_raw(f.as_raw_fd(), pos as u64, &bufs, idx, off) {
Ok(written) => {
// advance through buffers
let mut rem = written;
while rem > 0 {
let len = {
let r = bufs[idx].borrow();
r.len()
};
let left = len - off;
if rem < left {
off += rem;
rem = 0;
} else {
rem -= left;
idx += 1;
off = 0;
if idx == bufs.len() {
break;
}
}
}
pos += written;
if idx == bufs.len() {
c.complete(pos as i32);
} else {
// Not finished; re-arm and store updated state
unsafe {
self.poller.as_mut().add(&f.as_fd(), Event::writable(key))?
};
self.callbacks.as_mut().insert(
key,
CompletionCallback::Writev(
file.clone(),
c.clone(),
bufs,
pos,
idx,
off,
),
);
}
break;
}
Err(e) if e.kind() == ErrorKind::WouldBlock => {
// re-arm with same state
unsafe { self.poller.as_mut().add(&f.as_fd(), Event::writable(key))? };
self.callbacks.as_mut().insert(
key,
CompletionCallback::Writev(
file.clone(),
c.clone(),
bufs,
pos,
idx,
off,
),
);
break;
}
Err(e) => return Err(e.into()),
}
}
}
}
Ok(())
}
@@ -312,6 +409,14 @@ enum CompletionCallback {
Arc<RefCell<crate::Buffer>>,
usize,
),
Writev(
Arc<Mutex<std::fs::File>>,
Completion,
Vec<Arc<RefCell<crate::Buffer>>>,
usize, // absolute file offset
usize, // buf index
usize, // intra-buf offset
),
}
pub struct UnixFile<'io> {
@@ -431,6 +536,52 @@ impl File for UnixFile<'_> {
}
}
#[instrument(err, skip_all, level = Level::TRACE)]
fn pwritev(
&self,
pos: usize,
buffers: Vec<Arc<RefCell<crate::Buffer>>>,
c: Completion,
) -> Result<Completion> {
let file = self
.file
.lock()
.map_err(|e| LimboError::LockingError(e.to_string()))?;
match try_pwritev_raw(file.as_raw_fd(), pos as u64, &buffers, 0, 0) {
Ok(written) => {
trace!("pwritev wrote {written}");
c.complete(written as i32);
Ok(c)
}
Err(e) => {
if e.kind() == ErrorKind::WouldBlock {
trace!("pwritev blocks");
} else {
return Err(e.into());
}
// Set up state so we can resume later
let fd = file.as_raw_fd();
self.poller
.add(&file.as_fd(), Event::writable(fd as usize))?;
let buf_idx = 0;
let buf_offset = 0;
self.callbacks.insert(
fd as usize,
CompletionCallback::Writev(
self.file.clone(),
c.clone(),
buffers,
pos,
buf_idx,
buf_offset,
),
);
Ok(c)
}
}
}
#[instrument(err, skip_all, level = Level::TRACE)]
fn sync(&self, c: Completion) -> Result<Completion> {
let file = self.file.lock().unwrap();

View File

@@ -18,6 +18,7 @@ pub mod result;
mod schema;
#[cfg(feature = "series")]
mod series;
mod state_machine;
mod storage;
#[allow(dead_code)]
#[cfg(feature = "time")]
@@ -41,15 +42,12 @@ mod numeric;
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
use crate::storage::header_accessor::get_schema_cookie;
use crate::storage::sqlite3_ondisk::is_valid_page_size;
use crate::storage::{header_accessor, wal::DummyWAL};
use crate::translate::optimizer::optimize_plan;
use crate::translate::pragma::TURSO_CDC_DEFAULT_TABLE_NAME;
#[cfg(feature = "fs")]
use crate::types::WalInsertInfo;
#[cfg(feature = "fs")]
use crate::util::{IOExt, OpenMode, OpenOptions};
use crate::util::{OpenMode, OpenOptions};
use crate::vtab::VirtualTable;
use core::str;
pub use error::LimboError;
@@ -80,6 +78,7 @@ use std::{
use storage::database::DatabaseFile;
use storage::page_cache::DumbLruPageCache;
use storage::pager::{AtomicDbState, DbState};
use storage::sqlite3_ondisk::PageSize;
pub use storage::{
buffer_pool::BufferPool,
database::DatabaseStorage,
@@ -93,7 +92,7 @@ use turso_sqlite3_parser::{ast, ast::Cmd, lexer::sql::Parser};
use types::IOResult;
pub use types::RefValue;
pub use types::Value;
use util::parse_schema_rows;
use util::{parse_schema_rows, IOExt as _};
use vdbe::builder::QueryMode;
use vdbe::builder::TableRefIdCounter;
@@ -121,7 +120,7 @@ static DATABASE_MANAGER: LazyLock<Mutex<HashMap<String, Weak<Database>>>> =
/// The `Database` object contains per database file state that is shared
/// between multiple connections.
pub struct Database {
mv_store: Option<Rc<MvStore>>,
mv_store: Option<Arc<MvStore>>,
schema: Mutex<Arc<Schema>>,
db_file: Arc<dyn DatabaseStorage>,
path: String,
@@ -269,7 +268,7 @@ impl Database {
let maybe_shared_wal = WalFileShared::open_shared_if_exists(&io, wal_path.as_str())?;
let mv_store = if enable_mvcc {
Some(Rc::new(MvStore::new(
Some(Arc::new(MvStore::new(
mvcc::LocalClock::new(),
mvcc::persistent_storage::Storage::new_noop(),
)))
@@ -333,10 +332,17 @@ impl Database {
pub fn connect(self: &Arc<Database>) -> Result<Arc<Connection>> {
let pager = self.init_pager(None)?;
let page_size = header_accessor::get_page_size(&pager)
.unwrap_or(storage::sqlite3_ondisk::DEFAULT_PAGE_SIZE);
let default_cache_size = header_accessor::get_default_page_cache_size(&pager)
.unwrap_or(storage::sqlite3_ondisk::DEFAULT_CACHE_SIZE);
let page_size = pager
.io
.block(|| pager.with_header(|header| header.page_size))
.unwrap_or_default()
.get();
let default_cache_size = pager
.io
.block(|| pager.with_header(|header| header.default_page_cache_size))
.unwrap_or_default()
.get();
let conn = Arc::new(Connection {
_db: self.clone(),
@@ -391,7 +397,7 @@ impl Database {
)));
let pager = Pager::new(
self.db_file.clone(),
wal,
Some(wal),
self.io.clone(),
Arc::new(RwLock::new(DumbLruPageCache::default())),
buffer_pool.clone(),
@@ -403,12 +409,10 @@ impl Database {
let buffer_pool = Arc::new(BufferPool::new(page_size));
// No existing WAL; create one.
// TODO: currently Pager needs to be instantiated with some implementation of trait Wal, so here's a workaround.
let dummy_wal = Rc::new(RefCell::new(DummyWAL {}));
let db_state = self.db_state.clone();
let mut pager = Pager::new(
self.db_file.clone(),
dummy_wal,
None,
self.io.clone(),
Arc::new(RwLock::new(DumbLruPageCache::default())),
buffer_pool.clone(),
@@ -419,8 +423,11 @@ impl Database {
let size = match page_size {
Some(size) => size as u32,
None => {
let size = header_accessor::get_page_size(&pager)
.unwrap_or(storage::sqlite3_ondisk::DEFAULT_PAGE_SIZE);
let size = pager
.io
.block(|| pager.with_header(|header| header.page_size))
.unwrap_or_default()
.get();
buffer_pool.set_page_size(size as usize);
size
}
@@ -522,6 +529,10 @@ impl Database {
}
Ok(())
}
pub fn get_mv_store(&self) -> Option<&Arc<MvStore>> {
self.mv_store.as_ref()
}
}
fn get_schema_version(conn: &Arc<Connection>) -> Result<u32> {
@@ -807,10 +818,12 @@ impl Connection {
// first, quickly read schema_version from the root page in order to check if schema changed
pager.begin_read_tx()?;
let db_schema_version = get_schema_cookie(&pager);
let db_schema_version = pager
.io
.block(|| pager.with_header(|header| header.schema_cookie));
pager.end_read_tx().expect("read txn must be finished");
let db_schema_version = db_schema_version?;
let db_schema_version = db_schema_version?.get();
let conn_schema_version = self.schema.borrow().schema_version;
turso_assert!(
conn_schema_version <= db_schema_version,
@@ -838,7 +851,10 @@ impl Connection {
let mut fresh = Schema::new(false); // todo: indices!
// read cookie before consuming statement program - otherwise we can end up reading cookie with closed transaction state
let cookie = get_schema_cookie(&pager)?;
let cookie = pager
.io
.block(|| pager.with_header(|header| header.schema_cookie))?
.get();
// TODO: This function below is synchronous, make it async
parse_schema_rows(stmt, &mut fresh, &self.syms.borrow(), None)?;
@@ -1170,13 +1186,19 @@ impl Connection {
{
let pager = self.pager.borrow();
let Some(wal) = pager.wal.as_ref() else {
return Err(LimboError::InternalError(
"wal_insert_end called without a wal".to_string(),
));
};
{
let wal = pager.wal.borrow_mut();
let wal = wal.borrow_mut();
wal.end_write_tx();
wal.end_read_tx();
}
// remove all non-committed changes in case the WAL session left some suffix without a commit frame
pager.rollback(false, self)?;
pager.rollback(false, self, true)?;
}
// let's re-parse the schema from scratch if the schema cookie changed compared to our in-memory view of the schema
@@ -1315,7 +1337,7 @@ impl Connection {
/// is first created, if it does not already exist when the page_size pragma is issued,
/// or at the next VACUUM command that is run on the same database connection while not in WAL mode.
pub fn reset_page_size(&self, size: u32) -> Result<()> {
if !is_valid_page_size(size) {
if PageSize::new(size).is_none() {
return Ok(());
}
@@ -1683,19 +1705,23 @@ impl Connection {
databases.sort_by_key(|&(seq, _, _)| seq);
databases
}
pub fn get_pager(&self) -> Rc<Pager> {
self.pager.borrow().clone()
}
}
pub struct Statement {
program: Rc<vdbe::Program>,
state: vdbe::ProgramState,
mv_store: Option<Rc<MvStore>>,
mv_store: Option<Arc<MvStore>>,
pager: Rc<Pager>,
}
impl Statement {
pub fn new(
program: Rc<vdbe::Program>,
mv_store: Option<Rc<MvStore>>,
mv_store: Option<Arc<MvStore>>,
pager: Rc<Pager>,
) -> Self {
let state = vdbe::ProgramState::new(program.max_registers, program.cursor_ref.len());

View File

@@ -1,7 +1,10 @@
use crate::mvcc::clock::LogicalClock;
use crate::mvcc::database::{MvStore, Result, Row, RowID};
use crate::mvcc::database::{MvStore, Row, RowID};
use crate::Pager;
use crate::Result;
use std::fmt::Debug;
use std::rc::Rc;
use std::sync::Arc;
#[derive(Debug, Copy, Clone)]
enum CursorPosition {
@@ -14,20 +17,27 @@ enum CursorPosition {
}
#[derive(Debug)]
pub struct MvccLazyCursor<Clock: LogicalClock> {
pub db: Rc<MvStore<Clock>>,
pub db: Arc<MvStore<Clock>>,
current_pos: CursorPosition,
table_id: u64,
tx_id: u64,
}
impl<Clock: LogicalClock> MvccLazyCursor<Clock> {
pub fn new(db: Rc<MvStore<Clock>>, tx_id: u64, table_id: u64) -> Result<MvccLazyCursor<Clock>> {
Ok(Self {
pub fn new(
db: Arc<MvStore<Clock>>,
tx_id: u64,
table_id: u64,
pager: Rc<Pager>,
) -> Result<MvccLazyCursor<Clock>> {
db.maybe_initialize_table(table_id, pager)?;
let cursor = Self {
db,
tx_id,
current_pos: CursorPosition::BeforeFirst,
table_id,
})
};
Ok(cursor)
}
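/// Editor's sketch (hypothetical helper, not in this changeset): typical use of
/// the new constructor together with the lazily-resolving `current_row` defined
/// further below; only `first_row_example` is invented, everything else comes
/// from this file.
pub fn first_row_example(
    db: Arc<MvStore<Clock>>,
    tx_id: u64,
    table_id: u64,
    pager: Rc<Pager>,
) -> Result<Option<Row>> {
    // `new` now also makes sure the table's row ids are loaded from the B-tree.
    let mut cursor = MvccLazyCursor::new(db, tx_id, table_id, pager)?;
    // With the cursor still BeforeFirst, `current_row` resolves the first row on demand.
    cursor.current_row()
}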
/// Insert a row into the table.
@@ -40,18 +50,37 @@ impl<Clock: LogicalClock> MvccLazyCursor<Clock> {
Ok(())
}
pub fn current_row_id(&self) -> Option<RowID> {
pub fn current_row_id(&mut self) -> Option<RowID> {
match self.current_pos {
CursorPosition::Loaded(id) => Some(id),
CursorPosition::BeforeFirst => None,
CursorPosition::BeforeFirst => {
// If we are before first, we need to try and find the first row.
let maybe_rowid = self.db.get_next_row_id_for_table(self.table_id, i64::MIN);
if let Some(id) = maybe_rowid {
self.current_pos = CursorPosition::Loaded(id);
Some(id)
} else {
self.current_pos = CursorPosition::BeforeFirst;
None
}
}
CursorPosition::End => None,
}
}
pub fn current_row(&self) -> Result<Option<Row>> {
pub fn current_row(&mut self) -> Result<Option<Row>> {
match self.current_pos {
CursorPosition::Loaded(id) => self.db.read(self.tx_id, id),
CursorPosition::BeforeFirst => Ok(None),
CursorPosition::BeforeFirst => {
// If we are before first, we need to try and find the first row.
let maybe_rowid = self.db.get_next_row_id_for_table(self.table_id, i64::MIN);
if let Some(id) = maybe_rowid {
self.current_pos = CursorPosition::Loaded(id);
self.db.read(self.tx_id, id)
} else {
Ok(None)
}
}
CursorPosition::End => Ok(None),
}
}
@@ -65,7 +94,8 @@ impl<Clock: LogicalClock> MvccLazyCursor<Clock> {
let before_first = matches!(self.current_pos, CursorPosition::BeforeFirst);
let min_id = match self.current_pos {
CursorPosition::Loaded(id) => id.row_id + 1,
CursorPosition::BeforeFirst => i64::MIN, // we need to find first row, so we look from the first id
// TODO: do we need to forward twice?
CursorPosition::BeforeFirst => i64::MIN, // we need to find the first row, so we look from the smallest possible id
CursorPosition::End => {
// let's keep same state, we reached the end so no point in moving forward.
return false;

View File

@@ -1,18 +1,30 @@
use crate::mvcc::clock::LogicalClock;
use crate::mvcc::errors::DatabaseError;
use crate::mvcc::persistent_storage::Storage;
use crate::state_machine::StateMachine;
use crate::state_machine::StateTransition;
use crate::state_machine::TransitionResult;
use crate::storage::btree::BTreeCursor;
use crate::storage::btree::BTreeKey;
use crate::types::IOResult;
use crate::types::ImmutableRecord;
use crate::LimboError;
use crate::Result;
use crate::{Connection, Pager};
use crossbeam_skiplist::{SkipMap, SkipSet};
use parking_lot::RwLock;
use std::collections::HashSet;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::rc::Rc;
use std::sync::atomic::{AtomicU64, Ordering};
pub type Result<T> = std::result::Result<T, DatabaseError>;
use std::sync::Arc;
#[cfg(test)]
mod tests;
pub mod tests;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RowID {
/// The table ID. Analogous to table's root page number.
pub table_id: u64,
pub row_id: i64,
}
@@ -28,11 +40,16 @@ impl RowID {
pub struct Row {
pub id: RowID,
pub data: Vec<u8>,
pub column_count: usize,
}
impl Row {
pub fn new(id: RowID, data: Vec<u8>) -> Self {
Self { id, data }
pub fn new(id: RowID, data: Vec<u8>, column_count: usize) -> Self {
Self {
id,
data,
column_count,
}
}
}
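// Editor's illustration (not part of this changeset): with `column_count` added,
// `data` is expected to hold a serialized record blob. A minimal way to build one,
// mirroring `generate_simple_string_row` in the tests below; the `Value` and
// `Text` imports are assumed.
fn example_text_row(table_id: u64, row_id: i64, text: &str) -> Row {
    let record = ImmutableRecord::from_values(&[Value::Text(Text::new(text))], 1);
    Row::new(RowID { table_id, row_id }, record.as_blob().to_vec(), 1)
}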
@@ -221,6 +238,444 @@ impl AtomicTransactionState {
}
}
#[derive(Debug)]
pub enum CommitState {
Initial,
BeginPagerTxn { end_ts: u64 },
WriteRow { end_ts: u64, write_set_index: usize },
WriteRowStateMachine { end_ts: u64, write_set_index: usize },
CommitPagerTxn { end_ts: u64 },
Commit { end_ts: u64 },
}
#[derive(Debug)]
pub enum WriteRowState {
Initial,
CreateCursor,
Seek,
Insert,
}
pub struct CommitStateMachine<Clock: LogicalClock> {
state: CommitState,
is_finalized: bool,
pager: Rc<Pager>,
tx_id: TxID,
connection: Arc<Connection>,
write_set: Vec<RowID>,
write_row_state_machine: Option<StateMachine<WriteRowStateMachine>>,
_phantom: PhantomData<Clock>,
}
pub struct WriteRowStateMachine {
state: WriteRowState,
is_finalized: bool,
pager: Rc<Pager>,
row: Row,
record: Option<ImmutableRecord>,
cursor: Option<BTreeCursor>,
}
impl<Clock: LogicalClock> CommitStateMachine<Clock> {
fn new(state: CommitState, pager: Rc<Pager>, tx_id: TxID, connection: Arc<Connection>) -> Self {
Self {
state,
is_finalized: false,
pager,
tx_id,
connection,
write_set: Vec::new(),
write_row_state_machine: None,
_phantom: PhantomData,
}
}
}
impl WriteRowStateMachine {
fn new(pager: Rc<Pager>, row: Row) -> Self {
Self {
state: WriteRowState::Initial,
is_finalized: false,
pager,
row,
record: None,
cursor: None,
}
}
}
impl<Clock: LogicalClock> StateTransition for CommitStateMachine<Clock> {
type State = CommitStateMachine<Clock>;
type Context = MvStore<Clock>;
type SMResult = ();
#[tracing::instrument(fields(state = ?self.state), skip(self, mvcc_store))]
fn step(&mut self, mvcc_store: &Self::Context) -> Result<TransitionResult<Self::SMResult>> {
match self.state {
CommitState::Initial => {
let end_ts = mvcc_store.get_timestamp();
// NOTICE: the first shadowed tx keeps the entry alive in the map
// for the duration of this whole function, which is important for correctness!
let tx = mvcc_store
.txs
.get(&self.tx_id)
.ok_or(LimboError::TxTerminated)?;
let tx = tx.value().write();
match tx.state.load() {
TransactionState::Terminated => {
return Err(LimboError::TxTerminated);
}
_ => {
assert_eq!(tx.state, TransactionState::Active);
}
}
tx.state.store(TransactionState::Preparing);
tracing::trace!("prepare_tx(tx_id={})", self.tx_id);
/* TODO: The code we have here is sufficient for snapshot isolation.
** In order to implement serializability, we need the following steps:
**
** 1. Validate if all read versions are still visible by inspecting the read_set
** 2. Validate if there are no phantoms by walking the scans from scan_set (which we don't even have yet)
** - a phantom is a version that became visible in the middle of our transaction,
** but wasn't taken into account during one of the scans from the scan_set
** 3. Wait for commit dependencies, which we don't even track yet...
** Excerpt on what a commit dependency is and how it's tracked, from the original paper:
** """
A transaction T1 has a commit dependency on another transaction
T2, if T1 is allowed to commit only if T2 commits. If T2 aborts,
T1 must also abort, so cascading aborts are possible. T1 acquires a
commit dependency either by speculatively reading or speculatively ignoring a version,
instead of waiting for T2 to commit.
We implement commit dependencies by a register-and-report
approach: T1 registers its dependency with T2 and T2 informs T1
when it has committed or aborted. Each transaction T contains a
counter, CommitDepCounter, that counts how many unresolved
commit dependencies it still has. A transaction cannot commit
until this counter is zero. In addition, T has a Boolean variable
AbortNow that other transactions can set to tell T to abort. Each
transaction T also has a set, CommitDepSet, that stores transaction IDs
of the transactions that depend on T.
To take a commit dependency on a transaction T2, T1 increments
its CommitDepCounter and adds its transaction ID to T2's CommitDepSet.
When T2 has committed, it locates each transaction in
its CommitDepSet and decrements their CommitDepCounter. If
T2 aborted, it tells the dependent transactions to also abort by
setting their AbortNow flags. If a dependent transaction is not
found, this means that it has already aborted.
Note that a transaction with commit dependencies may not have to
wait at all - the dependencies may have been resolved before it is
ready to commit. Commit dependencies consolidate all waits into
a single wait and postpone the wait to just before commit.
Some transactions may have to wait before commit.
Waiting raises a concern of deadlocks.
However, deadlocks cannot occur because an older transaction never
waits on a younger transaction. In
a wait-for graph the direction of edges would always be from a
younger transaction (higher end timestamp) to an older transaction
(lower end timestamp) so cycles are impossible.
"""
** If you're wondering when a speculative read happens, here you go:
** Case 1: speculative read of TB:
"""
If transaction TB is in the Preparing state, it has acquired an end
timestamp TS which will be V's begin timestamp if TB commits.
A safe approach in this situation would be to have transaction T
wait until transaction TB commits. However, we want to avoid all
blocking during normal processing so instead we continue with
the visibility test and, if the test returns true, allow T to
speculatively read V. Transaction T acquires a commit dependency on
TB, restricting the serialization order of the two transactions. That
is, T is allowed to commit only if TB commits.
"""
** Case 2: speculative ignore of TE:
"""
If TE's state is Preparing, it has an end timestamp TS that will become
the end timestamp of V if TE does commit. If TS is greater than the read
time RT, it is obvious that V will be visible if TE commits. If TE
aborts, V will still be visible, because any transaction that updates
V after TE has aborted will obtain an end timestamp greater than
TS. If TS is less than RT, we have a more complicated situation:
if TE commits, V will not be visible to T but if TE aborts, it will
be visible. We could handle this by forcing T to wait until TE
commits or aborts but we want to avoid all blocking during normal processing.
Instead we allow T to speculatively ignore V and
proceed with its processing. Transaction T acquires a commit
dependency (see Section 2.7) on TE, that is, T is allowed to commit
only if TE commits.
"""
*/
tx.state.store(TransactionState::Committed(end_ts));
tracing::trace!("commit_tx(tx_id={})", self.tx_id);
self.write_set
.extend(tx.write_set.iter().map(|v| *v.value()));
self.state = CommitState::BeginPagerTxn { end_ts };
Ok(TransitionResult::Continue)
}
CommitState::BeginPagerTxn { end_ts } => {
// FIXME: how do we deal with multiple concurrent writes?
// WAL requires a txn to be written sequentially. Either we:
// 1. Wait for the current writer to finish before the second txn starts.
// 2. Choose a txn to write depending on some heuristic, like the number of frames that will be written.
// 3. ..
//
loop {
match self.pager.begin_write_tx() {
Ok(crate::types::IOResult::Done(result)) => {
if let crate::result::LimboResult::Busy = result {
return Err(LimboError::InternalError(
"Pager write transaction busy".to_string(),
));
}
break;
}
Ok(crate::types::IOResult::IO) => {
// FIXME: this is a hack to make the pager run the IO loop
self.pager.io.run_once().unwrap();
continue;
}
Err(e) => {
return Err(LimboError::InternalError(e.to_string()));
}
}
}
self.state = CommitState::WriteRow {
end_ts,
write_set_index: 0,
};
return Ok(TransitionResult::Continue);
}
CommitState::WriteRow {
end_ts,
write_set_index,
} => {
if write_set_index == self.write_set.len() {
self.state = CommitState::CommitPagerTxn { end_ts };
return Ok(TransitionResult::Continue);
}
let id = &self.write_set[write_set_index];
if let Some(row_versions) = mvcc_store.rows.get(id) {
let row_versions = row_versions.value().read();
// Find rows that were written by this transaction
for row_version in row_versions.iter() {
if let TxTimestampOrID::TxID(row_tx_id) = row_version.begin {
if row_tx_id == self.tx_id {
let state_machine = mvcc_store
.write_row_to_pager(self.pager.clone(), &row_version.row)?;
self.write_row_state_machine = Some(state_machine);
self.state = CommitState::WriteRowStateMachine {
end_ts,
write_set_index,
};
break;
}
}
if let Some(TxTimestampOrID::Timestamp(row_tx_id)) = row_version.end {
if row_tx_id == self.tx_id {
let state_machine = mvcc_store
.write_row_to_pager(self.pager.clone(), &row_version.row)?;
self.write_row_state_machine = Some(state_machine);
self.state = CommitState::WriteRowStateMachine {
end_ts,
write_set_index,
};
break;
}
}
}
}
Ok(TransitionResult::Continue)
}
CommitState::WriteRowStateMachine {
end_ts,
write_set_index,
} => {
let write_row_state_machine = self.write_row_state_machine.as_mut().unwrap();
match write_row_state_machine.step(&())? {
TransitionResult::Io => return Ok(TransitionResult::Io),
TransitionResult::Continue => {
return Ok(TransitionResult::Continue);
}
TransitionResult::Done(_) => {
self.state = CommitState::WriteRow {
end_ts,
write_set_index: write_set_index + 1,
};
return Ok(TransitionResult::Continue);
}
}
}
CommitState::CommitPagerTxn { end_ts } => {
// Write committed data to pager for persistence
// Flush dirty pages to WAL - this is critical for data persistence
// Similar to what step_end_write_txn does for legacy transactions
loop {
let result = self
.pager
.end_tx(
false, // rollback = false since we're committing
false, // schema_did_change = false for now (could be improved)
&self.connection,
self.connection.wal_checkpoint_disabled.get(),
)
.map_err(|e| LimboError::InternalError(e.to_string()))
.unwrap();
if let crate::types::IOResult::Done(_) = result {
break;
}
}
self.state = CommitState::Commit { end_ts };
Ok(TransitionResult::Continue)
}
CommitState::Commit { end_ts } => {
let mut log_record = LogRecord::new(end_ts);
for id in &self.write_set {
if let Some(row_versions) = mvcc_store.rows.get(id) {
let mut row_versions = row_versions.value().write();
for row_version in row_versions.iter_mut() {
if let TxTimestampOrID::TxID(id) = row_version.begin {
if id == self.tx_id {
// New version is valid STARTING FROM committing transaction's end timestamp
// See diagram on page 299: https://www.cs.cmu.edu/~15721-f24/papers/Hekaton.pdf
row_version.begin = TxTimestampOrID::Timestamp(end_ts);
mvcc_store.insert_version_raw(
&mut log_record.row_versions,
row_version.clone(),
); // FIXME: optimize cloning out
}
}
if let Some(TxTimestampOrID::TxID(id)) = row_version.end {
if id == self.tx_id {
// Old version is valid UNTIL committing transaction's end timestamp
// See diagram on page 299: https://www.cs.cmu.edu/~15721-f24/papers/Hekaton.pdf
row_version.end = Some(TxTimestampOrID::Timestamp(end_ts));
mvcc_store.insert_version_raw(
&mut log_record.row_versions,
row_version.clone(),
); // FIXME: optimize cloning out
}
}
}
}
}
tracing::trace!("updated(tx_id={})", self.tx_id);
// We have now replaced the transaction ID references in all the versions
// with a timestamp and can, therefore, remove the
// transaction. Please note that when we move to lockless, the
// invariant doesn't necessarily hold anymore because another thread
// might have speculatively read a version that we want to remove.
// But that's a problem for another day.
// FIXME: it actually just became a problem for today!!!
// TODO: test that reproduces this failure, and then a fix
mvcc_store.txs.remove(&self.tx_id);
if !log_record.row_versions.is_empty() {
mvcc_store.storage.log_tx(log_record)?;
}
tracing::trace!("logged(tx_id={})", self.tx_id);
self.finalize(mvcc_store)?;
Ok(TransitionResult::Done(()))
}
}
}
fn finalize(&mut self, _context: &Self::Context) -> Result<()> {
self.is_finalized = true;
Ok(())
}
fn is_finalized(&self) -> bool {
self.is_finalized
}
}
impl StateTransition for WriteRowStateMachine {
type State = WriteRowStateMachine;
type Context = ();
type SMResult = ();
#[tracing::instrument(fields(state = ?self.state), skip(self, _context))]
fn step(&mut self, _context: &Self::Context) -> Result<TransitionResult<Self::SMResult>> {
use crate::storage::btree::BTreeCursor;
use crate::types::{IOResult, SeekKey, SeekOp};
match self.state {
WriteRowState::Initial => {
// Create the record and key
let mut record = ImmutableRecord::new(self.row.data.len());
record.start_serialization(&self.row.data);
self.record = Some(record);
self.state = WriteRowState::CreateCursor;
Ok(TransitionResult::Continue)
}
WriteRowState::CreateCursor => {
// Create the cursor
let root_page = self.row.id.table_id as usize;
let num_columns = self.row.column_count;
let cursor = BTreeCursor::new_table(
None, // Write directly to B-tree
self.pager.clone(),
root_page,
num_columns,
);
self.cursor = Some(cursor);
self.state = WriteRowState::Seek;
Ok(TransitionResult::Continue)
}
WriteRowState::Seek => {
// Position the cursor by seeking to the row position
let seek_key = SeekKey::TableRowId(self.row.id.row_id);
let cursor = self.cursor.as_mut().unwrap();
match cursor.seek(seek_key, SeekOp::GE { eq_only: true })? {
IOResult::Done(_) => {
self.state = WriteRowState::Insert;
Ok(TransitionResult::Continue)
}
IOResult::IO => {
return Ok(TransitionResult::Io);
}
}
}
WriteRowState::Insert => {
// Insert the record into the B-tree
let cursor = self.cursor.as_mut().unwrap();
let key = BTreeKey::new_table_rowid(self.row.id.row_id, self.record.as_ref());
match cursor
.insert(&key, true)
.map_err(|e| LimboError::InternalError(e.to_string()))?
{
IOResult::Done(()) => {
tracing::trace!(
"write_row_to_pager(table_id={}, row_id={})",
self.row.id.table_id,
self.row.id.row_id
);
self.finalize(&())?;
Ok(TransitionResult::Done(()))
}
IOResult::IO => {
return Ok(TransitionResult::Io);
}
}
}
}
}
fn finalize(&mut self, _context: &Self::Context) -> Result<()> {
self.is_finalized = true;
Ok(())
}
fn is_finalized(&self) -> bool {
self.is_finalized
}
}
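// Editor's sketch of the commit-dependency bookkeeping described in the Hekaton
// excerpt quoted in CommitState::Initial above; the changeset explicitly does not
// track dependencies yet, so every name below is hypothetical (and `AtomicBool`
// would need an extra import).
struct CommitDeps {
    // How many unresolved dependencies this transaction still waits on.
    commit_dep_counter: AtomicU64,
    // Set by an aborting dependency to tell this transaction to abort as well.
    abort_now: AtomicBool,
    // Transactions that registered a dependency on us and must be notified.
    commit_dep_set: RwLock<HashSet<TxID>>,
}
impl CommitDeps {
    fn can_commit(&self) -> bool {
        // A transaction may commit only once all dependencies have resolved
        // and no dependency asked it to abort.
        self.commit_dep_counter.load(Ordering::SeqCst) == 0
            && !self.abort_now.load(Ordering::SeqCst)
    }
}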
/// A multi-version concurrency control database.
#[derive(Debug)]
pub struct MvStore<Clock: LogicalClock> {
@@ -230,6 +685,7 @@ pub struct MvStore<Clock: LogicalClock> {
next_rowid: AtomicU64,
clock: Clock,
storage: Storage,
loaded_tables: RwLock<HashSet<u64>>,
}
impl<Clock: LogicalClock> MvStore<Clock> {
@@ -242,6 +698,7 @@ impl<Clock: LogicalClock> MvStore<Clock> {
next_rowid: AtomicU64::new(0), // TODO: determine this from B-Tree
clock,
storage,
loaded_tables: RwLock::new(HashSet::new()),
}
}
@@ -264,7 +721,7 @@ impl<Clock: LogicalClock> MvStore<Clock> {
let tx = self
.txs
.get(&tx_id)
.ok_or(DatabaseError::NoSuchTransactionID(tx_id))?;
.ok_or(LimboError::NoSuchTransactionID(tx_id.to_string()))?;
let mut tx = tx.value().write();
assert_eq!(tx.state, TransactionState::Active);
let id = row.id;
@@ -297,9 +754,9 @@ impl<Clock: LogicalClock> MvStore<Clock> {
/// # Returns
///
/// Returns `true` if the row was successfully updated, and `false` otherwise.
pub fn update(&self, tx_id: TxID, row: Row) -> Result<bool> {
pub fn update(&self, tx_id: TxID, row: Row, pager: Rc<Pager>) -> Result<bool> {
tracing::trace!("update(tx_id={}, row.id={:?})", tx_id, row.id);
if !self.delete(tx_id, row.id)? {
if !self.delete(tx_id, row.id, pager)? {
return Ok(false);
}
self.insert(tx_id, row)?;
@@ -308,9 +765,9 @@ impl<Clock: LogicalClock> MvStore<Clock> {
/// Inserts a row in the database with new values, previously deleting
/// any old data if it existed. Bails on a delete error, e.g. write-write conflict.
pub fn upsert(&self, tx_id: TxID, row: Row) -> Result<()> {
pub fn upsert(&self, tx_id: TxID, row: Row, pager: Rc<Pager>) -> Result<()> {
tracing::trace!("upsert(tx_id={}, row.id={:?})", tx_id, row.id);
self.delete(tx_id, row.id)?;
self.delete(tx_id, row.id, pager)?;
self.insert(tx_id, row)
}
@@ -328,7 +785,7 @@ impl<Clock: LogicalClock> MvStore<Clock> {
///
/// Returns `true` if the row was successfully deleted, and `false` otherwise.
///
pub fn delete(&self, tx_id: TxID, id: RowID) -> Result<bool> {
pub fn delete(&self, tx_id: TxID, id: RowID, pager: Rc<Pager>) -> Result<bool> {
tracing::trace!("delete(tx_id={}, id={:?})", tx_id, id);
let row_versions_opt = self.rows.get(&id);
if let Some(ref row_versions) = row_versions_opt {
@@ -337,7 +794,7 @@ impl<Clock: LogicalClock> MvStore<Clock> {
let tx = self
.txs
.get(&tx_id)
.ok_or(DatabaseError::NoSuchTransactionID(tx_id))?;
.ok_or(LimboError::NoSuchTransactionID(tx_id.to_string()))?;
let tx = tx.value().read();
assert_eq!(tx.state, TransactionState::Active);
// A transaction cannot delete a version that it cannot see,
@@ -349,8 +806,8 @@ impl<Clock: LogicalClock> MvStore<Clock> {
drop(row_versions);
drop(row_versions_opt);
drop(tx);
self.rollback_tx(tx_id);
return Err(DatabaseError::WriteWriteConflict);
self.rollback_tx(tx_id, pager);
return Err(LimboError::WriteWriteConflict);
}
rv.end = Some(TxTimestampOrID::TxID(tx.tx_id));
@@ -360,7 +817,7 @@ impl<Clock: LogicalClock> MvStore<Clock> {
let tx = self
.txs
.get(&tx_id)
.ok_or(DatabaseError::NoSuchTransactionID(tx_id))?;
.ok_or(LimboError::NoSuchTransactionID(tx_id.to_string()))?;
let mut tx = tx.value().write();
tx.insert_to_write_set(id);
return Ok(true);
@@ -409,24 +866,6 @@ impl<Clock: LogicalClock> MvStore<Clock> {
Ok(keys.collect())
}
/// Gets all row ids in the database for a given table.
pub fn scan_row_ids_for_table(&self, table_id: u64) -> Result<Vec<RowID>> {
tracing::trace!("scan_row_ids_for_table(table_id={})", table_id);
Ok(self
.rows
.range(
RowID {
table_id,
row_id: 0,
}..RowID {
table_id,
row_id: i64::MAX,
},
)
.map(|entry| *entry.key())
.collect())
}
pub fn get_row_id_range(
&self,
table_id: u64,
@@ -484,12 +923,16 @@ impl<Clock: LogicalClock> MvStore<Clock> {
/// This function starts a new transaction in the database and returns a `TxID` value
/// that you can use to perform operations within the transaction. All changes made within the
/// transaction are isolated from other transactions until you commit the transaction.
pub fn begin_tx(&self) -> TxID {
pub fn begin_tx(&self, pager: Rc<Pager>) -> TxID {
let tx_id = self.get_tx_id();
let begin_ts = self.get_timestamp();
let tx = Transaction::new(tx_id, begin_ts);
tracing::trace!("begin_tx(tx_id={})", tx_id);
self.txs.insert(tx_id, RwLock::new(tx));
// TODO: we need to tie a pager's read transaction to a transaction ID, so that future refactors that read
// pages from the WAL/DB read from a consistent state to maintain snapshot isolation.
pager.begin_read_tx().unwrap();
tx_id
}
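/// Editor's sketch (hypothetical, not in this changeset): the new `pager`
/// parameter pairs a pager read transaction with the MVCC transaction, so begin
/// and rollback are driven with the same pager handle, as the tests do.
fn aborted_tx_example(&self, pager: Rc<Pager>) {
    // Takes a pager read transaction in addition to allocating a tx id.
    let tx_id = self.begin_tx(pager.clone());
    // ... reads and writes against `tx_id` would go here ...
    // Ends the pager read transaction along with terminating the MVCC transaction.
    self.rollback_tx(tx_id, pager);
}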
@@ -502,145 +945,18 @@ impl<Clock: LogicalClock> MvStore<Clock> {
/// # Arguments
///
/// * `tx_id` - The ID of the transaction to commit.
pub fn commit_tx(&self, tx_id: TxID) -> Result<()> {
let end_ts = self.get_timestamp();
// NOTICE: the first shadowed tx keeps the entry alive in the map
// for the duration of this whole function, which is important for correctness!
let tx = self.txs.get(&tx_id).ok_or(DatabaseError::TxTerminated)?;
let tx = tx.value().write();
match tx.state.load() {
TransactionState::Terminated => return Err(DatabaseError::TxTerminated),
_ => {
assert_eq!(tx.state, TransactionState::Active);
}
}
tx.state.store(TransactionState::Preparing);
tracing::trace!("prepare_tx(tx_id={})", tx_id);
/* TODO: The code we have here is sufficient for snapshot isolation.
** In order to implement serializability, we need the following steps:
**
** 1. Validate if all read versions are still visible by inspecting the read_set
** 2. Validate if there are no phantoms by walking the scans from scan_set (which we don't even have yet)
** - a phantom is a version that became visible in the middle of our transaction,
** but wasn't taken into account during one of the scans from the scan_set
** 3. Wait for commit dependencies, which we don't even track yet...
** Excerpt on what a commit dependency is and how it's tracked, from the original paper:
** """
A transaction T1 has a commit dependency on another transaction
T2, if T1 is allowed to commit only if T2 commits. If T2 aborts,
T1 must also abort, so cascading aborts are possible. T1 acquires a
commit dependency either by speculatively reading or speculatively ignoring a version,
instead of waiting for T2 to commit.
We implement commit dependencies by a register-and-report
approach: T1 registers its dependency with T2 and T2 informs T1
when it has committed or aborted. Each transaction T contains a
counter, CommitDepCounter, that counts how many unresolved
commit dependencies it still has. A transaction cannot commit
until this counter is zero. In addition, T has a Boolean variable
AbortNow that other transactions can set to tell T to abort. Each
transaction T also has a set, CommitDepSet, that stores transaction IDs
of the transactions that depend on T.
To take a commit dependency on a transaction T2, T1 increments
its CommitDepCounter and adds its transaction ID to T2's CommitDepSet.
When T2 has committed, it locates each transaction in
its CommitDepSet and decrements their CommitDepCounter. If
T2 aborted, it tells the dependent transactions to also abort by
setting their AbortNow flags. If a dependent transaction is not
found, this means that it has already aborted.
Note that a transaction with commit dependencies may not have to
wait at all - the dependencies may have been resolved before it is
ready to commit. Commit dependencies consolidate all waits into
a single wait and postpone the wait to just before commit.
Some transactions may have to wait before commit.
Waiting raises a concern of deadlocks.
However, deadlocks cannot occur because an older transaction never
waits on a younger transaction. In
a wait-for graph the direction of edges would always be from a
younger transaction (higher end timestamp) to an older transaction
(lower end timestamp) so cycles are impossible.
"""
** If you're wondering when a speculative read happens, here you go:
** Case 1: speculative read of TB:
"""
If transaction TB is in the Preparing state, it has acquired an end
timestamp TS which will be V's begin timestamp if TB commits.
A safe approach in this situation would be to have transaction T
wait until transaction TB commits. However, we want to avoid all
blocking during normal processing so instead we continue with
the visibility test and, if the test returns true, allow T to
speculatively read V. Transaction T acquires a commit dependency on
TB, restricting the serialization order of the two transactions. That
is, T is allowed to commit only if TB commits.
"""
** Case 2: speculative ignore of TE:
"""
If TE's state is Preparing, it has an end timestamp TS that will become
the end timestamp of V if TE does commit. If TS is greater than the read
time RT, it is obvious that V will be visible if TE commits. If TE
aborts, V will still be visible, because any transaction that updates
V after TE has aborted will obtain an end timestamp greater than
TS. If TS is less than RT, we have a more complicated situation:
if TE commits, V will not be visible to T but if TE aborts, it will
be visible. We could handle this by forcing T to wait until TE
commits or aborts but we want to avoid all blocking during normal processing.
Instead we allow T to speculatively ignore V and
proceed with its processing. Transaction T acquires a commit
dependency (see Section 2.7) on TE, that is, T is allowed to commit
only if TE commits.
"""
*/
tx.state.store(TransactionState::Committed(end_ts));
tracing::trace!("commit_tx(tx_id={})", tx_id);
let write_set: Vec<RowID> = tx.write_set.iter().map(|v| *v.value()).collect();
drop(tx);
// Postprocessing: inserting row versions and logging the transaction to persistent storage.
// TODO: we should probably save to persistent storage first, and only then update the in-memory structures.
let mut log_record = LogRecord::new(end_ts);
for ref id in write_set {
if let Some(row_versions) = self.rows.get(id) {
let mut row_versions = row_versions.value().write();
for row_version in row_versions.iter_mut() {
if let TxTimestampOrID::TxID(id) = row_version.begin {
if id == tx_id {
// New version is valid STARTING FROM committing transaction's end timestamp
// See diagram on page 299: https://www.cs.cmu.edu/~15721-f24/papers/Hekaton.pdf
row_version.begin = TxTimestampOrID::Timestamp(end_ts);
self.insert_version_raw(
&mut log_record.row_versions,
row_version.clone(),
); // FIXME: optimize cloning out
}
}
if let Some(TxTimestampOrID::TxID(id)) = row_version.end {
if id == tx_id {
// Old version is valid UNTIL committing transaction's end timestamp
// See diagram on page 299: https://www.cs.cmu.edu/~15721-f24/papers/Hekaton.pdf
row_version.end = Some(TxTimestampOrID::Timestamp(end_ts));
self.insert_version_raw(
&mut log_record.row_versions,
row_version.clone(),
); // FIXME: optimize cloning out
}
}
}
}
}
tracing::trace!("updated(tx_id={})", tx_id);
// We have now replaced the transaction ID references in all the versions
// with a timestamp and can, therefore, remove the
// transaction. Please note that when we move to lockless, the
// invariant doesn't necessarily hold anymore because another thread
// might have speculatively read a version that we want to remove.
// But that's a problem for another day.
// FIXME: it actually just became a problem for today!!!
// TODO: test that reproduces this failure, and then a fix
self.txs.remove(&tx_id);
if !log_record.row_versions.is_empty() {
self.storage.log_tx(log_record)?;
}
tracing::trace!("logged(tx_id={})", tx_id);
Ok(())
pub fn commit_tx(
&self,
tx_id: TxID,
pager: Rc<Pager>,
connection: &Arc<Connection>,
) -> Result<StateMachine<CommitStateMachine<Clock>>> {
let state_machine: StateMachine<CommitStateMachine<Clock>> = StateMachine::<
CommitStateMachine<Clock>,
>::new(
CommitStateMachine::new(CommitState::Initial, pager, tx_id, connection.clone()),
);
Ok(state_machine)
}
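/// Editor's sketch (hypothetical driver, not in this changeset): `commit_tx` now
/// returns a state machine instead of committing eagerly, so a caller steps it
/// until completion, servicing IO when asked; compare the `commit_tx` helper in
/// the tests.
fn drive_commit_example(
    &self,
    mut sm: StateMachine<CommitStateMachine<Clock>>,
    pager: Rc<Pager>,
) -> Result<()> {
    loop {
        match sm.step(self)? {
            // Nothing to wait on; keep stepping the machine.
            TransitionResult::Continue => {}
            // Let the pager make progress on outstanding IO before stepping again.
            TransitionResult::Io => pager.io.run_once()?,
            TransitionResult::Done(()) => return Ok(()),
        }
    }
}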
/// Rolls back a transaction with the specified ID.
@@ -651,7 +967,7 @@ impl<Clock: LogicalClock> MvStore<Clock> {
/// # Arguments
///
/// * `tx_id` - The ID of the transaction to abort.
pub fn rollback_tx(&self, tx_id: TxID) {
pub fn rollback_tx(&self, tx_id: TxID, pager: Rc<Pager>) {
let tx_unlocked = self.txs.get(&tx_id).unwrap();
let tx = tx_unlocked.value().write();
assert_eq!(tx.state, TransactionState::Active);
@@ -673,6 +989,7 @@ impl<Clock: LogicalClock> MvStore<Clock> {
let tx = tx_unlocked.value().read();
tx.state.store(TransactionState::Terminated);
tracing::trace!("terminate(tx_id={})", tx_id);
pager.end_read_tx().unwrap();
// FIXME: verify that we can already remove the transaction here!
// Maybe it's fine for snapshot isolation, but too early for serializable?
self.txs.remove(&tx_id);
@@ -776,7 +1093,7 @@ impl<Clock: LogicalClock> MvStore<Clock> {
/// Inserts a new row version into the internal data structure for versions,
/// while making sure that the row version is inserted in the correct order.
fn insert_version_raw(&self, versions: &mut Vec<RowVersion>, row_version: RowVersion) {
pub fn insert_version_raw(&self, versions: &mut Vec<RowVersion>, row_version: RowVersion) {
// NOTICE: this is an insert à la insertion sort, with worst-case linear complexity.
// However, we expect the versions to be nearly sorted, so we deem it worthy
// to search linearly for the insertion point instead of paying the price of using
@@ -798,6 +1115,115 @@ impl<Clock: LogicalClock> MvStore<Clock> {
}
versions.insert(position, row_version);
}
pub fn write_row_to_pager(
&self,
pager: Rc<Pager>,
row: &Row,
) -> Result<StateMachine<WriteRowStateMachine>> {
let state_machine: StateMachine<WriteRowStateMachine> =
StateMachine::<WriteRowStateMachine>::new(WriteRowStateMachine::new(
pager,
row.clone(),
));
Ok(state_machine)
}
/// Try to scan for row ids in the table.
///
/// This function loads all row ids of a table if the table's row ids have not been populated yet.
/// TODO: This is quite expensive, so we should try to load row ids lazily.
///
/// # Arguments
///
pub fn maybe_initialize_table(&self, table_id: u64, pager: Rc<Pager>) -> Result<()> {
tracing::trace!("scan_row_ids_for_table(table_id={})", table_id);
// First, check if the table is already loaded.
if self.loaded_tables.read().contains(&table_id) {
return Ok(());
}
// Then, scan the disk B-tree to find existing rows
self.scan_load_table(table_id, pager)?;
self.loaded_tables.write().insert(table_id);
Ok(())
}
/// Scans the table and inserts the rows into the database.
///
/// This is the initialization step for a table: we don't have any of its rows in memory yet, so we insert them if any exist.
fn scan_load_table(&self, table_id: u64, pager: Rc<Pager>) -> Result<()> {
let root_page = table_id as usize;
let mut cursor = BTreeCursor::new_table(
None, // No MVCC cursor for scanning
pager.clone(),
root_page,
1, // We'll adjust this as needed
);
loop {
match cursor
.rewind()
.map_err(|e| LimboError::InternalError(e.to_string()))?
{
IOResult::Done(()) => break,
IOResult::IO => {
pager.io.run_once().unwrap();
continue;
}
}
}
loop {
let rowid_result = cursor
.rowid()
.map_err(|e| LimboError::InternalError(e.to_string()))?;
let row_id = match rowid_result {
IOResult::Done(Some(row_id)) => row_id,
IOResult::Done(None) => break,
IOResult::IO => {
pager.io.run_once().unwrap();
continue;
}
};
match cursor
.record()
.map_err(|e| LimboError::InternalError(e.to_string()))?
{
IOResult::Done(Some(record)) => {
let id = RowID { table_id, row_id };
let column_count = record.column_count();
// We insert the row with timestamp 0, because it's the only version we have at initialization.
self.insert_version(
id,
RowVersion {
begin: TxTimestampOrID::Timestamp(0),
end: None,
row: Row::new(id, record.get_payload().to_vec(), column_count),
},
);
}
IOResult::Done(None) => break,
IOResult::IO => unreachable!(), // FIXME: lazy me not wanting to do state machine right now
}
// Move to next record
match cursor
.next()
.map_err(|e| LimboError::InternalError(e.to_string()))?
{
IOResult::Done(has_next) => {
if !has_next {
break;
}
}
IOResult::IO => unreachable!(), // FIXME: lazy me not wanting to do state machine right now
}
}
Ok(())
}
}
/// A write-write conflict happens when transaction T_current attempts to update a

View File

@@ -1,26 +1,59 @@
use super::*;
use crate::mvcc::clock::LocalClock;
fn test_db() -> MvStore<LocalClock> {
let clock = LocalClock::new();
let storage = crate::mvcc::persistent_storage::Storage::new_noop();
MvStore::new(clock, storage)
pub(crate) struct MvccTestDbNoConn {
pub(crate) db: Arc<Database>,
}
pub(crate) struct MvccTestDb {
pub(crate) mvcc_store: Arc<MvStore<LocalClock>>,
pub(crate) _db: Arc<Database>,
pub(crate) conn: Arc<Connection>,
}
impl MvccTestDb {
pub fn new() -> Self {
let io = Arc::new(MemoryIO::new());
let db = Database::open_file(io.clone(), ":memory:", true, true).unwrap();
let conn = db.connect().unwrap();
let mvcc_store = db.mv_store.as_ref().unwrap().clone();
Self {
mvcc_store,
_db: db,
conn,
}
}
}
impl MvccTestDbNoConn {
pub fn new() -> Self {
let io = Arc::new(MemoryIO::new());
let db = Database::open_file(io.clone(), ":memory:", true, true).unwrap();
Self { db }
}
}
pub(crate) fn generate_simple_string_row(table_id: u64, id: i64, data: &str) -> Row {
let record = ImmutableRecord::from_values(&[Value::Text(Text::new(data))], 1);
Row {
id: RowID {
table_id,
row_id: id,
},
column_count: 1,
data: record.as_blob().to_vec(),
}
}
#[test]
fn test_insert_read() {
let db = test_db();
let db = MvccTestDb::new();
let tx1 = db.begin_tx();
let tx1_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
};
db.insert(tx1, tx1_row.clone()).unwrap();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx1_row = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx1, tx1_row.clone()).unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -31,10 +64,11 @@ fn test_insert_read() {
.unwrap()
.unwrap();
assert_eq!(tx1_row, row);
db.commit_tx(tx1).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx1).unwrap();
let tx2 = db.begin_tx();
let tx2 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let row = db
.mvcc_store
.read(
tx2,
RowID {
@@ -49,9 +83,9 @@ fn test_insert_read() {
#[test]
fn test_read_nonexistent() {
let db = test_db();
let tx = db.begin_tx();
let row = db.read(
let db = MvccTestDb::new();
let tx = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let row = db.mvcc_store.read(
tx,
RowID {
table_id: 1,
@@ -63,18 +97,13 @@ fn test_read_nonexistent() {
#[test]
fn test_delete() {
let db = test_db();
let db = MvccTestDb::new();
let tx1 = db.begin_tx();
let tx1_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
};
db.insert(tx1, tx1_row.clone()).unwrap();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx1_row = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx1, tx1_row.clone()).unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -85,15 +114,18 @@ fn test_delete() {
.unwrap()
.unwrap();
assert_eq!(tx1_row, row);
db.delete(
tx1,
RowID {
table_id: 1,
row_id: 1,
},
)
.unwrap();
db.mvcc_store
.delete(
tx1,
RowID {
table_id: 1,
row_id: 1,
},
db.conn.pager.borrow().clone(),
)
.unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -103,10 +135,11 @@ fn test_delete() {
)
.unwrap();
assert!(row.is_none());
db.commit_tx(tx1).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx1).unwrap();
let tx2 = db.begin_tx();
let tx2 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let row = db
.mvcc_store
.read(
tx2,
RowID {
@@ -120,32 +153,29 @@ fn test_delete() {
#[test]
fn test_delete_nonexistent() {
let db = test_db();
let tx = db.begin_tx();
let db = MvccTestDb::new();
let tx = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
assert!(!db
.mvcc_store
.delete(
tx,
RowID {
table_id: 1,
row_id: 1
}
},
db.conn.pager.borrow().clone(),
)
.unwrap());
}
#[test]
fn test_commit() {
let db = test_db();
let tx1 = db.begin_tx();
let tx1_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
};
db.insert(tx1, tx1_row.clone()).unwrap();
let db = MvccTestDb::new();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx1_row = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx1, tx1_row.clone()).unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -156,15 +186,12 @@ fn test_commit() {
.unwrap()
.unwrap();
assert_eq!(tx1_row, row);
let tx1_updated_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "World".to_string().into_bytes(),
};
db.update(tx1, tx1_updated_row.clone()).unwrap();
let tx1_updated_row = generate_simple_string_row(1, 1, "World");
db.mvcc_store
.update(tx1, tx1_updated_row.clone(), db.conn.pager.borrow().clone())
.unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -175,10 +202,11 @@ fn test_commit() {
.unwrap()
.unwrap();
assert_eq!(tx1_updated_row, row);
db.commit_tx(tx1).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx1).unwrap();
let tx2 = db.begin_tx();
let tx2 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let row = db
.mvcc_store
.read(
tx2,
RowID {
@@ -188,24 +216,19 @@ fn test_commit() {
)
.unwrap()
.unwrap();
db.commit_tx(tx2).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx2).unwrap();
assert_eq!(tx1_updated_row, row);
db.drop_unused_row_versions();
db.mvcc_store.drop_unused_row_versions();
}
#[test]
fn test_rollback() {
let db = test_db();
let tx1 = db.begin_tx();
let row1 = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
};
db.insert(tx1, row1.clone()).unwrap();
let db = MvccTestDb::new();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let row1 = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx1, row1.clone()).unwrap();
let row2 = db
.mvcc_store
.read(
tx1,
RowID {
@@ -216,15 +239,12 @@ fn test_rollback() {
.unwrap()
.unwrap();
assert_eq!(row1, row2);
let row3 = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "World".to_string().into_bytes(),
};
db.update(tx1, row3.clone()).unwrap();
let row3 = generate_simple_string_row(1, 1, "World");
db.mvcc_store
.update(tx1, row3.clone(), db.conn.pager.borrow().clone())
.unwrap();
let row4 = db
.mvcc_store
.read(
tx1,
RowID {
@@ -235,9 +255,11 @@ fn test_rollback() {
.unwrap()
.unwrap();
assert_eq!(row3, row4);
db.rollback_tx(tx1);
let tx2 = db.begin_tx();
db.mvcc_store
.rollback_tx(tx1, db.conn.pager.borrow().clone());
let tx2 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let row5 = db
.mvcc_store
.read(
tx2,
RowID {
@@ -251,19 +273,14 @@ fn test_rollback() {
#[test]
fn test_dirty_write() {
let db = test_db();
let db = MvccTestDb::new();
// T1 inserts a row with ID 1, but does not commit.
let tx1 = db.begin_tx();
let tx1_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
};
db.insert(tx1, tx1_row.clone()).unwrap();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx1_row = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx1, tx1_row.clone()).unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -275,18 +292,17 @@ fn test_dirty_write() {
.unwrap();
assert_eq!(tx1_row, row);
let conn2 = db._db.connect().unwrap();
// T2 attempts to delete row with ID 1, but fails because T1 has not committed.
let tx2 = db.begin_tx();
let tx2_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "World".to_string().into_bytes(),
};
assert!(!db.update(tx2, tx2_row).unwrap());
let tx2 = db.mvcc_store.begin_tx(conn2.pager.borrow().clone());
let tx2_row = generate_simple_string_row(1, 1, "World");
assert!(!db
.mvcc_store
.update(tx2, tx2_row, conn2.pager.borrow().clone())
.unwrap());
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -301,22 +317,18 @@ fn test_dirty_write() {
#[test]
fn test_dirty_read() {
let db = test_db();
let db = MvccTestDb::new();
// T1 inserts a row with ID 1, but does not commit.
let tx1 = db.begin_tx();
let row1 = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
};
db.insert(tx1, row1).unwrap();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let row1 = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx1, row1).unwrap();
// T2 attempts to read row with ID 1, but doesn't see one because T1 has not committed.
let tx2 = db.begin_tx();
let conn2 = db._db.connect().unwrap();
let tx2 = db.mvcc_store.begin_tx(conn2.pager.borrow().clone());
let row2 = db
.mvcc_store
.read(
tx2,
RowID {
@@ -330,35 +342,34 @@ fn test_dirty_read() {
#[test]
fn test_dirty_read_deleted() {
let db = test_db();
let db = MvccTestDb::new();
// T1 inserts a row with ID 1 and commits.
let tx1 = db.begin_tx();
let tx1_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
};
db.insert(tx1, tx1_row.clone()).unwrap();
db.commit_tx(tx1).unwrap();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx1_row = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx1, tx1_row.clone()).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx1).unwrap();
// T2 deletes row with ID 1, but does not commit.
let tx2 = db.begin_tx();
let conn2 = db._db.connect().unwrap();
let tx2 = db.mvcc_store.begin_tx(conn2.pager.borrow().clone());
assert!(db
.mvcc_store
.delete(
tx2,
RowID {
table_id: 1,
row_id: 1
}
},
conn2.pager.borrow().clone(),
)
.unwrap());
// T3 reads row with ID 1, but doesn't see the delete because T2 hasn't committed.
let tx3 = db.begin_tx();
let conn3 = db._db.connect().unwrap();
let tx3 = db.mvcc_store.begin_tx(conn3.pager.borrow().clone());
let row = db
.mvcc_store
.read(
tx3,
RowID {
@@ -373,19 +384,14 @@ fn test_dirty_read_deleted() {
#[test]
fn test_fuzzy_read() {
let db = test_db();
let db = MvccTestDb::new();
// T1 inserts a row with ID 1 and commits.
let tx1 = db.begin_tx();
let tx1_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "First".to_string().into_bytes(),
};
db.insert(tx1, tx1_row.clone()).unwrap();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx1_row = generate_simple_string_row(1, 1, "First");
db.mvcc_store.insert(tx1, tx1_row.clone()).unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -396,11 +402,13 @@ fn test_fuzzy_read() {
.unwrap()
.unwrap();
assert_eq!(tx1_row, row);
db.commit_tx(tx1).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx1).unwrap();
// T2 reads the row with ID 1 within an active transaction.
let tx2 = db.begin_tx();
let conn2 = db._db.connect().unwrap();
let tx2 = db.mvcc_store.begin_tx(conn2.pager.borrow().clone());
let row = db
.mvcc_store
.read(
tx2,
RowID {
@@ -413,19 +421,17 @@ fn test_fuzzy_read() {
assert_eq!(tx1_row, row);
// T3 updates the row and commits.
let tx3 = db.begin_tx();
let tx3_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Second".to_string().into_bytes(),
};
db.update(tx3, tx3_row).unwrap();
db.commit_tx(tx3).unwrap();
let conn3 = db._db.connect().unwrap();
let tx3 = db.mvcc_store.begin_tx(conn3.pager.borrow().clone());
let tx3_row = generate_simple_string_row(1, 1, "Second");
db.mvcc_store
.update(tx3, tx3_row, conn3.pager.borrow().clone())
.unwrap();
commit_tx(db.mvcc_store.clone(), &conn3, tx3).unwrap();
// T2 still reads the same version of the row as before.
let row = db
.mvcc_store
.read(
tx2,
RowID {
@@ -439,32 +445,23 @@ fn test_fuzzy_read() {
// T2 tries to update the row, but fails because T3 has already committed an update to the row,
// so T2 trying to write would violate snapshot isolation if it succeeded.
let tx2_newrow = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Third".to_string().into_bytes(),
};
let update_result = db.update(tx2, tx2_newrow);
assert_eq!(Err(DatabaseError::WriteWriteConflict), update_result);
let tx2_newrow = generate_simple_string_row(1, 1, "Third");
let update_result = db
.mvcc_store
.update(tx2, tx2_newrow, conn2.pager.borrow().clone());
assert!(matches!(update_result, Err(LimboError::WriteWriteConflict)));
}
#[test]
fn test_lost_update() {
let db = test_db();
let db = MvccTestDb::new();
// T1 inserts a row with ID 1 and commits.
let tx1 = db.begin_tx();
let tx1_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello".to_string().into_bytes(),
};
db.insert(tx1, tx1_row.clone()).unwrap();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx1_row = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx1, tx1_row.clone()).unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -475,38 +472,37 @@ fn test_lost_update() {
.unwrap()
.unwrap();
assert_eq!(tx1_row, row);
db.commit_tx(tx1).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx1).unwrap();
// T2 attempts to update row ID 1 within an active transaction.
let tx2 = db.begin_tx();
let tx2_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "World".to_string().into_bytes(),
};
assert!(db.update(tx2, tx2_row.clone()).unwrap());
let conn2 = db._db.connect().unwrap();
let tx2 = db.mvcc_store.begin_tx(conn2.pager.borrow().clone());
let tx2_row = generate_simple_string_row(1, 1, "World");
assert!(db
.mvcc_store
.update(tx2, tx2_row.clone(), conn2.pager.borrow().clone())
.unwrap());
// T3 also attempts to update row ID 1 within an active transaction.
let tx3 = db.begin_tx();
let tx3_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "Hello, world!".to_string().into_bytes(),
};
assert_eq!(
Err(DatabaseError::WriteWriteConflict),
db.update(tx3, tx3_row)
);
let conn3 = db._db.connect().unwrap();
let tx3 = db.mvcc_store.begin_tx(conn3.pager.borrow().clone());
let tx3_row = generate_simple_string_row(1, 1, "Hello, world!");
assert!(matches!(
db.mvcc_store
.update(tx3, tx3_row, conn3.pager.borrow().clone(),),
Err(LimboError::WriteWriteConflict)
));
db.commit_tx(tx2).unwrap();
assert_eq!(Err(DatabaseError::TxTerminated), db.commit_tx(tx3));
commit_tx(db.mvcc_store.clone(), &conn2, tx2).unwrap();
assert!(matches!(
commit_tx(db.mvcc_store.clone(), &conn3, tx3),
Err(LimboError::TxTerminated)
));
let tx4 = db.begin_tx();
let conn4 = db._db.connect().unwrap();
let tx4 = db.mvcc_store.begin_tx(conn4.pager.borrow().clone());
let row = db
.mvcc_store
.read(
tx4,
RowID {
@@ -523,31 +519,24 @@ fn test_lost_update() {
// This test checks for the typo present in the paper, explained in https://github.com/penberg/mvcc-rs/issues/15
#[test]
fn test_committed_visibility() {
let db = test_db();
let db = MvccTestDb::new();
// let's add $10 to my account since I like money
let tx1 = db.begin_tx();
let tx1_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "10".to_string().into_bytes(),
};
db.insert(tx1, tx1_row.clone()).unwrap();
db.commit_tx(tx1).unwrap();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx1_row = generate_simple_string_row(1, 1, "10");
db.mvcc_store.insert(tx1, tx1_row.clone()).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx1).unwrap();
// but I like more money, so let me try adding $10 more
let tx2 = db.begin_tx();
let tx2_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "20".to_string().into_bytes(),
};
assert!(db.update(tx2, tx2_row.clone()).unwrap());
let conn2 = db._db.connect().unwrap();
let tx2 = db.mvcc_store.begin_tx(conn2.pager.borrow().clone());
let tx2_row = generate_simple_string_row(1, 1, "20");
assert!(db
.mvcc_store
.update(tx2, tx2_row.clone(), conn2.pager.borrow().clone())
.unwrap());
let row = db
.mvcc_store
.read(
tx2,
RowID {
@@ -560,8 +549,10 @@ fn test_committed_visibility() {
assert_eq!(row, tx2_row);
// can I check how much money I have?
let tx3 = db.begin_tx();
let conn3 = db._db.connect().unwrap();
let tx3 = db.mvcc_store.begin_tx(conn3.pager.borrow().clone());
let row = db
.mvcc_store
.read(
tx3,
RowID {
@@ -577,22 +568,18 @@ fn test_committed_visibility() {
// Test to check if an older transaction can see (un)committed future rows
#[test]
fn test_future_row() {
let db = test_db();
let db = MvccTestDb::new();
let tx1 = db.begin_tx();
let tx1 = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let tx2 = db.begin_tx();
let tx2_row = Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "10".to_string().into_bytes(),
};
db.insert(tx2, tx2_row).unwrap();
let conn2 = db._db.connect().unwrap();
let tx2 = db.mvcc_store.begin_tx(conn2.pager.borrow().clone());
let tx2_row = generate_simple_string_row(1, 1, "Hello");
db.mvcc_store.insert(tx2, tx2_row).unwrap();
// transaction in progress, so tx1 shouldn't be able to see the value
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -604,8 +591,9 @@ fn test_future_row() {
assert_eq!(row, None);
// let's commit the transaction and check if tx1 can see it
db.commit_tx(tx2).unwrap();
commit_tx(db.mvcc_store.clone(), &conn2, tx2).unwrap();
let row = db
.mvcc_store
.read(
tx1,
RowID {
@@ -617,92 +605,108 @@ fn test_future_row() {
assert_eq!(row, None);
}
use crate::mvcc::clock::LogicalClock;
use crate::mvcc::cursor::MvccLazyCursor;
use crate::mvcc::database::{MvStore, Row, RowID};
use crate::mvcc::persistent_storage::Storage;
use std::rc::Rc;
use std::sync::atomic::{AtomicU64, Ordering};
use crate::types::Text;
use crate::Database;
use crate::MemoryIO;
use crate::RefValue;
use crate::Value;
// Simple atomic clock implementation for testing
struct TestClock {
counter: AtomicU64,
}
impl TestClock {
fn new(start: u64) -> Self {
Self {
counter: AtomicU64::new(start),
}
}
}
impl LogicalClock for TestClock {
fn get_timestamp(&self) -> u64 {
self.counter.fetch_add(1, Ordering::SeqCst)
}
fn reset(&self, ts: u64) {
let current = self.counter.load(Ordering::SeqCst);
if ts > current {
self.counter.store(ts, Ordering::SeqCst);
}
}
}
fn setup_test_db() -> (Rc<MvStore<TestClock>>, u64) {
let clock = TestClock::new(1);
let storage = Storage::new_noop();
let db = Rc::new(MvStore::new(clock, storage));
let tx_id = db.begin_tx();
fn setup_test_db() -> (MvccTestDb, u64) {
let db = MvccTestDb::new();
let tx_id = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let table_id = 1;
let test_rows = [
(5, b"row5".to_vec()),
(10, b"row10".to_vec()),
(15, b"row15".to_vec()),
(20, b"row20".to_vec()),
(30, b"row30".to_vec()),
(5, "row5"),
(10, "row10"),
(15, "row15"),
(20, "row20"),
(30, "row30"),
];
for (row_id, data) in test_rows.iter() {
let id = RowID::new(table_id, *row_id);
let row = Row::new(id, data.clone());
db.insert(tx_id, row).unwrap();
let record = ImmutableRecord::from_values(&[Value::Text(Text::new(data))], 1);
let row = Row::new(id, record.as_blob().to_vec(), 1);
db.mvcc_store.insert(tx_id, row).unwrap();
}
db.commit_tx(tx_id).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx_id).unwrap();
let tx_id = db.begin_tx();
let tx_id = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
(db, tx_id)
}
fn setup_lazy_db(initial_keys: &[i64]) -> (Rc<MvStore<TestClock>>, u64) {
let clock = TestClock::new(1);
let storage = Storage::new_noop();
let db = Rc::new(MvStore::new(clock, storage));
let tx_id = db.begin_tx();
fn setup_lazy_db(initial_keys: &[i64]) -> (MvccTestDb, u64) {
let db = MvccTestDb::new();
let tx_id = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let table_id = 1;
for i in initial_keys {
let id = RowID::new(table_id, *i);
let data = format!("row{i}").into_bytes();
let row = Row::new(id, data);
db.insert(tx_id, row).unwrap();
let data = format!("row{i}");
let record = ImmutableRecord::from_values(&[Value::Text(Text::new(&data))], 1);
let row = Row::new(id, record.as_blob().to_vec(), 1);
db.mvcc_store.insert(tx_id, row).unwrap();
}
db.commit_tx(tx_id).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx_id).unwrap();
let tx_id = db.begin_tx();
let tx_id = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
(db, tx_id)
}
pub(crate) fn commit_tx(
mv_store: Arc<MvStore<LocalClock>>,
conn: &Arc<Connection>,
tx_id: u64,
) -> Result<()> {
let mut sm = mv_store
.commit_tx(tx_id, conn.pager.borrow().clone(), conn)
.unwrap();
let result = sm.step(&mv_store)?;
assert!(sm.is_finalized());
match result {
TransitionResult::Done(()) => Ok(()),
_ => unreachable!(),
}
}
pub(crate) fn commit_tx_no_conn(
db: &MvccTestDbNoConn,
tx_id: u64,
conn: &Arc<Connection>,
) -> Result<(), LimboError> {
let mut sm = db
.db
.get_mv_store()
.unwrap()
.commit_tx(tx_id, conn.pager.borrow().clone(), conn)
.unwrap();
let result = sm.step(db.db.mv_store.as_ref().unwrap())?;
assert!(sm.is_finalized());
match result {
TransitionResult::Done(()) => Ok(()),
_ => unreachable!(),
}
}
#[test]
fn test_lazy_scan_cursor_basic() {
let (db, tx_id) = setup_lazy_db(&[1, 2, 3, 4, 5]);
let table_id = 1;
let mut cursor = MvccLazyCursor::new(db.clone(), tx_id, table_id).unwrap();
let mut cursor = MvccLazyCursor::new(
db.mvcc_store.clone(),
tx_id,
table_id,
db.conn.pager.borrow().clone(),
)
.unwrap();
// Check first row
assert!(cursor.forward());
@@ -731,7 +735,13 @@ fn test_lazy_scan_cursor_with_gaps() {
let (db, tx_id) = setup_test_db();
let table_id = 1;
let mut cursor = MvccLazyCursor::new(db.clone(), tx_id, table_id).unwrap();
let mut cursor = MvccLazyCursor::new(
db.mvcc_store.clone(),
tx_id,
table_id,
db.conn.pager.borrow().clone(),
)
.unwrap();
// Check first row
assert!(cursor.forward());
@@ -761,7 +771,13 @@ fn test_cursor_basic() {
let (db, tx_id) = setup_lazy_db(&[1, 2, 3, 4, 5]);
let table_id = 1;
let mut cursor = MvccLazyCursor::new(db.clone(), tx_id, table_id).unwrap();
let mut cursor = MvccLazyCursor::new(
db.mvcc_store.clone(),
tx_id,
table_id,
db.conn.pager.borrow().clone(),
)
.unwrap();
cursor.forward();
@@ -788,24 +804,40 @@ fn test_cursor_basic() {
#[test]
fn test_cursor_with_empty_table() {
let clock = TestClock::new(1);
let storage = Storage::new_noop();
let db = Rc::new(MvStore::new(clock, storage));
let tx_id = db.begin_tx();
let db = MvccTestDb::new();
{
// FIXME: force page 1 initialization
let pager = db.conn.pager.borrow().clone();
let tx_id = db.mvcc_store.begin_tx(pager.clone());
commit_tx(db.mvcc_store.clone(), &db.conn, tx_id).unwrap();
}
let tx_id = db.mvcc_store.begin_tx(db.conn.pager.borrow().clone());
let table_id = 1; // Empty table
// Test LazyScanCursor with empty table
let cursor = MvccLazyCursor::new(db.clone(), tx_id, table_id).unwrap();
let mut cursor = MvccLazyCursor::new(
db.mvcc_store.clone(),
tx_id,
table_id,
db.conn.pager.borrow().clone(),
)
.unwrap();
assert!(cursor.is_empty());
assert!(cursor.current_row_id().is_none());
}
#[test]
fn test_cursor_modification_during_scan() {
let (db, tx_id) = setup_lazy_db(&[1, 2, 3, 4, 5]);
let (db, tx_id) = setup_lazy_db(&[1, 2, 4, 5]);
let table_id = 1;
let mut cursor = MvccLazyCursor::new(db.clone(), tx_id, table_id).unwrap();
let mut cursor = MvccLazyCursor::new(
db.mvcc_store.clone(),
tx_id,
table_id,
db.conn.pager.borrow().clone(),
)
.unwrap();
// Read first row
assert!(cursor.forward());
@@ -814,21 +846,36 @@ fn test_cursor_modification_during_scan() {
// Insert a new row with ID between existing rows
let new_row_id = RowID::new(table_id, 3);
let new_row_data = b"new_row".to_vec();
let new_row = Row::new(new_row_id, new_row_data);
let new_row = generate_simple_string_row(table_id, new_row_id.row_id, "new_row");
cursor.insert(new_row).unwrap();
let row = cursor.current_row().unwrap().unwrap();
let row = db.mvcc_store.read(tx_id, new_row_id).unwrap().unwrap();
let mut record = ImmutableRecord::new(1024);
record.start_serialization(&row.data);
let value = record.get_value(0).unwrap();
match value {
RefValue::Text(text) => {
assert_eq!(text.as_str(), "new_row");
}
_ => panic!("Expected Text value"),
}
assert_eq!(row.id.row_id, 3);
assert_eq!(row.data, b"new_row".to_vec());
// Continue scanning - the cursor should still work correctly
cursor.forward(); // Move to 4
let row = cursor.current_row().unwrap().unwrap();
let row = db
.mvcc_store
.read(tx_id, RowID::new(table_id, 4))
.unwrap()
.unwrap();
assert_eq!(row.id.row_id, 4);
cursor.forward(); // Move to 5
let row = cursor.current_row().unwrap().unwrap();
let row = db
.mvcc_store
.read(tx_id, RowID::new(table_id, 5))
.unwrap()
.unwrap();
assert_eq!(row.id.row_id, 5);
assert!(!cursor.forward());
assert!(cursor.is_empty());
@@ -907,13 +954,7 @@ fn test_snapshot_isolation_tx_visible1() {
let row_version = RowVersion {
begin,
end,
row: Row {
id: RowID {
table_id: 1,
row_id: 1,
},
data: "testme".to_string().into_bytes(),
},
row: generate_simple_string_row(1, 1, "testme"),
};
tracing::debug!("Testing visibility of {row_version:?}");
row_version.is_visible_to(&current_tx, &txs)

View File

@@ -1,13 +0,0 @@
use thiserror::Error;
#[derive(Error, Debug, PartialEq)]
pub enum DatabaseError {
#[error("no such transaction ID: `{0}`")]
NoSuchTransactionID(u64),
#[error("transaction aborted because of a write-write conflict")]
WriteWriteConflict,
#[error("transaction is terminated")]
TxTerminated,
#[error("I/O error: {0}")]
Io(String),
}

View File

@@ -34,7 +34,6 @@
pub mod clock;
pub mod cursor;
pub mod database;
pub mod errors;
pub mod persistent_storage;
pub use clock::LocalClock;
@@ -42,8 +41,10 @@ pub use database::MvStore;
#[cfg(test)]
mod tests {
use crate::mvcc::clock::LocalClock;
use crate::mvcc::database::{MvStore, Row, RowID};
use crate::mvcc::database::tests::{
commit_tx_no_conn, generate_simple_string_row, MvccTestDbNoConn,
};
use crate::mvcc::database::RowID;
use std::sync::atomic::AtomicI64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
@@ -51,55 +52,52 @@ mod tests {
static IDS: AtomicI64 = AtomicI64::new(1);
#[test]
#[ignore = "FIXME: This test fails because there is write busy lock yet to be fixed"]
fn test_non_overlapping_concurrent_inserts() {
// Two threads insert to the database concurrently using non-overlapping
// row IDs.
let clock = LocalClock::default();
let storage = crate::mvcc::persistent_storage::Storage::new_noop();
let db = Arc::new(MvStore::new(clock, storage));
let db = Arc::new(MvccTestDbNoConn::new());
let iterations = 100000;
let th1 = {
let db = db.clone();
std::thread::spawn(move || {
let conn = db.db.connect().unwrap();
let mvcc_store = db.db.mv_store.as_ref().unwrap().clone();
for _ in 0..iterations {
let tx = db.begin_tx();
let tx = mvcc_store.begin_tx(conn.pager.borrow().clone());
let id = IDS.fetch_add(1, Ordering::SeqCst);
let id = RowID {
table_id: 1,
row_id: id,
};
let row = Row {
id,
data: "Hello".to_string().into_bytes(),
};
db.insert(tx, row.clone()).unwrap();
db.commit_tx(tx).unwrap();
let tx = db.begin_tx();
let committed_row = db.read(tx, id).unwrap();
db.commit_tx(tx).unwrap();
let row = generate_simple_string_row(1, id.row_id, "Hello");
mvcc_store.insert(tx, row.clone()).unwrap();
commit_tx_no_conn(&db, tx, &conn).unwrap();
let tx = mvcc_store.begin_tx(conn.pager.borrow().clone());
let committed_row = mvcc_store.read(tx, id).unwrap();
commit_tx_no_conn(&db, tx, &conn).unwrap();
assert_eq!(committed_row, Some(row));
}
})
};
let th2 = {
std::thread::spawn(move || {
let conn = db.db.connect().unwrap();
let mvcc_store = db.db.mv_store.as_ref().unwrap().clone();
for _ in 0..iterations {
let tx = db.begin_tx();
let tx = mvcc_store.begin_tx(conn.pager.borrow().clone());
let id = IDS.fetch_add(1, Ordering::SeqCst);
let id = RowID {
table_id: 1,
row_id: id,
};
let row = Row {
id,
data: "World".to_string().into_bytes(),
};
db.insert(tx, row.clone()).unwrap();
db.commit_tx(tx).unwrap();
let tx = db.begin_tx();
let committed_row = db.read(tx, id).unwrap();
db.commit_tx(tx).unwrap();
let row = generate_simple_string_row(1, id.row_id, "World");
mvcc_store.insert(tx, row.clone()).unwrap();
commit_tx_no_conn(&db, tx, &conn).unwrap();
let tx = mvcc_store.begin_tx(conn.pager.borrow().clone());
let committed_row = mvcc_store.read(tx, id).unwrap();
commit_tx_no_conn(&db, tx, &conn).unwrap();
assert_eq!(committed_row, Some(row));
}
})
@@ -112,40 +110,38 @@ mod tests {
#[test]
#[ignore]
fn test_overlapping_concurrent_inserts_read_your_writes() {
let clock = LocalClock::default();
let storage = crate::mvcc::persistent_storage::Storage::new_noop();
let db = Arc::new(MvStore::new(clock, storage));
let db = Arc::new(MvccTestDbNoConn::new());
let iterations = 100000;
let work = |prefix: &'static str| {
let db = db.clone();
std::thread::spawn(move || {
let conn = db.db.connect().unwrap();
let mvcc_store = db.db.mv_store.as_ref().unwrap().clone();
let mut failed_upserts = 0;
for i in 0..iterations {
if i % 1000 == 0 {
tracing::debug!("{prefix}: {i}");
}
if i % 10000 == 0 {
let dropped = db.drop_unused_row_versions();
let dropped = mvcc_store.drop_unused_row_versions();
tracing::debug!("garbage collected {dropped} versions");
}
let tx = db.begin_tx();
let tx = mvcc_store.begin_tx(conn.pager.borrow().clone());
let id = i % 16;
let id = RowID {
table_id: 1,
row_id: id,
};
let row = Row {
id,
data: format!("{prefix} @{tx}").into_bytes(),
};
if let Err(e) = db.upsert(tx, row.clone()) {
let row = generate_simple_string_row(1, id.row_id, &format!("{prefix} @{tx}"));
if let Err(e) = mvcc_store.upsert(tx, row.clone(), conn.pager.borrow().clone())
{
tracing::trace!("upsert failed: {e}");
failed_upserts += 1;
continue;
}
let committed_row = db.read(tx, id).unwrap();
db.commit_tx(tx).unwrap();
let committed_row = mvcc_store.read(tx, id).unwrap();
commit_tx_no_conn(&db, tx, &conn).unwrap();
assert_eq!(committed_row, Some(row));
}
tracing::info!(

View File

@@ -1,7 +1,7 @@
use std::fmt::Debug;
use crate::mvcc::database::{LogRecord, Result};
use crate::mvcc::errors::DatabaseError;
use crate::mvcc::database::LogRecord;
use crate::{LimboError, Result};
#[derive(Debug)]
pub enum Storage {
@@ -24,7 +24,7 @@ impl Storage {
pub fn read_tx_log(&self) -> Result<Vec<LogRecord>> {
match self {
Self::Noop => Err(DatabaseError::Io(
Self::Noop => Err(LimboError::InternalError(
"cannot read from Noop storage".to_string(),
)),
}

81
core/state_machine.rs Normal file
View File

@@ -0,0 +1,81 @@
use crate::Result;
pub enum TransitionResult<Result> {
Io,
Continue,
Done(Result),
}
/// A generic trait for state machines.
pub trait StateTransition {
type State;
type Context;
type SMResult;
/// Transition the state machine to the next state.
///
/// Returns `TransitionResult::Io` if the state machine needs to perform an IO operation.
/// Returns `TransitionResult::Continue` if the state machine needs to continue.
/// Returns `TransitionResult::Done` if the state machine is done.
fn step(&mut self, context: &Self::Context) -> Result<TransitionResult<Self::SMResult>>;
/// Finalize the state machine.
///
/// This is called when the state machine is done.
fn finalize(&mut self, context: &Self::Context) -> Result<()>;
/// Check if the state machine is finalized.
fn is_finalized(&self) -> bool;
}
pub struct StateMachine<State: StateTransition> {
state: State,
is_finalized: bool,
}
/// A generic state machine that loops calling `step` until it returns `TransitionResult::Done` or `TransitionResult::Io`.
impl<State: StateTransition> StateMachine<State> {
pub fn new(state: State) -> Self {
Self {
state,
is_finalized: false,
}
}
}
impl<State: StateTransition> StateTransition for StateMachine<State> {
type State = State;
type Context = State::Context;
type SMResult = State::SMResult;
fn step(&mut self, context: &Self::Context) -> Result<TransitionResult<Self::SMResult>> {
loop {
if self.is_finalized {
unreachable!("StateMachine::transition: state machine is finalized");
}
match self.state.step(context)? {
TransitionResult::Io => {
return Ok(TransitionResult::Io);
}
TransitionResult::Continue => {
continue;
}
TransitionResult::Done(result) => {
assert!(self.state.is_finalized());
self.is_finalized = true;
return Ok(TransitionResult::Done(result));
}
}
}
}
fn finalize(&mut self, context: &Self::Context) -> Result<()> {
self.state.finalize(context)?;
self.is_finalized = true;
Ok(())
}
fn is_finalized(&self) -> bool {
self.is_finalized
}
}
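// Illustrative sketch (not part of this diff): a minimal `StateTransition`
// implementation driven by `StateMachine`. `CountTo` is a hypothetical state
// used only to show the step/finalize protocol; it is not an API added here.
#[allow(dead_code)]
struct CountTo {
    current: u32,
    target: u32,
    finalized: bool,
}
impl StateTransition for CountTo {
    type State = Self;
    type Context = ();
    type SMResult = u32;
    fn step(&mut self, _context: &Self::Context) -> Result<TransitionResult<Self::SMResult>> {
        if self.current < self.target {
            // Not finished yet; ask the driver to call step() again.
            self.current += 1;
            return Ok(TransitionResult::Continue);
        }
        // Mark ourselves finalized before reporting Done; the outer
        // `StateMachine::step` asserts `is_finalized()` on Done.
        self.finalized = true;
        Ok(TransitionResult::Done(self.current))
    }
    fn finalize(&mut self, _context: &Self::Context) -> Result<()> {
        self.finalized = true;
        Ok(())
    }
    fn is_finalized(&self) -> bool {
        self.finalized
    }
}
// Driving it: `StateMachine::new(CountTo { current: 0, target: 3, finalized: false })`
// stepped with `sm.step(&())` loops internally and yields `TransitionResult::Done(3)`.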

File diff suppressed because it is too large Load Diff

View File

@@ -3,18 +3,18 @@ use parking_lot::Mutex;
use std::pin::Pin;
use std::sync::atomic::{AtomicUsize, Ordering};
use super::sqlite3_ondisk::PageSize;
pub struct BufferPool {
pub free_buffers: Mutex<Vec<BufferData>>,
page_size: AtomicUsize,
}
const DEFAULT_PAGE_SIZE: usize = 4096;
impl BufferPool {
pub fn new(page_size: Option<usize>) -> Self {
Self {
free_buffers: Mutex::new(Vec::new()),
page_size: AtomicUsize::new(page_size.unwrap_or(DEFAULT_PAGE_SIZE)),
page_size: AtomicUsize::new(page_size.unwrap_or(PageSize::DEFAULT as usize)),
}
}

View File

@@ -16,6 +16,13 @@ pub trait DatabaseStorage: Send + Sync {
buffer: Arc<RefCell<Buffer>>,
c: Completion,
) -> Result<Completion>;
fn write_pages(
&self,
first_page_idx: usize,
page_size: usize,
buffers: Vec<Arc<RefCell<Buffer>>>,
c: Completion,
) -> Result<Completion>;
fn sync(&self, c: Completion) -> Result<Completion>;
fn size(&self) -> Result<u64>;
fn truncate(&self, len: usize, c: Completion) -> Result<Completion>;
@@ -61,6 +68,22 @@ impl DatabaseStorage for DatabaseFile {
self.file.pwrite(pos, buffer, c)
}
fn write_pages(
&self,
page_idx: usize,
page_size: usize,
buffers: Vec<Arc<RefCell<Buffer>>>,
c: Completion,
) -> Result<Completion> {
assert!(page_idx > 0);
assert!(page_size >= 512);
assert!(page_size <= 65536);
assert_eq!(page_size & (page_size - 1), 0);
let pos = (page_idx - 1) * page_size;
let c = self.file.pwritev(pos, buffers, c)?;
Ok(c)
}
#[instrument(skip_all, level = Level::DEBUG)]
fn sync(&self, c: Completion) -> Result<Completion> {
self.file.sync(c)
@@ -84,61 +107,3 @@ impl DatabaseFile {
Self { file }
}
}
pub struct FileMemoryStorage {
file: Arc<dyn crate::io::File>,
}
unsafe impl Send for FileMemoryStorage {}
unsafe impl Sync for FileMemoryStorage {}
impl DatabaseStorage for FileMemoryStorage {
#[instrument(skip_all, level = Level::DEBUG)]
fn read_page(&self, page_idx: usize, c: Completion) -> Result<Completion> {
let r = c.as_read();
let size = r.buf().len();
assert!(page_idx > 0);
if !(512..=65536).contains(&size) || size & (size - 1) != 0 {
return Err(LimboError::NotADB);
}
let pos = (page_idx - 1) * size;
self.file.pread(pos, c)
}
#[instrument(skip_all, level = Level::DEBUG)]
fn write_page(
&self,
page_idx: usize,
buffer: Arc<RefCell<Buffer>>,
c: Completion,
) -> Result<Completion> {
let buffer_size = buffer.borrow().len();
assert!(buffer_size >= 512);
assert!(buffer_size <= 65536);
assert_eq!(buffer_size & (buffer_size - 1), 0);
let pos = (page_idx - 1) * buffer_size;
self.file.pwrite(pos, buffer, c)
}
#[instrument(skip_all, level = Level::DEBUG)]
fn sync(&self, c: Completion) -> Result<Completion> {
self.file.sync(c)
}
#[instrument(skip_all, level = Level::DEBUG)]
fn size(&self) -> Result<u64> {
self.file.size()
}
#[instrument(skip_all, level = Level::INFO)]
fn truncate(&self, len: usize, c: Completion) -> Result<Completion> {
let c = self.file.truncate(len, c)?;
Ok(c)
}
}
impl FileMemoryStorage {
pub fn new(file: Arc<dyn crate::io::File>) -> Self {
Self { file }
}
}

View File

@@ -1,230 +1,75 @@
use crate::storage::sqlite3_ondisk::MAX_PAGE_SIZE;
use super::sqlite3_ondisk::{DatabaseHeader, PageContent};
use crate::turso_assert;
use crate::{
storage::{
self,
pager::{PageRef, Pager},
sqlite3_ondisk::DATABASE_HEADER_PAGE_ID,
},
storage::pager::{PageRef, Pager},
types::IOResult,
LimboError, Result,
};
use std::cell::{Ref, RefMut};
// const HEADER_OFFSET_MAGIC: usize = 0;
const HEADER_OFFSET_PAGE_SIZE: usize = 16;
const HEADER_OFFSET_WRITE_VERSION: usize = 18;
const HEADER_OFFSET_READ_VERSION: usize = 19;
const HEADER_OFFSET_RESERVED_SPACE: usize = 20;
const HEADER_OFFSET_MAX_EMBED_FRAC: usize = 21;
const HEADER_OFFSET_MIN_EMBED_FRAC: usize = 22;
const HEADER_OFFSET_MIN_LEAF_FRAC: usize = 23;
const HEADER_OFFSET_CHANGE_COUNTER: usize = 24;
const HEADER_OFFSET_DATABASE_SIZE: usize = 28;
const HEADER_OFFSET_FREELIST_TRUNK_PAGE: usize = 32;
const HEADER_OFFSET_FREELIST_PAGES: usize = 36;
const HEADER_OFFSET_SCHEMA_COOKIE: usize = 40;
const HEADER_OFFSET_SCHEMA_FORMAT: usize = 44;
const HEADER_OFFSET_DEFAULT_PAGE_CACHE_SIZE: usize = 48;
const HEADER_OFFSET_VACUUM_MODE_LARGEST_ROOT_PAGE: usize = 52;
const HEADER_OFFSET_TEXT_ENCODING: usize = 56;
const HEADER_OFFSET_USER_VERSION: usize = 60;
const HEADER_OFFSET_INCREMENTAL_VACUUM_ENABLED: usize = 64;
const HEADER_OFFSET_APPLICATION_ID: usize = 68;
//const HEADER_OFFSET_RESERVED_FOR_EXPANSION: usize = 72;
const HEADER_OFFSET_VERSION_VALID_FOR: usize = 92;
const HEADER_OFFSET_VERSION_NUMBER: usize = 96;
pub struct HeaderRef(PageRef);
// Helper to get a read-only reference to the header page.
fn get_header_page(pager: &Pager) -> Result<IOResult<PageRef>> {
if !pager.db_state.is_initialized() {
return Err(LimboError::InternalError(
"Database is empty, header does not exist - page 1 should've been allocated before this".to_string(),
));
}
let (page, c) = pager.read_page(DATABASE_HEADER_PAGE_ID)?;
if page.is_locked() {
return Ok(IOResult::IO);
}
Ok(IOResult::Done(page))
}
// Helper to get a writable reference to the header page and mark it dirty.
fn get_header_page_for_write(pager: &Pager) -> Result<IOResult<PageRef>> {
if !pager.db_state.is_initialized() {
// This should not be called on an empty DB for writing, as page 1 is allocated on first transaction.
return Err(LimboError::InternalError(
"Cannot write to header of an empty database - page 1 should've been allocated before this".to_string(),
));
}
let (page, c) = pager.read_page(DATABASE_HEADER_PAGE_ID)?;
if page.is_locked() {
return Ok(IOResult::IO);
}
turso_assert!(
page.get().id == DATABASE_HEADER_PAGE_ID,
"page must have number 1"
);
pager.add_dirty(&page);
Ok(IOResult::Done(page))
}
/// Helper function to run async header accessors until completion
fn run_header_accessor_until_done<T, F>(pager: &Pager, mut accessor: F) -> Result<T>
where
F: FnMut() -> Result<IOResult<T>>,
{
loop {
match accessor()? {
IOResult::Done(value) => return Ok(value),
IOResult::IO => {
pager.io.run_once()?;
}
impl HeaderRef {
pub fn from_pager(pager: &Pager) -> Result<IOResult<Self>> {
if !pager.db_state.is_initialized() {
return Err(LimboError::InternalError(
"Database is empty, header does not exist - page 1 should've been allocated before this".to_string()
));
}
}
}
/// Helper macro to implement getters and setters for header fields.
/// For example, `impl_header_field_accessor!(page_size, u16, HEADER_OFFSET_PAGE_SIZE);`
/// will generate the following functions:
/// - `pub fn get_page_size(pager: &Pager) -> Result<u16>` (sync)
/// - `pub fn get_page_size_async(pager: &Pager) -> Result<CursorResult<u16>>` (async)
/// - `pub fn set_page_size(pager: &Pager, value: u16) -> Result<()>` (sync)
/// - `pub fn set_page_size_async(pager: &Pager, value: u16) -> Result<CursorResult<()>>` (async)
///
/// The macro takes three required arguments:
/// - `$field_name`: The name of the field to implement.
/// - `$type`: The type of the field.
/// - `$offset`: The offset of the field in the header page.
///
/// And a fourth optional argument:
/// - `$ifzero`: A value to return if the field is 0.
///
/// The macro will generate both sync and async versions of the functions.
///
macro_rules! impl_header_field_accessor {
($field_name:ident, $type:ty, $offset:expr $(, $ifzero:expr)?) => {
paste::paste! {
// Async version
#[allow(dead_code)]
pub fn [<get_ $field_name _async>](pager: &Pager) -> Result<IOResult<$type>> {
if !pager.db_state.is_initialized() {
return Err(LimboError::InternalError(format!("Database is empty, header does not exist - page 1 should've been allocated before this")));
}
let page = match get_header_page(pager)? {
IOResult::Done(page) => page,
IOResult::IO => return Ok(IOResult::IO),
};
let page_inner = page.get();
let page_content = page_inner.contents.as_ref().unwrap();
let buf = page_content.buffer.borrow();
let buf_slice = buf.as_slice();
let mut bytes = [0; std::mem::size_of::<$type>()];
bytes.copy_from_slice(&buf_slice[$offset..$offset + std::mem::size_of::<$type>()]);
let value = <$type>::from_be_bytes(bytes);
$(
if value == 0 {
return Ok(IOResult::Done($ifzero));
}
)?
Ok(IOResult::Done(value))
}
// Sync version
#[allow(dead_code)]
pub fn [<get_ $field_name>](pager: &Pager) -> Result<$type> {
run_header_accessor_until_done(pager, || [<get_ $field_name _async>](pager))
}
// Async setter
#[allow(dead_code)]
pub fn [<set_ $field_name _async>](pager: &Pager, value: $type) -> Result<IOResult<()>> {
let page = match get_header_page_for_write(pager)? {
IOResult::Done(page) => page,
IOResult::IO => return Ok(IOResult::IO),
};
let page_inner = page.get();
let page_content = page_inner.contents.as_ref().unwrap();
let mut buf = page_content.buffer.borrow_mut();
let buf_slice = buf.as_mut_slice();
buf_slice[$offset..$offset + std::mem::size_of::<$type>()].copy_from_slice(&value.to_be_bytes());
turso_assert!(page.get().id == 1, "page must have number 1");
pager.add_dirty(&page);
Ok(IOResult::Done(()))
}
// Sync setter
#[allow(dead_code)]
pub fn [<set_ $field_name>](pager: &Pager, value: $type) -> Result<()> {
run_header_accessor_until_done(pager, || [<set_ $field_name _async>](pager, value))
}
let (page, _c) = pager.read_page(DatabaseHeader::PAGE_ID)?;
if page.is_locked() {
return Ok(IOResult::IO);
}
};
}
// impl_header_field_accessor!(magic, [u8; 16], HEADER_OFFSET_MAGIC);
impl_header_field_accessor!(page_size_u16, u16, HEADER_OFFSET_PAGE_SIZE);
impl_header_field_accessor!(write_version, u8, HEADER_OFFSET_WRITE_VERSION);
impl_header_field_accessor!(read_version, u8, HEADER_OFFSET_READ_VERSION);
impl_header_field_accessor!(reserved_space, u8, HEADER_OFFSET_RESERVED_SPACE);
impl_header_field_accessor!(max_embed_frac, u8, HEADER_OFFSET_MAX_EMBED_FRAC);
impl_header_field_accessor!(min_embed_frac, u8, HEADER_OFFSET_MIN_EMBED_FRAC);
impl_header_field_accessor!(min_leaf_frac, u8, HEADER_OFFSET_MIN_LEAF_FRAC);
impl_header_field_accessor!(change_counter, u32, HEADER_OFFSET_CHANGE_COUNTER);
impl_header_field_accessor!(database_size, u32, HEADER_OFFSET_DATABASE_SIZE);
impl_header_field_accessor!(freelist_trunk_page, u32, HEADER_OFFSET_FREELIST_TRUNK_PAGE);
impl_header_field_accessor!(freelist_pages, u32, HEADER_OFFSET_FREELIST_PAGES);
impl_header_field_accessor!(schema_cookie, u32, HEADER_OFFSET_SCHEMA_COOKIE);
impl_header_field_accessor!(schema_format, u32, HEADER_OFFSET_SCHEMA_FORMAT);
impl_header_field_accessor!(
default_page_cache_size,
i32,
HEADER_OFFSET_DEFAULT_PAGE_CACHE_SIZE,
storage::sqlite3_ondisk::DEFAULT_CACHE_SIZE
);
impl_header_field_accessor!(
vacuum_mode_largest_root_page,
u32,
HEADER_OFFSET_VACUUM_MODE_LARGEST_ROOT_PAGE
);
impl_header_field_accessor!(text_encoding, u32, HEADER_OFFSET_TEXT_ENCODING);
impl_header_field_accessor!(user_version, i32, HEADER_OFFSET_USER_VERSION);
impl_header_field_accessor!(
incremental_vacuum_enabled,
u32,
HEADER_OFFSET_INCREMENTAL_VACUUM_ENABLED
);
impl_header_field_accessor!(application_id, i32, HEADER_OFFSET_APPLICATION_ID);
//impl_header_field_accessor!(reserved_for_expansion, [u8; 20], HEADER_OFFSET_RESERVED_FOR_EXPANSION);
impl_header_field_accessor!(version_valid_for, u32, HEADER_OFFSET_VERSION_VALID_FOR);
impl_header_field_accessor!(version_number, u32, HEADER_OFFSET_VERSION_NUMBER);
turso_assert!(
page.get().id == DatabaseHeader::PAGE_ID,
"incorrect header page id"
);
pub fn get_page_size(pager: &Pager) -> Result<u32> {
let size = get_page_size_u16(pager)?;
if size == 1 {
return Ok(MAX_PAGE_SIZE);
Ok(IOResult::Done(Self(page)))
}
pub fn borrow(&self) -> Ref<'_, DatabaseHeader> {
// TODO: Instead of erasing mutability, implement `get_mut_contents` and return a shared reference.
let content: &PageContent = self.0.get_contents();
Ref::map(content.buffer.borrow(), |buffer| {
bytemuck::from_bytes::<DatabaseHeader>(&buffer.as_slice()[0..DatabaseHeader::SIZE])
})
}
Ok(size as u32)
}
#[allow(dead_code)]
pub fn set_page_size(pager: &Pager, value: u32) -> Result<()> {
let page_size = if value == MAX_PAGE_SIZE {
1
} else {
value as u16
};
set_page_size_u16(pager, page_size)
}
pub struct HeaderRefMut(PageRef);
#[allow(dead_code)]
pub fn get_page_size_async(pager: &Pager) -> Result<IOResult<u32>> {
match get_page_size_u16_async(pager)? {
IOResult::Done(size) => {
if size == 1 {
return Ok(IOResult::Done(MAX_PAGE_SIZE));
}
Ok(IOResult::Done(size as u32))
impl HeaderRefMut {
pub fn from_pager(pager: &Pager) -> Result<IOResult<Self>> {
if !pager.db_state.is_initialized() {
return Err(LimboError::InternalError(
"Database is empty, header does not exist - page 1 should've been allocated before this".to_string(),
));
}
IOResult::IO => Ok(IOResult::IO),
let (page, _c) = pager.read_page(DatabaseHeader::PAGE_ID)?;
if page.is_locked() {
return Ok(IOResult::IO);
}
turso_assert!(
page.get().id == DatabaseHeader::PAGE_ID,
"incorrect header page id"
);
pager.add_dirty(&page);
Ok(IOResult::Done(Self(page)))
}
pub fn borrow_mut(&self) -> RefMut<'_, DatabaseHeader> {
let content = self.0.get_contents();
RefMut::map(content.buffer.borrow_mut(), |buffer| {
bytemuck::from_bytes_mut::<DatabaseHeader>(
&mut buffer.as_mut_slice()[0..DatabaseHeader::SIZE],
)
})
}
}

View File

@@ -18,6 +18,7 @@ pub(crate) mod page_cache;
#[allow(clippy::arc_with_non_send_sync)]
pub(crate) mod pager;
pub(crate) mod sqlite3_ondisk;
mod state_machines;
#[allow(clippy::arc_with_non_send_sync)]
pub(crate) mod wal;

View File

@@ -2,9 +2,8 @@ use crate::result::LimboResult;
use crate::storage::btree::BTreePageInner;
use crate::storage::buffer_pool::BufferPool;
use crate::storage::database::DatabaseStorage;
use crate::storage::header_accessor;
use crate::storage::sqlite3_ondisk::{
self, parse_wal_frame_header, DatabaseHeader, PageContent, PageType, DEFAULT_PAGE_SIZE,
self, parse_wal_frame_header, DatabaseHeader, PageContent, PageSize, PageType,
};
use crate::storage::wal::{CheckpointResult, Wal};
use crate::types::{IOResult, WalInsertInfo};
@@ -21,8 +20,9 @@ use std::sync::{Arc, Mutex};
use tracing::{instrument, trace, Level};
use super::btree::{btree_init_page, BTreePage};
use super::header_accessor::{HeaderRef, HeaderRefMut};
use super::page_cache::{CacheError, CacheResizeResult, DumbLruPageCache, PageCacheKey};
use super::sqlite3_ondisk::{begin_write_btree_page, DATABASE_HEADER_SIZE};
use super::sqlite3_ondisk::begin_write_btree_page;
use super::wal::CheckpointMode;
#[cfg(not(feature = "omit_autovacuum"))]
@@ -318,7 +318,8 @@ pub struct Pager {
/// Source of the database pages.
pub db_file: Arc<dyn DatabaseStorage>,
/// The write-ahead log (WAL) for the database.
pub(crate) wal: Rc<RefCell<dyn Wal>>,
/// In-memory databases, ephemeral tables, and ephemeral indexes do not have a WAL.
pub(crate) wal: Option<Rc<RefCell<dyn Wal>>>,
/// A page cache for the database.
page_cache: Arc<RwLock<DumbLruPageCache>>,
/// Buffer pool for temporary data storage.
@@ -346,7 +347,7 @@ pub struct Pager {
/// Cache page_size and reserved_space at Pager init and reuse for subsequent
/// `usable_space` calls. TODO: Invalidate reserved_space when we add the functionality
/// to change it.
page_size: Cell<Option<u32>>,
pub(crate) page_size: Cell<Option<u32>>,
reserved_space: OnceCell<u8>,
free_page_state: RefCell<FreePageState>,
}
@@ -410,7 +411,7 @@ enum FreePageState {
impl Pager {
pub fn new(
db_file: Arc<dyn DatabaseStorage>,
wal: Rc<RefCell<dyn Wal>>,
wal: Option<Rc<RefCell<dyn Wal>>>,
io: Arc<dyn crate::io::IO>,
page_cache: Arc<RwLock<DumbLruPageCache>>,
buffer_pool: Arc<BufferPool>,
@@ -456,7 +457,7 @@ impl Pager {
}
pub fn set_wal(&mut self, wal: Rc<RefCell<dyn Wal>>) {
self.wal = wal;
self.wal = Some(wal);
}
pub fn get_auto_vacuum_mode(&self) -> AutoVacuumMode {
@@ -473,10 +474,8 @@ impl Pager {
#[cfg(not(feature = "omit_autovacuum"))]
pub fn ptrmap_get(&self, target_page_num: u32) -> Result<IOResult<Option<PtrmapEntry>>> {
tracing::trace!("ptrmap_get(page_idx = {})", target_page_num);
let configured_page_size = match header_accessor::get_page_size_async(self)? {
IOResult::Done(size) => size as usize,
IOResult::IO => return Ok(IOResult::IO),
};
let configured_page_size =
return_if_io!(self.with_header(|header| header.page_size)).get() as usize;
if target_page_num < FIRST_PTRMAP_PAGE_NO
|| is_ptrmap_page(target_page_num, configured_page_size)
@@ -493,7 +492,7 @@ impl Pager {
ptrmap_pg_no
);
let (ptrmap_page, c) = self.read_page(ptrmap_pg_no as usize)?;
let (ptrmap_page, _c) = self.read_page(ptrmap_pg_no as usize)?;
if ptrmap_page.is_locked() {
return Ok(IOResult::IO);
}
@@ -559,10 +558,7 @@ impl Pager {
parent_page_no
);
let page_size = match header_accessor::get_page_size_async(self)? {
IOResult::Done(size) => size as usize,
IOResult::IO => return Ok(IOResult::IO),
};
let page_size = return_if_io!(self.with_header(|header| header.page_size)).get() as usize;
if db_page_no_to_update < FIRST_PTRMAP_PAGE_NO
|| is_ptrmap_page(db_page_no_to_update, page_size)
@@ -584,7 +580,7 @@ impl Pager {
offset_in_ptrmap_page
);
let (ptrmap_page, c) = self.read_page(ptrmap_pg_no as usize)?;
let (ptrmap_page, _c) = self.read_page(ptrmap_pg_no as usize)?;
if ptrmap_page.is_locked() {
return Ok(IOResult::IO);
}
@@ -658,21 +654,19 @@ impl Pager {
Ok(IOResult::Done(page.get().get().id as u32))
}
AutoVacuumMode::Full => {
let mut root_page_num =
match header_accessor::get_vacuum_mode_largest_root_page_async(self)? {
IOResult::Done(value) => value,
IOResult::IO => return Ok(IOResult::IO),
};
let (mut root_page_num, page_size) =
return_if_io!(self.with_header(|header| {
(
header.vacuum_mode_largest_root_page.get(),
header.page_size.get(),
)
}));
assert!(root_page_num > 0); // Largest root page number cannot be 0 because that is set to 1 when creating the database with autovacuum enabled
root_page_num += 1;
assert!(root_page_num >= FIRST_PTRMAP_PAGE_NO); // can never be less than 2 because we have already incremented
let page_size = match header_accessor::get_page_size_async(self)? {
IOResult::Done(size) => size as usize,
IOResult::IO => return Ok(IOResult::IO),
};
while is_ptrmap_page(root_page_num, page_size) {
while is_ptrmap_page(root_page_num, page_size as usize) {
root_page_num += 1;
}
assert!(root_page_num >= 3); // the very first root page is page 3
@@ -745,14 +739,18 @@ impl Pager {
/// The usable size of a page might be an odd number. However, the usable size is not allowed to be less than 480.
/// In other words, if the page size is 512, then the reserved space size cannot exceed 32.
pub fn usable_space(&self) -> usize {
let page_size = *self
.page_size
.get()
.get_or_insert_with(|| header_accessor::get_page_size(self).unwrap());
let page_size = *self.page_size.get().get_or_insert_with(|| {
self.io
.block(|| self.with_header(|header| header.page_size))
.unwrap_or_default()
.get()
});
let reserved_space = *self
.reserved_space
.get_or_init(|| header_accessor::get_reserved_space(self).unwrap());
let reserved_space = *self.reserved_space.get_or_init(|| {
self.io
.block(|| self.with_header(|header| header.reserved_space))
.unwrap_or_default()
});
(page_size as usize) - (reserved_space as usize)
}
@@ -766,7 +764,10 @@ impl Pager {
#[inline(always)]
#[instrument(skip_all, level = Level::DEBUG)]
pub fn begin_read_tx(&self) -> Result<LimboResult> {
let (result, changed) = self.wal.borrow_mut().begin_read_tx()?;
let Some(wal) = self.wal.as_ref() else {
return Ok(LimboResult::Ok);
};
let (result, changed) = wal.borrow_mut().begin_read_tx()?;
if changed {
// Someone else changed the database -> assume our page cache is invalid (this is default SQLite behavior, we can probably do better with more granular invalidation)
self.clear_page_cache();
@@ -805,7 +806,10 @@ impl Pager {
IOResult::Done(_) => {}
IOResult::IO => return Ok(IOResult::IO),
}
Ok(IOResult::Done(self.wal.borrow_mut().begin_write_tx()?))
let Some(wal) = self.wal.as_ref() else {
return Ok(IOResult::Done(LimboResult::Ok));
};
Ok(IOResult::Done(wal.borrow_mut().begin_write_tx()?))
}
#[instrument(skip_all, level = Level::DEBUG)]
@@ -817,23 +821,28 @@ impl Pager {
wal_checkpoint_disabled: bool,
) -> Result<IOResult<PagerCommitResult>> {
tracing::trace!("end_tx(rollback={})", rollback);
let Some(wal) = self.wal.as_ref() else {
// TODO: Unsure what the semantics of "end_tx" are for in-memory databases, ephemeral tables, and ephemeral indexes.
return Ok(IOResult::Done(PagerCommitResult::Rollback));
};
if rollback {
if matches!(
let is_write = matches!(
connection.transaction_state.get(),
TransactionState::Write { .. }
) {
self.wal.borrow().end_write_tx();
);
if is_write {
wal.borrow().end_write_tx();
}
self.wal.borrow().end_read_tx();
self.rollback(schema_did_change, connection)?;
wal.borrow().end_read_tx();
self.rollback(schema_did_change, connection, is_write)?;
return Ok(IOResult::Done(PagerCommitResult::Rollback));
}
let commit_status = self.commit_dirty_pages(wal_checkpoint_disabled)?;
match commit_status {
IOResult::IO => Ok(IOResult::IO),
IOResult::Done(_) => {
self.wal.borrow().end_write_tx();
self.wal.borrow().end_read_tx();
wal.borrow().end_write_tx();
wal.borrow().end_read_tx();
if schema_did_change {
let schema = connection.schema.borrow().clone();
@@ -846,7 +855,10 @@ impl Pager {
#[instrument(skip_all, level = Level::DEBUG)]
pub fn end_read_tx(&self) -> Result<()> {
self.wal.borrow().end_read_tx();
let Some(wal) = self.wal.as_ref() else {
return Ok(());
};
wal.borrow().end_read_tx();
Ok(())
}
@@ -864,35 +876,46 @@ impl Pager {
let page = Arc::new(Page::new(page_idx));
page.set_locked();
if let Some(frame_id) = self.wal.borrow().find_frame(page_idx as u64)? {
let c =
self.wal
.borrow()
.read_frame(frame_id, page.clone(), self.buffer_pool.clone())?;
page.set_uptodate();
let Some(wal) = self.wal.as_ref() else {
let c = self.begin_read_disk_page(page_idx, page.clone())?;
self.cache_insert(page_idx, page.clone(), &mut page_cache)?;
return Ok((page, c));
};
if let Some(frame_id) = wal.borrow().find_frame(page_idx as u64)? {
let c = wal
.borrow()
.read_frame(frame_id, page.clone(), self.buffer_pool.clone())?;
{
page.set_uptodate();
}
// TODO(pere) should probably first insert to page cache, and if successful,
// read frame or page
match page_cache.insert(page_key, page.clone()) {
Ok(_) => {}
Err(CacheError::Full) => return Err(LimboError::CacheFull),
Err(CacheError::KeyExists) => {
unreachable!("Page should not exist in cache after get() miss")
}
Err(e) => {
return Err(LimboError::InternalError(format!(
"Failed to insert page into cache: {e:?}"
)))
}
}
self.cache_insert(page_idx, page.clone(), &mut page_cache)?;
return Ok((page, c));
}
let c = sqlite3_ondisk::begin_read_page(
let c = self.begin_read_disk_page(page_idx, page.clone())?;
self.cache_insert(page_idx, page.clone(), &mut page_cache)?;
Ok((page, c))
}
fn begin_read_disk_page(&self, page_idx: usize, page: PageRef) -> Result<Completion> {
sqlite3_ondisk::begin_read_page(
self.db_file.clone(),
self.buffer_pool.clone(),
page.clone(),
page,
page_idx,
)?;
)
}
fn cache_insert(
&self,
page_idx: usize,
page: PageRef,
page_cache: &mut DumbLruPageCache,
) -> Result<()> {
let page_key = PageCacheKey::new(page_idx);
match page_cache.insert(page_key, page.clone()) {
Ok(_) => {}
Err(CacheError::Full) => return Err(LimboError::CacheFull),
@@ -905,7 +928,7 @@ impl Pager {
)))
}
}
Ok((page, c))
Ok(())
}
// Get a page from the cache, if it exists.
@@ -930,13 +953,25 @@ impl Pager {
}
pub fn wal_frame_count(&self) -> Result<u64> {
Ok(self.wal.borrow().get_max_frame_in_wal())
let Some(wal) = self.wal.as_ref() else {
return Err(LimboError::InternalError(
"wal_frame_count() called on database without WAL".to_string(),
));
};
Ok(wal.borrow().get_max_frame_in_wal())
}
/// Flush all dirty pages to disk.
/// Unlike commit_dirty_pages, this function does not commit, checkpoint, or sync the WAL/database.
#[instrument(skip_all, level = Level::INFO)]
pub fn cacheflush(&self) -> Result<IOResult<()>> {
let Some(wal) = self.wal.as_ref() else {
// TODO: when an ephemeral table spills to disk, it should cacheflush pages directly to the temporary database file.
// This handling is not yet implemented, but it should be when spilling is implemented.
return Err(LimboError::InternalError(
"cacheflush() called on database without WAL".to_string(),
));
};
let state = self.flush_info.borrow().state;
trace!(?state);
match state {
@@ -975,7 +1010,7 @@ impl Pager {
page
};
let c = self.wal.borrow_mut().append_frame(
let _c = wal.borrow_mut().append_frame(
page.clone(),
0,
self.flush_info.borrow().in_flight_writes.clone(),
@@ -1034,6 +1069,11 @@ impl Pager {
&self,
wal_checkpoint_disabled: bool,
) -> Result<IOResult<PagerCommitResult>> {
let Some(wal) = self.wal.as_ref() else {
return Err(LimboError::InternalError(
"commit_dirty_pages() called on database without WAL".to_string(),
));
};
let mut checkpoint_result = CheckpointResult::default();
let res = loop {
let state = self.commit_info.borrow().state;
@@ -1080,14 +1120,17 @@ impl Pager {
};
let db_size = {
let db_size = header_accessor::get_database_size(self)?;
let db_size = self
.io
.block(|| self.with_header(|header| header.database_size))?
.get();
if is_last_frame {
db_size
} else {
0
}
};
let c = self.wal.borrow_mut().append_frame(
let _c = wal.borrow_mut().append_frame(
page.clone(),
db_size,
self.commit_info.borrow().in_flight_writes.clone(),
@@ -1141,9 +1184,9 @@ impl Pager {
}
}
CommitState::SyncWal => {
return_if_io!(self.wal.borrow_mut().sync());
return_if_io!(wal.borrow_mut().sync());
if wal_checkpoint_disabled || !self.wal.borrow().should_checkpoint() {
if wal_checkpoint_disabled || !wal.borrow().should_checkpoint() {
self.commit_info.borrow_mut().state = CommitState::Start;
break PagerCommitResult::WalWritten;
}
@@ -1154,7 +1197,8 @@ impl Pager {
self.commit_info.borrow_mut().state = CommitState::SyncDbFile;
}
CommitState::SyncDbFile => {
let c = sqlite3_ondisk::begin_sync(self.db_file.clone(), self.syncing.clone())?;
let _c =
sqlite3_ondisk::begin_sync(self.db_file.clone(), self.syncing.clone())?;
self.commit_info.borrow_mut().state = CommitState::WaitSyncDbFile;
}
CommitState::WaitSyncDbFile => {
@@ -1168,19 +1212,29 @@ impl Pager {
}
};
// We should only signal that we have finished appending frames after the WAL sync, to avoid inconsistencies when sync fails
self.wal.borrow_mut().finish_append_frames_commit()?;
wal.borrow_mut().finish_append_frames_commit()?;
Ok(IOResult::Done(res))
}
#[instrument(skip_all, level = Level::DEBUG)]
pub fn wal_get_frame(&self, frame_no: u32, frame: &mut [u8]) -> Result<Completion> {
let wal = self.wal.borrow();
let Some(wal) = self.wal.as_ref() else {
return Err(LimboError::InternalError(
"wal_get_frame() called on database without WAL".to_string(),
));
};
let wal = wal.borrow();
wal.read_frame_raw(frame_no.into(), frame)
}
#[instrument(skip_all, level = Level::DEBUG)]
pub fn wal_insert_frame(&self, frame_no: u32, frame: &[u8]) -> Result<WalInsertInfo> {
let mut wal = self.wal.borrow_mut();
let Some(wal) = self.wal.as_ref() else {
return Err(LimboError::InternalError(
"wal_insert_frame() called on database without WAL".to_string(),
));
};
let mut wal = wal.borrow_mut();
let (header, raw_page) = parse_wal_frame_header(frame);
wal.write_frame_raw(
self.buffer_pool.clone(),
@@ -1215,6 +1269,11 @@ impl Pager {
#[instrument(skip_all, level = Level::DEBUG, name = "pager_checkpoint",)]
pub fn checkpoint(&self) -> Result<IOResult<CheckpointResult>> {
let Some(wal) = self.wal.as_ref() else {
return Err(LimboError::InternalError(
"checkpoint() called on database without WAL".to_string(),
));
};
let mut checkpoint_result = CheckpointResult::default();
loop {
let state = *self.checkpoint_state.borrow();
@@ -1222,11 +1281,10 @@ impl Pager {
match state {
CheckpointState::Checkpoint => {
let in_flight = self.checkpoint_inflight.clone();
match self.wal.borrow_mut().checkpoint(
self,
in_flight,
CheckpointMode::Passive,
)? {
match wal
.borrow_mut()
.checkpoint(self, in_flight, CheckpointMode::Passive)?
{
IOResult::IO => return Ok(IOResult::IO),
IOResult::Done(res) => {
checkpoint_result = res;
@@ -1235,7 +1293,8 @@ impl Pager {
};
}
CheckpointState::SyncDbFile => {
let c = sqlite3_ondisk::begin_sync(self.db_file.clone(), self.syncing.clone())?;
let _c =
sqlite3_ondisk::begin_sync(self.db_file.clone(), self.syncing.clone())?;
self.checkpoint_state
.replace(CheckpointState::WaitSyncDbFile);
}
@@ -1274,7 +1333,12 @@ impl Pager {
pub fn checkpoint_shutdown(&self, wal_checkpoint_disabled: bool) -> Result<()> {
let mut _attempts = 0;
{
let mut wal = self.wal.borrow_mut();
let Some(wal) = self.wal.as_ref() else {
return Err(LimboError::InternalError(
"checkpoint_shutdown() called on database without WAL".to_string(),
));
};
let mut wal = wal.borrow_mut();
// fsync the WAL synchronously before beginning the checkpoint
while let Ok(IOResult::IO) = wal.sync() {
// TODO: for now forget about timeouts as they fail regularly in SIM
@@ -1299,22 +1363,29 @@ impl Pager {
wal_checkpoint_disabled: bool,
mode: CheckpointMode,
) -> Result<CheckpointResult> {
let Some(wal) = self.wal.as_ref() else {
return Err(LimboError::InternalError(
"wal_checkpoint() called on database without WAL".to_string(),
));
};
if wal_checkpoint_disabled {
return Ok(CheckpointResult::default());
}
let counter = Rc::new(RefCell::new(0));
let write_counter = Rc::new(RefCell::new(0));
let mut checkpoint_result = self.io.block(|| {
self.wal
.borrow_mut()
.checkpoint(self, counter.clone(), mode)
wal.borrow_mut()
.checkpoint(self, write_counter.clone(), mode)
})?;
if checkpoint_result.everything_backfilled()
&& checkpoint_result.num_checkpointed_frames != 0
{
let db_size = header_accessor::get_database_size(self)?;
let page_size = self.page_size.get().unwrap_or(DEFAULT_PAGE_SIZE);
let db_size = self
.io
.block(|| self.with_header(|header| header.database_size))?
.get();
let page_size = self.page_size.get().unwrap_or(PageSize::DEFAULT as u32);
let expected = (db_size * page_size) as u64;
if expected < self.db_file.size()? {
self.io.wait_for_completion(self.db_file.truncate(
@@ -1354,18 +1425,21 @@ impl Pager {
const TRUNK_PAGE_NEXT_PAGE_OFFSET: usize = 0; // Offset to next trunk page pointer
const TRUNK_PAGE_LEAF_COUNT_OFFSET: usize = 4; // Offset to leaf count
let header_ref = self.io.block(|| HeaderRefMut::from_pager(self))?;
let mut header = header_ref.borrow_mut();
let mut state = self.free_page_state.borrow_mut();
tracing::debug!(?state);
loop {
match &mut *state {
FreePageState::Start => {
if page_id < 2 || page_id > header_accessor::get_database_size(self)? as usize {
if page_id < 2 || page_id > header.database_size.get() as usize {
return Err(LimboError::Corrupt(format!(
"Invalid page number {page_id} for free operation"
)));
}
let (page, c) = match page.clone() {
let (page, _c) = match page.clone() {
Some(page) => {
assert_eq!(
page.get().id,
@@ -1385,12 +1459,9 @@ impl Pager {
(page, Some(c))
}
};
header_accessor::set_freelist_pages(
self,
header_accessor::get_freelist_pages(self)? + 1,
)?;
header.freelist_pages = (header.freelist_pages.get() + 1).into();
let trunk_page_id = header_accessor::get_freelist_trunk_page(self)?;
let trunk_page_id = header.freelist_trunk_page.get();
if trunk_page_id != 0 {
*state = FreePageState::AddToTrunk {
@@ -1402,10 +1473,10 @@ impl Pager {
}
}
FreePageState::AddToTrunk { page, trunk_page } => {
let trunk_page_id = header_accessor::get_freelist_trunk_page(self)?;
let trunk_page_id = header.freelist_trunk_page.get();
if trunk_page.is_none() {
// Add as leaf to current trunk
let (page, c) = self.read_page(trunk_page_id as usize)?;
let (page, _c) = self.read_page(trunk_page_id as usize)?;
trunk_page.replace(page);
}
let trunk_page = trunk_page.as_ref().unwrap();
@@ -1419,7 +1490,7 @@ impl Pager {
// Reserve 2 slots for the trunk page header which is 8 bytes or 2*LEAF_ENTRY_SIZE
let max_free_list_entries =
(self.usable_space() / LEAF_ENTRY_SIZE) - RESERVED_SLOTS;
(header.usable_space() / LEAF_ENTRY_SIZE) - RESERVED_SLOTS;
if number_of_leaf_pages < max_free_list_entries as u32 {
turso_assert!(
@@ -1449,7 +1520,7 @@ impl Pager {
turso_assert!(page.get().id == page_id, "page has unexpected id");
self.add_dirty(page);
let trunk_page_id = header_accessor::get_freelist_trunk_page(self)?;
let trunk_page_id = header.freelist_trunk_page.get();
let contents = page.get().contents.as_mut().unwrap();
// Point to previous trunk
@@ -1457,7 +1528,7 @@ impl Pager {
// Zero leaf count
contents.write_u32(TRUNK_PAGE_LEAF_COUNT_OFFSET, 0);
// Update page 1 to point to new trunk
header_accessor::set_freelist_trunk_page(self, page_id as u32)?;
header.freelist_trunk_page = (page_id as u32).into();
// Clear flags
page.clear_uptodate();
break;
@@ -1476,9 +1547,12 @@ impl Pager {
tracing::trace!("allocate_page1(Start)");
self.db_state.set(DbState::Initializing);
let mut default_header = DatabaseHeader::default();
default_header.database_size += 1;
assert_eq!(default_header.database_size.get(), 0);
default_header.database_size = 1.into();
if let Some(size) = self.page_size.get() {
default_header.update_page_size(size);
default_header.page_size = PageSize::new(size).expect("page size");
}
let page = allocate_new_page(1, &self.buffer_pool, 0);
@@ -1495,11 +1569,11 @@ impl Pager {
btree_init_page(
&page1,
PageType::TableLeaf,
DATABASE_HEADER_SIZE,
(default_header.get_page_size() - default_header.reserved_space as u32) as u16,
DatabaseHeader::SIZE,
(default_header.page_size.get() - default_header.reserved_space as u32) as u16,
);
let write_counter = Rc::new(RefCell::new(0));
let c = begin_write_btree_page(self, &page1.get(), write_counter.clone())?;
let _c = begin_write_btree_page(self, &page1.get(), write_counter.clone())?;
self.allocate_page1_state
.replace(AllocatePage1State::Writing {
@@ -1553,12 +1627,15 @@ impl Pager {
const FREELIST_TRUNK_OFFSET_LEAF_COUNT: usize = 4;
const FREELIST_TRUNK_OFFSET_FIRST_LEAF: usize = 8;
let header_ref = self.io.block(|| HeaderRefMut::from_pager(self))?;
let mut header = header_ref.borrow_mut();
loop {
let mut state = self.allocate_page_state.borrow_mut();
tracing::debug!("allocate_page(state={:?})", state);
match &mut *state {
AllocatePageState::Start => {
let old_db_size = header_accessor::get_database_size(self)?;
let old_db_size = header.database_size.get();
#[cfg(not(feature = "omit_autovacuum"))]
let mut new_db_size = old_db_size;
#[cfg(feature = "omit_autovacuum")]
@@ -1571,10 +1648,7 @@ impl Pager {
// - autovacuum is enabled
// - the last page is a pointer map page
if matches!(*self.auto_vacuum_mode.borrow(), AutoVacuumMode::Full)
&& is_ptrmap_page(
new_db_size + 1,
header_accessor::get_page_size(self)? as usize,
)
&& is_ptrmap_page(new_db_size + 1, header.page_size.get() as usize)
{
// we will allocate a ptrmap page, so increment size
new_db_size += 1;
@@ -1595,15 +1669,14 @@ impl Pager {
}
}
let first_freelist_trunk_page_id =
header_accessor::get_freelist_trunk_page(self)?;
let first_freelist_trunk_page_id = header.freelist_trunk_page.get();
if first_freelist_trunk_page_id == 0 {
*state = AllocatePageState::AllocateNewPage {
current_db_size: new_db_size,
};
continue;
}
let (trunk_page, c) = self.read_page(first_freelist_trunk_page_id as usize)?;
let (trunk_page, _c) = self.read_page(first_freelist_trunk_page_id as usize)?;
*state = AllocatePageState::SearchAvailableFreeListLeaf {
trunk_page,
current_db_size: new_db_size,
@@ -1649,11 +1722,8 @@ impl Pager {
// Freelist is not empty, so we can reuse the trunk itself as a new page
// and update the database's first freelist trunk page to the next trunk page.
header_accessor::set_freelist_trunk_page(self, next_trunk_page_id)?;
header_accessor::set_freelist_pages(
self,
header_accessor::get_freelist_pages(self)? - 1,
)?;
header.freelist_trunk_page = next_trunk_page_id.into();
header.freelist_pages = (header.freelist_pages.get() - 1).into();
self.add_dirty(trunk_page);
// zero out the page
turso_assert!(
@@ -1692,7 +1762,7 @@ impl Pager {
let page_contents = trunk_page.get().contents.as_ref().unwrap();
let next_leaf_page_id =
page_contents.read_u32(FREELIST_TRUNK_OFFSET_FIRST_LEAF);
let (leaf_page, c) = self.read_page(next_leaf_page_id as usize)?;
let (leaf_page, _c) = self.read_page(next_leaf_page_id as usize)?;
if leaf_page.is_locked() {
return Ok(IOResult::IO);
}
@@ -1736,11 +1806,7 @@ impl Pager {
);
self.add_dirty(trunk_page);
header_accessor::set_freelist_pages(
self,
header_accessor::get_freelist_pages(self)? - 1,
)?;
header.freelist_pages = (header.freelist_pages.get() - 1).into();
*state = AllocatePageState::Start;
return Ok(IOResult::Done(leaf_page));
}
@@ -1766,7 +1832,7 @@ impl Pager {
Ok(_) => {}
};
}
header_accessor::set_database_size(self, new_db_size)?;
header.database_size = new_db_size.into();
*state = AllocatePageState::Start;
return Ok(IOResult::Done(page));
}
@@ -1796,20 +1862,22 @@ impl Pager {
Ok(())
}
pub fn usable_size(&self) -> usize {
let page_size = header_accessor::get_page_size(self).unwrap_or_default() as u32;
let reserved_space = header_accessor::get_reserved_space(self).unwrap_or_default() as u32;
(page_size - reserved_space) as usize
}
#[instrument(skip_all, level = Level::DEBUG)]
pub fn rollback(
&self,
schema_did_change: bool,
connection: &Connection,
is_write: bool,
) -> Result<(), LimboError> {
tracing::debug!(schema_did_change);
self.dirty_pages.borrow_mut().clear();
if is_write {
self.dirty_pages.borrow_mut().clear();
} else {
turso_assert!(
self.dirty_pages.borrow().is_empty(),
"dirty pages should be empty for read txn"
);
}
let mut cache = self.page_cache.write();
self.reset_internal_states();
@@ -1819,7 +1887,11 @@ impl Pager {
if schema_did_change {
connection.schema.replace(connection._db.clone_schema()?);
}
self.wal.borrow_mut().rollback()?;
if is_write {
if let Some(wal) = self.wal.as_ref() {
wal.borrow_mut().rollback()?;
}
}
Ok(())
}
@@ -1840,6 +1912,22 @@ impl Pager {
});
self.allocate_page_state.replace(AllocatePageState::Start);
}
pub fn with_header<T>(&self, f: impl Fn(&DatabaseHeader) -> T) -> Result<IOResult<T>> {
let IOResult::Done(header_ref) = HeaderRef::from_pager(self)? else {
return Ok(IOResult::IO);
};
let header = header_ref.borrow();
Ok(IOResult::Done(f(&header)))
}
pub fn with_header_mut<T>(&self, f: impl Fn(&mut DatabaseHeader) -> T) -> Result<IOResult<T>> {
let IOResult::Done(header_ref) = HeaderRefMut::from_pager(self)? else {
return Ok(IOResult::IO);
};
let mut header = header_ref.borrow_mut();
Ok(IOResult::Done(f(&mut header)))
}
}
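// Illustrative sketch (not part of this diff): header fields are now read and
// written through the `with_header` / `with_header_mut` closures instead of the
// old per-field accessors. A caller that cannot yield on IO can block on the
// pager's IO loop, as `usable_space` does above. `example_page_size` is a
// hypothetical helper used only for illustration.
#[allow(dead_code)]
fn example_page_size(pager: &Pager) -> Result<u32> {
    let page_size = pager
        .io
        .block(|| pager.with_header(|header| header.page_size))?
        .get();
    Ok(page_size)
}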
pub fn allocate_new_page(page_id: usize, buffer_pool: &Arc<BufferPool>, offset: usize) -> PageRef {
@@ -1917,7 +2005,7 @@ impl CreateBTreeFlags {
*/
#[cfg(not(feature = "omit_autovacuum"))]
mod ptrmap {
use crate::{storage::sqlite3_ondisk::MIN_PAGE_SIZE, LimboError, Result};
use crate::{storage::sqlite3_ondisk::PageSize, LimboError, Result};
// Constants
pub const PTRMAP_ENTRY_SIZE: usize = 5;
@@ -1985,14 +2073,14 @@ mod ptrmap {
/// Calculates how many database pages are mapped by a single pointer map page.
/// This is based on the total page size, as ptrmap pages are filled with entries.
pub fn entries_per_ptrmap_page(page_size: usize) -> usize {
assert!(page_size >= MIN_PAGE_SIZE as usize);
assert!(page_size >= PageSize::MIN as usize);
page_size / PTRMAP_ENTRY_SIZE
}
/// Calculates the cycle length of pointer map pages
/// The cycle length is the number of pages between consecutive pointer map pages: the pages mapped by one ptrmap page plus the ptrmap page itself.
pub fn ptrmap_page_cycle_length(page_size: usize) -> usize {
assert!(page_size >= MIN_PAGE_SIZE as usize);
assert!(page_size >= PageSize::MIN as usize);
(page_size / PTRMAP_ENTRY_SIZE) + 1
}
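// Worked example (not part of this diff): with the minimum 512-byte page size
// and 5-byte entries, one pointer map page holds 512 / 5 = 102 entries, so the
// ptrmap layout repeats every 102 + 1 = 103 pages. `ptrmap_arithmetic_example`
// is a hypothetical helper used only for illustration.
#[allow(dead_code)]
fn ptrmap_arithmetic_example() {
    assert_eq!(entries_per_ptrmap_page(PageSize::MIN as usize), 102);
    assert_eq!(ptrmap_page_cycle_length(PageSize::MIN as usize), 103);
}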
@@ -2102,7 +2190,7 @@ mod ptrmap_tests {
use crate::storage::database::{DatabaseFile, DatabaseStorage};
use crate::storage::page_cache::DumbLruPageCache;
use crate::storage::pager::Pager;
use crate::storage::sqlite3_ondisk::MIN_PAGE_SIZE;
use crate::storage::sqlite3_ondisk::PageSize;
use crate::storage::wal::{WalFile, WalFileShared};
pub fn run_until_done<T>(
@@ -2145,7 +2233,7 @@ mod ptrmap_tests {
let pager = Pager::new(
db_file,
wal,
Some(wal),
io,
page_cache,
buffer_pool,
@@ -2154,7 +2242,12 @@ mod ptrmap_tests {
)
.unwrap();
run_until_done(|| pager.allocate_page1(), &pager).unwrap();
header_accessor::set_vacuum_mode_largest_root_page(&pager, 1).unwrap();
pager
.io
.block(|| {
pager.with_header_mut(|header| header.vacuum_mode_largest_root_page = 1.into())
})
.unwrap();
pager.set_auto_vacuum_mode(AutoVacuumMode::Full);
// Allocate all the pages as btree root pages
@@ -2194,7 +2287,11 @@ mod ptrmap_tests {
// Ensure that the database header size is correctly reflected
assert_eq!(
header_accessor::get_database_size(&pager).unwrap(),
pager
.io
.block(|| pager.with_header(|header| header.database_size))
.unwrap()
.get(),
initial_db_pages + 2
); // (1+1) -> (header + ptrmap)
@@ -2210,7 +2307,7 @@ mod ptrmap_tests {
#[test]
fn test_is_ptrmap_page_logic() {
let page_size = MIN_PAGE_SIZE as usize;
let page_size = PageSize::MIN as usize;
let n_data_pages = entries_per_ptrmap_page(page_size);
assert_eq!(n_data_pages, 102); // 512/5 = 102
@@ -2228,7 +2325,7 @@ mod ptrmap_tests {
#[test]
fn test_get_ptrmap_page_no() {
let page_size = MIN_PAGE_SIZE as usize; // Maps 103 data pages
let page_size = PageSize::MIN as usize; // Maps 103 data pages
// Test pages mapped by P0 (page 2)
assert_eq!(get_ptrmap_page_no_for_db_page(3, page_size), 2); // D(3) -> P0(2)
@@ -2248,7 +2345,7 @@ mod ptrmap_tests {
#[test]
fn test_get_ptrmap_offset() {
let page_size = MIN_PAGE_SIZE as usize; // Maps 103 data pages
let page_size = PageSize::MIN as usize; // Maps 103 data pages
assert_eq!(get_ptrmap_offset_in_page(3, 2, page_size).unwrap(), 0);
assert_eq!(

View File

@@ -43,6 +43,8 @@
#![allow(clippy::arc_with_non_send_sync)]
use bytemuck::{Pod, Zeroable};
use pack1::{I32BE, U16BE, U32BE};
use tracing::{instrument, Level};
use super::pager::PageRef;
@@ -58,36 +60,17 @@ use crate::storage::btree::{payload_overflow_threshold_max, payload_overflow_thr
use crate::storage::buffer_pool::BufferPool;
use crate::storage::database::DatabaseStorage;
use crate::storage::pager::Pager;
use crate::storage::wal::PendingFlush;
use crate::types::{RawSlice, RefValue, SerialType, SerialTypeKind, TextRef, TextSubtype};
use crate::{turso_assert, File, Result, WalFileShared};
use std::cell::{RefCell, UnsafeCell};
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use std::mem::MaybeUninit;
use std::pin::Pin;
use std::rc::Rc;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
use std::sync::Arc;
/// The size of the database header in bytes.
pub const DATABASE_HEADER_SIZE: usize = 100;
// A negative DEFAULT_CACHE_SIZE means we store how many pages X KiB of memory can hold.
// We can calculate the "real" cache size by dividing by the page size.
pub const DEFAULT_CACHE_SIZE: i32 = -2000;
// Minimum number of pages that cache can hold.
pub const MIN_PAGE_CACHE_SIZE: usize = 10;
/// The minimum page size in bytes.
pub const MIN_PAGE_SIZE: u32 = 512;
/// The maximum page size in bytes.
pub const MAX_PAGE_SIZE: u32 = 65536;
/// The default page size in bytes.
pub const DEFAULT_PAGE_SIZE: u32 = 4096;
pub const DATABASE_HEADER_PAGE_ID: usize = 1;
/// The minimum size of a cell in bytes.
pub const MINIMUM_CELL_SIZE: usize = 4;
@@ -96,116 +79,238 @@ pub const INTERIOR_PAGE_HEADER_SIZE_BYTES: usize = 12;
pub const LEAF_PAGE_HEADER_SIZE_BYTES: usize = 8;
pub const LEFT_CHILD_PTR_SIZE_BYTES: usize = 4;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum DatabaseEncoding {
Utf8 = 1,
Utf16Le = 2,
Utf16Be = 3,
}
#[derive(PartialEq, Eq, Zeroable, Pod, Clone, Copy, Debug)]
#[repr(transparent)]
/// Database page size, stored in its on-disk header encoding.
pub struct PageSize(U16BE);
impl TryFrom<u32> for DatabaseEncoding {
type Error = LimboError;
impl PageSize {
pub const MIN: u32 = 512;
pub const MAX: u32 = 65536;
pub const DEFAULT: u16 = 4096;
fn try_from(value: u32) -> Result<Self> {
match value {
1 => Ok(Self::Utf8),
2 => Ok(Self::Utf16Le),
3 => Ok(Self::Utf16Be),
_ => Err(LimboError::Corrupt(format!("Invalid encoding: {value}"))),
pub const fn new(size: u32) -> Option<Self> {
if size < PageSize::MIN || size > PageSize::MAX {
return None;
}
// Page size must be a power of two.
if size.count_ones() != 1 {
return None;
}
if size == PageSize::MAX {
return Some(Self(U16BE::new(1)));
}
Some(Self(U16BE::new(size as u16)))
}
pub const fn get(self) -> u32 {
match self.0.get() {
1 => Self::MAX,
v => v as u32,
}
}
}
impl From<DatabaseEncoding> for &'static str {
fn from(encoding: DatabaseEncoding) -> Self {
match encoding {
DatabaseEncoding::Utf8 => "UTF-8",
DatabaseEncoding::Utf16Le => "UTF-16le",
DatabaseEncoding::Utf16Be => "UTF-16be",
impl Default for PageSize {
fn default() -> Self {
Self(U16BE::new(Self::DEFAULT))
}
}
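// Illustrative sketch (not part of this diff): `PageSize` keeps the on-disk
// header encoding, in which the stored value 1 stands for 65536 bytes.
// `page_size_examples` is a hypothetical helper used only for illustration.
#[allow(dead_code)]
fn page_size_examples() {
    assert!(PageSize::new(1024).is_some()); // power of two within [512, 65536]
    assert!(PageSize::new(1000).is_none()); // rejected: not a power of two
    assert_eq!(PageSize::new(65536).unwrap().get(), 65536); // stored as 1 on disk
    assert_eq!(PageSize::default().get(), PageSize::DEFAULT as u32);
}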
#[derive(PartialEq, Eq, Zeroable, Pod, Clone, Copy, Debug)]
#[repr(transparent)]
/// Default page cache size, stored in its on-disk header encoding.
pub struct CacheSize(I32BE);
impl CacheSize {
// A negative value means we store how many pages X KiB of memory can hold.
// We can calculate the "real" cache size by dividing by the page size.
pub const DEFAULT: i32 = -2000;
// Minimum number of pages that cache can hold.
pub const MIN: i64 = 10;
// SQLite uses this value as the threshold for the maximum cache size
pub const MAX_SAFE: i64 = 2147450880;
pub const fn new(size: i32) -> Self {
match size {
Self::DEFAULT => Self(I32BE::new(0)),
v => Self(I32BE::new(v)),
}
}
pub const fn get(self) -> i32 {
match self.0.get() {
0 => Self::DEFAULT,
v => v,
}
}
}
/// The database header.
/// The first 100 bytes of the database file comprise the database file header.
/// The database file header is divided into fields as shown by the table below.
/// All multibyte fields in the database file header are stored with the most significant byte first (big-endian).
#[derive(Debug, Clone)]
impl Default for CacheSize {
fn default() -> Self {
Self(I32BE::new(Self::DEFAULT))
}
}
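// Illustrative sketch (not part of this diff): `CacheSize` stores the default
// (-2000) as 0 in the header and decodes 0 back to the default on read.
// `cache_size_examples` is a hypothetical helper used only for illustration.
#[allow(dead_code)]
fn cache_size_examples() {
    assert_eq!(CacheSize::new(CacheSize::DEFAULT).get(), CacheSize::DEFAULT);
    assert_eq!(CacheSize::default().get(), -2000);
    assert_eq!(CacheSize::new(500).get(), 500);
}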
#[derive(PartialEq, Eq, Zeroable, Pod, Clone, Copy)]
#[repr(transparent)]
/// Read/Write file format version.
pub struct Version(u8);
impl Version {
#![allow(non_upper_case_globals)]
const Legacy: Self = Self(1);
const Wal: Self = Self(2);
}
impl std::fmt::Debug for Version {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
Self::Legacy => f.write_str("Version::Legacy"),
Self::Wal => f.write_str("Version::Wal"),
Self(v) => write!(f, "Version::Invalid({v})"),
}
}
}
#[derive(PartialEq, Eq, Zeroable, Pod, Clone, Copy)]
#[repr(transparent)]
/// Text encoding.
pub struct TextEncoding(U32BE);
impl TextEncoding {
#![allow(non_upper_case_globals)]
pub const Utf8: Self = Self(U32BE::new(1));
pub const Utf16Le: Self = Self(U32BE::new(2));
pub const Utf16Be: Self = Self(U32BE::new(3));
}
impl std::fmt::Display for TextEncoding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
Self::Utf8 => f.write_str("UTF-8"),
Self::Utf16Le => f.write_str("UTF-16le"),
Self::Utf16Be => f.write_str("UTF-16be"),
Self(v) => write!(f, "TextEncoding::Invalid({})", v.get()),
}
}
}
impl std::fmt::Debug for TextEncoding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
Self::Utf8 => f.write_str("TextEncoding::Utf8"),
Self::Utf16Le => f.write_str("TextEncoding::Utf16Le"),
Self::Utf16Be => f.write_str("TextEncoding::Utf16Be"),
Self(v) => write!(f, "TextEncoding::Invalid({})", v.get()),
}
}
}
impl Default for TextEncoding {
fn default() -> Self {
Self::Utf8
}
}
#[derive(Pod, Zeroable, Clone, Copy, Debug)]
#[repr(C, packed)]
/// Database Header Format
pub struct DatabaseHeader {
/// The header string: "SQLite format 3\0"
/// b"SQLite format 3\0"
pub magic: [u8; 16],
/// The database page size in bytes. Must be a power of two between 512 and 32768 inclusive,
/// or the value 1 representing a page size of 65536.
pub page_size: u16,
/// Page size in bytes. Must be a power of two between 512 and 32768 inclusive, or the value 1 representing a page size of 65536.
pub page_size: PageSize,
/// File format write version. 1 for legacy; 2 for WAL.
pub write_version: u8,
pub write_version: Version,
/// File format read version. 1 for legacy; 2 for WAL.
pub read_version: u8,
pub read_version: Version,
/// Bytes of unused "reserved" space at the end of each page. Usually 0.
/// SQLite has the ability to set aside a small number of extra bytes at the end of every page for use by extensions.
/// These extra bytes are used, for example, by the SQLite Encryption Extension to store a nonce and/or
/// cryptographic checksum associated with each page.
pub reserved_space: u8,
/// Maximum embedded payload fraction. Must be 64.
pub max_embed_frac: u8,
/// Minimum embedded payload fraction. Must be 32.
pub min_embed_frac: u8,
/// Leaf payload fraction. Must be 32.
pub min_leaf_frac: u8,
/// File change counter, incremented when database is modified.
pub change_counter: u32,
pub leaf_frac: u8,
/// File change counter.
pub change_counter: U32BE,
/// Size of the database file in pages. The "in-header database size".
pub database_size: u32,
pub database_size: U32BE,
/// Page number of the first freelist trunk page.
pub freelist_trunk_page: u32,
pub freelist_trunk_page: U32BE,
/// Total number of freelist pages.
pub freelist_pages: u32,
/// The schema cookie. Incremented when the database schema changes.
pub schema_cookie: u32,
/// The schema format number. Supported formats are 1, 2, 3, and 4.
pub schema_format: u32,
pub freelist_pages: U32BE,
/// The schema cookie.
pub schema_cookie: U32BE,
/// The schema format number. Supported schema formats are 1, 2, 3, and 4.
pub schema_format: U32BE,
/// Default page cache size.
pub default_page_cache_size: i32,
/// The page number of the largest root b-tree page when in auto-vacuum or
/// incremental-vacuum modes, or zero otherwise.
pub vacuum_mode_largest_root_page: u32,
/// The database text encoding. 1=UTF-8, 2=UTF-16le, 3=UTF-16be.
pub text_encoding: u32,
pub default_page_cache_size: CacheSize,
/// The page number of the largest root b-tree page when in auto-vacuum or incremental-vacuum modes, or zero otherwise.
pub vacuum_mode_largest_root_page: U32BE,
/// Text encoding.
pub text_encoding: TextEncoding,
/// The "user version" as read and set by the user_version pragma.
pub user_version: i32,
pub user_version: I32BE,
/// True (non-zero) for incremental-vacuum mode. False (zero) otherwise.
pub incremental_vacuum_enabled: u32,
pub incremental_vacuum_enabled: U32BE,
/// The "Application ID" set by PRAGMA application_id.
pub application_id: u32,
pub application_id: I32BE,
/// Reserved for expansion. Must be zero.
pub reserved_for_expansion: [u8; 20],
_padding: [u8; 20],
/// The version-valid-for number.
pub version_valid_for: u32,
pub version_valid_for: U32BE,
/// SQLITE_VERSION_NUMBER
pub version_number: u32,
pub version_number: U32BE,
}
impl DatabaseHeader {
pub const PAGE_ID: usize = 1;
pub const SIZE: usize = size_of::<Self>();
const _CHECK: () = {
assert!(Self::SIZE == 100);
};
pub fn usable_space(self) -> usize {
(self.page_size.get() as usize) - (self.reserved_space as usize)
}
}
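// Illustrative sketch (not part of this change): because DatabaseHeader is
// `#[repr(C, packed)]` and `Pod`, the first 100 bytes of page 1 can be decoded and
// re-encoded directly with bytemuck, assuming `page` holds the raw page bytes:
//
//     let header: DatabaseHeader =
//         bytemuck::pod_read_unaligned(&page[..DatabaseHeader::SIZE]);
//     assert_eq!(&header.magic, b"SQLite format 3\0");
//     let bytes: &[u8] = bytemuck::bytes_of(&header); // 100 bytes, ready to write back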
impl Default for DatabaseHeader {
fn default() -> Self {
Self {
magic: *b"SQLite format 3\0",
page_size: Default::default(),
write_version: Version::Wal,
read_version: Version::Wal,
reserved_space: 0,
max_embed_frac: 64,
min_embed_frac: 32,
leaf_frac: 32,
change_counter: U32BE::new(1),
database_size: U32BE::new(0),
freelist_trunk_page: U32BE::new(0),
freelist_pages: U32BE::new(0),
schema_cookie: U32BE::new(0),
schema_format: U32BE::new(4), // latest format, new sqlite3 databases use this format
default_page_cache_size: Default::default(),
vacuum_mode_largest_root_page: U32BE::new(0),
text_encoding: TextEncoding::Utf8,
user_version: I32BE::new(0),
incremental_vacuum_enabled: U32BE::new(0),
application_id: I32BE::new(0),
_padding: [0; 20],
version_valid_for: U32BE::new(3047000),
version_number: U32BE::new(3047000),
}
}
}
pub const WAL_HEADER_SIZE: usize = 32;
@@ -282,90 +387,6 @@ impl WalFrameHeader {
}
}
impl Default for DatabaseHeader {
fn default() -> Self {
Self {
magic: *b"SQLite format 3\0",
page_size: DEFAULT_PAGE_SIZE as u16,
write_version: 2,
read_version: 2,
reserved_space: 0,
max_embed_frac: 64,
min_embed_frac: 32,
min_leaf_frac: 32,
change_counter: 1,
database_size: 0,
freelist_trunk_page: 0,
freelist_pages: 0,
schema_cookie: 0,
schema_format: 4, // latest format, new sqlite3 databases use this format
default_page_cache_size: DEFAULT_CACHE_SIZE,
vacuum_mode_largest_root_page: 0,
text_encoding: 1, // utf-8
user_version: 0,
incremental_vacuum_enabled: 0,
application_id: 0,
reserved_for_expansion: [0; 20],
version_valid_for: 3047000,
version_number: 3047000,
}
}
}
impl DatabaseHeader {
pub fn update_page_size(&mut self, size: u32) {
if !is_valid_page_size(size) {
return;
}
self.page_size = if size == MAX_PAGE_SIZE {
1u16
} else {
size as u16
};
}
pub fn get_page_size(&self) -> u32 {
if self.page_size == 1 {
MAX_PAGE_SIZE
} else {
self.page_size as u32
}
}
}
pub fn is_valid_page_size(size: u32) -> bool {
(MIN_PAGE_SIZE..=MAX_PAGE_SIZE).contains(&size) && (size & (size - 1)) == 0
}
pub fn write_header_to_buf(buf: &mut [u8], header: &DatabaseHeader) {
buf[0..16].copy_from_slice(&header.magic);
buf[16..18].copy_from_slice(&header.page_size.to_be_bytes());
buf[18] = header.write_version;
buf[19] = header.read_version;
buf[20] = header.reserved_space;
buf[21] = header.max_embed_frac;
buf[22] = header.min_embed_frac;
buf[23] = header.min_leaf_frac;
buf[24..28].copy_from_slice(&header.change_counter.to_be_bytes());
buf[28..32].copy_from_slice(&header.database_size.to_be_bytes());
buf[32..36].copy_from_slice(&header.freelist_trunk_page.to_be_bytes());
buf[36..40].copy_from_slice(&header.freelist_pages.to_be_bytes());
buf[40..44].copy_from_slice(&header.schema_cookie.to_be_bytes());
buf[44..48].copy_from_slice(&header.schema_format.to_be_bytes());
buf[48..52].copy_from_slice(&header.default_page_cache_size.to_be_bytes());
buf[52..56].copy_from_slice(&header.vacuum_mode_largest_root_page.to_be_bytes());
buf[56..60].copy_from_slice(&header.text_encoding.to_be_bytes());
buf[60..64].copy_from_slice(&header.user_version.to_be_bytes());
buf[64..68].copy_from_slice(&header.incremental_vacuum_enabled.to_be_bytes());
buf[68..72].copy_from_slice(&header.application_id.to_be_bytes());
buf[72..92].copy_from_slice(&header.reserved_for_expansion);
buf[92..96].copy_from_slice(&header.version_valid_for.to_be_bytes());
buf[96..100].copy_from_slice(&header.version_number.to_be_bytes());
}
#[repr(u8)]
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum PageType {
@@ -531,7 +552,7 @@ impl PageContent {
pub fn cell_content_area(&self) -> u32 {
let offset = self.read_u16(BTREE_CELL_CONTENT_AREA);
if offset == 0 {
MAX_PAGE_SIZE
PageSize::MAX
} else {
offset as u32
}
@@ -733,7 +754,7 @@ impl PageContent {
pub fn write_database_header(&self, header: &DatabaseHeader) {
let buf = self.as_ptr();
write_header_to_buf(buf, header);
buf[0..DatabaseHeader::SIZE].copy_from_slice(bytemuck::bytes_of(header));
}
pub fn debug_print_freelist(&self, usable_space: u16) {
@@ -793,8 +814,8 @@ pub fn finish_read_page(
page: PageRef,
) -> Result<()> {
tracing::trace!(page_idx);
let pos = if page_idx == DATABASE_HEADER_PAGE_ID {
DATABASE_HEADER_SIZE
let pos = if page_idx == DatabaseHeader::PAGE_ID {
DatabaseHeader::SIZE
} else {
0
};
@@ -852,6 +873,115 @@ pub fn begin_write_btree_page(
res
}
#[instrument(skip_all, level = Level::DEBUG)]
/// Write a batch of pages to the database file.
///
/// We have a batch of pages to write, let's say the following
/// (they are already sorted by id thanks to the BTreeMap):
/// [1,2,3,6,7,9,10,11,12]
///
/// we want to collect this into runs of:
/// [1,2,3], [6,7], [9,10,11,12]
/// and submit each run as a `writev` call,
/// for 3 total syscalls instead of 9.
pub fn write_pages_vectored(
pager: &Pager,
batch: BTreeMap<usize, Arc<RefCell<Buffer>>>,
) -> Result<PendingFlush> {
if batch.is_empty() {
return Ok(PendingFlush::default());
}
// batch item array is already sorted by id, so we just need to find contiguous ranges of page_id's
// to submit as `writev`/write_pages calls.
let page_sz = pager.page_size.get().unwrap_or(PageSize::DEFAULT as u32) as usize;
// Count expected number of runs to create the atomic counter we need to track each batch
let mut run_count = 0;
let mut prev_id = None;
for &id in batch.keys() {
if let Some(prev) = prev_id {
if id != prev + 1 {
run_count += 1;
}
} else {
run_count = 1; // First run
}
prev_id = Some(id);
}
// Create the atomic counters
let runs_left = Arc::new(AtomicUsize::new(run_count));
let done = Arc::new(AtomicBool::new(false));
// we know how many runs, but we don't know how many buffers per run, so we can only give an
// estimate of the capacity
const EST_BUFF_CAPACITY: usize = 32;
// Iterate through the batch, submitting each run as soon as it ends
// We can reuse this across runs without reallocating
let mut run_bufs = Vec::with_capacity(EST_BUFF_CAPACITY);
let mut run_start_id: Option<usize> = None;
let mut all_ids = Vec::with_capacity(batch.len());
// Iterate through the batch
let mut iter = batch.into_iter().peekable();
while let Some((id, item)) = iter.next() {
// Track the start of the run
if run_start_id.is_none() {
run_start_id = Some(id);
}
// Add this page to the current run
run_bufs.push(item);
all_ids.push(id);
// Check if this is the end of a run
let is_end_of_run = match iter.peek() {
Some(&(next_id, _)) => next_id != id + 1,
None => true,
};
if is_end_of_run {
let start_id = run_start_id.expect("should have a start id");
let runs_left_cl = runs_left.clone();
let done_cl = done.clone();
let c = Completion::new_write(move |_| {
if runs_left_cl.fetch_sub(1, Ordering::AcqRel) == 1 {
done_cl.store(true, Ordering::Release);
}
});
// Submit write operation for this run, decrementing the counter if we error
if let Err(e) = pager
.db_file
.write_pages(start_id, page_sz, run_bufs.clone(), c)
{
if runs_left.fetch_sub(1, Ordering::AcqRel) == 1 {
done.store(true, Ordering::Release);
}
return Err(e);
}
// Reset for next run
run_bufs.clear();
run_start_id = None;
}
}
tracing::debug!(
"write_pages_vectored: {} pages to write, runs: {run_count}",
all_ids.len()
);
Ok(PendingFlush {
pages: all_ids,
done,
})
}
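// Illustrative sketch (not part of this change): the run counting and submission
// above boil down to grouping the sorted page ids into maximal contiguous ranges.
// A hypothetical standalone version of that grouping step:
fn split_into_runs(sorted_ids: &[usize]) -> Vec<Vec<usize>> {
    let mut runs: Vec<Vec<usize>> = Vec::new();
    for &id in sorted_ids {
        // Does this id extend the run we are currently building?
        let extends_last_run = runs
            .last()
            .and_then(|run| run.last())
            .map_or(false, |&last| last + 1 == id);
        if extends_last_run {
            runs.last_mut().expect("checked above").push(id);
        } else {
            // Gap in the ids: start a new run.
            runs.push(vec![id]);
        }
    }
    runs
}
// For [1, 2, 3, 6, 7, 9, 10, 11, 12] this yields [1, 2, 3], [6, 7] and [9, 10, 11, 12],
// i.e. three vectored submissions instead of nine single-page writes.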
#[instrument(skip_all, level = Level::DEBUG)]
pub fn begin_sync(
db_file: Arc<dyn DatabaseStorage>,
@@ -1453,9 +1583,7 @@ pub fn read_entire_wal_dumb(file: &Arc<dyn File>) -> Result<Arc<UnsafeCell<WalFi
let mut cumulative_checksum = (header_locked.checksum_1, header_locked.checksum_2);
let page_size_u32 = header_locked.page_size;
if !(MIN_PAGE_SIZE..=MAX_PAGE_SIZE).contains(&page_size_u32)
|| page_size_u32.count_ones() != 1
{
if PageSize::new(page_size_u32).is_none() {
panic!("Invalid page size in WAL header: {page_size_u32}");
}
let page_size = page_size_u32 as usize;
@@ -1567,7 +1695,7 @@ pub fn read_entire_wal_dumb(file: &Arc<dyn File>) -> Result<Arc<UnsafeCell<WalFi
wfs_data.loaded.store(true, Ordering::SeqCst);
});
let c = Completion::new_read(buf_for_pread, complete);
let c = file.pread(0, c)?;
let _c = file.pread(0, c)?;
Ok(wal_file_shared_ret)
}

View File

@@ -0,0 +1,19 @@
use crate::PageRef;
#[derive(Debug, Clone)]
pub enum EmptyTableState {
Start,
ReadPage { page: PageRef },
}
#[derive(Debug, Clone, Copy)]
pub enum MoveToRightState {
Start,
ProcessPage,
}
#[derive(Debug, Clone, Copy)]
pub enum SeekToLastState {
Start,
IsEmpty,
}

View File

@@ -3,7 +3,7 @@
use std::array;
use std::cell::UnsafeCell;
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use strum::EnumString;
use tracing::{instrument, Level};
@@ -21,7 +21,7 @@ use crate::io::{File, IO};
use crate::result::LimboResult;
use crate::storage::sqlite3_ondisk::{
begin_read_wal_frame, begin_read_wal_frame_raw, finish_read_page, prepare_wal_frame,
WAL_FRAME_HEADER_SIZE, WAL_HEADER_SIZE,
write_pages_vectored, WAL_FRAME_HEADER_SIZE, WAL_HEADER_SIZE,
};
use crate::types::IOResult;
use crate::{turso_assert, Buffer, LimboError, Result};
@@ -31,7 +31,7 @@ use self::sqlite3_ondisk::{checksum_wal, PageContent, WAL_MAGIC_BE, WAL_MAGIC_LE
use super::buffer_pool::BufferPool;
use super::pager::{PageRef, Pager};
use super::sqlite3_ondisk::{self, begin_write_btree_page, WalHeader};
use super::sqlite3_ondisk::{self, WalHeader};
pub const READMARK_NOT_USED: u32 = 0xffffffff;
@@ -280,106 +280,6 @@ pub trait Wal {
fn as_any(&self) -> &dyn std::any::Any;
}
/// A dummy WAL implementation that does nothing.
/// This is used for ephemeral indexes where a WAL is not really
/// needed, and is preferable to passing an Option<dyn Wal> around
/// everywhere.
pub struct DummyWAL;
impl Wal for DummyWAL {
fn begin_read_tx(&mut self) -> Result<(LimboResult, bool)> {
Ok((LimboResult::Ok, false))
}
fn end_read_tx(&self) {}
fn begin_write_tx(&mut self) -> Result<LimboResult> {
Ok(LimboResult::Ok)
}
fn end_write_tx(&self) {}
fn find_frame(&self, _page_id: u64) -> Result<Option<u64>> {
Ok(None)
}
fn read_frame(
&self,
_frame_id: u64,
_page: crate::PageRef,
_buffer_pool: Arc<BufferPool>,
) -> Result<Completion> {
// Dummy completion
Ok(Completion::new_write(|_| {}))
}
fn read_frame_raw(&self, _frame_id: u64, _frame: &mut [u8]) -> Result<Completion> {
todo!();
}
fn write_frame_raw(
&mut self,
_buffer_pool: Arc<BufferPool>,
_frame_id: u64,
_page_id: u64,
_db_size: u64,
_page: &[u8],
) -> Result<()> {
todo!();
}
fn append_frame(
&mut self,
_page: crate::PageRef,
_db_size: u32,
_write_counter: Rc<RefCell<usize>>,
) -> Result<Completion> {
Ok(Completion::new_write(|_| {}))
}
fn should_checkpoint(&self) -> bool {
false
}
fn checkpoint(
&mut self,
_pager: &Pager,
_write_counter: Rc<RefCell<usize>>,
_mode: crate::CheckpointMode,
) -> Result<IOResult<CheckpointResult>> {
Ok(IOResult::Done(CheckpointResult::default()))
}
fn sync(&mut self) -> Result<IOResult<()>> {
Ok(IOResult::Done(()))
}
fn get_max_frame_in_wal(&self) -> u64 {
0
}
fn get_max_frame(&self) -> u64 {
0
}
fn get_min_frame(&self) -> u64 {
0
}
fn finish_append_frames_commit(&mut self) -> Result<()> {
tracing::trace!("finish_append_frames_commit_dumb");
Ok(())
}
fn rollback(&mut self) -> Result<()> {
Ok(())
}
#[cfg(debug_assertions)]
fn as_any(&self) -> &dyn std::any::Any {
self
}
}
// Syncing requires a state machine because we need to schedule a sync and then wait until it is
// finished. If we don't wait there will be undefined behaviour that no one wants to debug.
#[derive(Copy, Clone, Debug)]
@@ -393,11 +293,69 @@ pub enum CheckpointState {
Start,
ReadFrame,
WaitReadFrame,
WritePage,
WaitWritePage,
AccumulatePage,
FlushBatch,
WaitFlush,
Done,
}
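// Illustrative summary (not part of this change) of how checkpoint_inner drives these states:
//   Start          -> ReadFrame | FlushBatch | Done  (depending on remaining pages and batch contents)
//   ReadFrame      -> WaitReadFrame                  (frame read into the scratch page is submitted)
//   WaitReadFrame  -> AccumulatePage                 (once the scratch page is unlocked)
//   AccumulatePage -> ReadFrame                      (more pages to read and the batch is not full)
//   AccumulatePage -> FlushBatch                     (batch full, or no more pages to read)
//   FlushBatch     -> WaitFlush                      (vectored write of the batch is submitted)
//   WaitFlush      -> ReadFrame | Done               (depending on whether pages remain)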
/// IOV_MAX is 1024 on most systems; let's use 512 to be safe
pub const CKPT_BATCH_PAGES: usize = 512;
type PageId = usize;
/// Batch is a collection of pages that are being checkpointed together. It is used to
/// aggregate contiguous pages into a single write operation to the database file.
pub(super) struct Batch {
items: BTreeMap<PageId, Arc<RefCell<Buffer>>>,
}
// TODO(preston): implement the same thing for `readv`
impl Batch {
fn new() -> Self {
Self {
items: BTreeMap::new(),
}
}
fn is_full(&self) -> bool {
self.items.len() >= CKPT_BATCH_PAGES
}
fn add_to_batch(&mut self, scratch: &PageRef, pool: &Arc<BufferPool>) {
let (id, buf_clone) = unsafe {
let inner = &*scratch.inner.get();
let id = inner.id;
let contents = inner.contents.as_ref().expect("scratch has contents");
let buf = contents.buffer.clone();
(id, buf)
};
// Insert the new batch item at the correct position
self.items.insert(id, buf_clone);
// Re-initialize scratch with a fresh buffer
let raw = pool.get();
let pool_clone = pool.clone();
let drop_fn = Rc::new(move |b| pool_clone.put(b));
let new_buf = Arc::new(RefCell::new(Buffer::new(raw, drop_fn)));
unsafe {
let inner = &mut *scratch.inner.get();
inner.contents = Some(PageContent::new(0, new_buf));
// reset flags on scratch so it won't be cleared later with the real page
inner.flags.store(0, Ordering::SeqCst);
}
}
}
impl std::ops::Deref for Batch {
type Target = BTreeMap<PageId, Arc<RefCell<Buffer>>>;
fn deref(&self) -> &Self::Target {
&self.items
}
}
impl std::ops::DerefMut for Batch {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.items
}
}
// Checkpointing is a state machine that has multiple steps. Since there are multiple steps we save
// in flight information of the checkpoint in OngoingCheckpoint. page is just a helper Page to do
// page operations like reading a frame to a page, and writing a page to disk. This page should not
@@ -407,13 +365,45 @@ pub enum CheckpointState {
// current_page is a helper to iterate through all the pages that might have a frame in the safe
// range. This is inefficient for now.
struct OngoingCheckpoint {
page: PageRef,
scratch_page: PageRef,
batch: Batch,
state: CheckpointState,
pending_flush: Option<PendingFlush>,
min_frame: u64,
max_frame: u64,
current_page: u64,
}
pub(super) struct PendingFlush {
// page ids to clear
pub(super) pages: Vec<usize>,
// completion flag set by IO callback
pub(super) done: Arc<AtomicBool>,
}
impl Default for PendingFlush {
fn default() -> Self {
Self::new()
}
}
impl PendingFlush {
pub fn new() -> Self {
Self {
pages: Vec::with_capacity(CKPT_BATCH_PAGES),
done: Arc::new(AtomicBool::new(false)),
}
}
// clear the dirty flag of all pages in the pending flush batch
fn clear_dirty(&self, pager: &Pager) {
for id in &self.pages {
if let Some(p) = pager.cache_get(*id) {
p.clear_dirty();
}
}
}
}
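// Illustrative sketch (not part of this change): the checkpoint loop polls the
// completion flag before touching the cache, roughly:
//
//     if pending_flush.done.load(Ordering::SeqCst) {
//         pending_flush.clear_dirty(pager);   // safe: the vectored write has landed
//     } else {
//         return Ok(IOResult::IO);            // come back after more I/O progress
//     }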
impl fmt::Debug for OngoingCheckpoint {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_struct("OngoingCheckpoint")
@@ -686,7 +676,9 @@ impl Wal for WalFile {
let checkpoint_seq = shared.wal_header.lock().checkpoint_seq;
(mx, nb, ck, checkpoint_seq)
};
let db_changed = shared_max > self.max_frame;
let db_changed = shared_max != self.max_frame
|| last_checksum != self.last_checksum
|| checkpoint_seq != self.header.checkpoint_seq;
// WAL is already fully backfilled into the main DB image
// (mxFrame == nBackfill). Readers can therefore ignore the
@@ -1079,7 +1071,7 @@ impl Wal for WalFile {
fn checkpoint(
&mut self,
pager: &Pager,
write_counter: Rc<RefCell<usize>>,
_write_counter: Rc<RefCell<usize>>,
mode: CheckpointMode,
) -> Result<IOResult<CheckpointResult>> {
if matches!(mode, CheckpointMode::Full) {
@@ -1087,9 +1079,10 @@ impl Wal for WalFile {
"Full checkpoint mode is not implemented yet".into(),
));
}
self.checkpoint_inner(pager, write_counter, mode)
self.checkpoint_inner(pager, _write_counter, mode)
.inspect_err(|_| {
let _ = self.checkpoint_guard.take();
self.ongoing_checkpoint.state = CheckpointState::Start;
})
}
@@ -1198,13 +1191,16 @@ impl WalFile {
let header = unsafe { shared.get().as_mut().unwrap().wal_header.lock() };
let last_checksum = unsafe { (*shared.get()).last_checksum };
let start_pages_in_frames = unsafe { (*shared.get()).pages_in_frames.lock().len() };
Self {
io,
// default to max frame in WAL, so that when we read schema we can read from WAL too if it's there.
max_frame: unsafe { (*shared.get()).max_frame.load(Ordering::SeqCst) },
shared,
ongoing_checkpoint: OngoingCheckpoint {
page: checkpoint_page,
scratch_page: checkpoint_page,
batch: Batch::new(),
pending_flush: None,
state: CheckpointState::Start,
min_frame: 0,
max_frame: 0,
@@ -1219,7 +1215,7 @@ impl WalFile {
last_checksum,
prev_checkpoint: CheckpointResult::default(),
checkpoint_guard: None,
start_pages_in_frames: 0,
start_pages_in_frames,
header: *header,
}
}
@@ -1263,6 +1259,8 @@ impl WalFile {
self.ongoing_checkpoint.max_frame = 0;
self.ongoing_checkpoint.current_page = 0;
self.max_frame_read_lock_index.set(NO_LOCK_HELD);
self.ongoing_checkpoint.batch.clear();
let _ = self.ongoing_checkpoint.pending_flush.take();
self.sync_state.set(SyncState::NotSyncing);
self.syncing.set(false);
}
@@ -1311,7 +1309,7 @@ impl WalFile {
fn checkpoint_inner(
&mut self,
pager: &Pager,
write_counter: Rc<RefCell<usize>>,
_write_counter: Rc<RefCell<usize>>,
mode: CheckpointMode,
) -> Result<IOResult<CheckpointResult>> {
'checkpoint_loop: loop {
@@ -1358,7 +1356,14 @@ impl WalFile {
let frame_cache = frame_cache.lock();
assert!(self.ongoing_checkpoint.current_page as usize <= pages_in_frames.len());
if self.ongoing_checkpoint.current_page as usize == pages_in_frames.len() {
self.ongoing_checkpoint.state = CheckpointState::Done;
if self.ongoing_checkpoint.batch.is_empty() {
// no more pages to checkpoint, we are done
tracing::info!("checkpoint done, no more pages to checkpoint");
self.ongoing_checkpoint.state = CheckpointState::Done;
} else {
// flush the batch
self.ongoing_checkpoint.state = CheckpointState::FlushBatch;
}
continue 'checkpoint_loop;
}
let page = pages_in_frames[self.ongoing_checkpoint.current_page as usize];
@@ -1374,10 +1379,10 @@ impl WalFile {
page,
*frame
);
self.ongoing_checkpoint.page.get().id = page as usize;
self.ongoing_checkpoint.scratch_page.get().id = page as usize;
let _ = self.read_frame(
*frame,
self.ongoing_checkpoint.page.clone(),
self.ongoing_checkpoint.scratch_page.clone(),
self.buffer_pool.clone(),
)?;
self.ongoing_checkpoint.state = CheckpointState::WaitReadFrame;
@@ -1387,30 +1392,65 @@ impl WalFile {
self.ongoing_checkpoint.current_page += 1;
}
CheckpointState::WaitReadFrame => {
if self.ongoing_checkpoint.page.is_locked() {
if self.ongoing_checkpoint.scratch_page.is_locked() {
return Ok(IOResult::IO);
} else {
self.ongoing_checkpoint.state = CheckpointState::WritePage;
self.ongoing_checkpoint.state = CheckpointState::AccumulatePage;
}
}
CheckpointState::WritePage => {
self.ongoing_checkpoint.page.set_dirty();
let _ = begin_write_btree_page(
CheckpointState::AccumulatePage => {
// mark before batching
self.ongoing_checkpoint.scratch_page.set_dirty();
// we read the frame into memory, add it to our batch
self.ongoing_checkpoint
.batch
.add_to_batch(&self.ongoing_checkpoint.scratch_page, &self.buffer_pool);
let more_pages = (self.ongoing_checkpoint.current_page as usize)
< self
.get_shared()
.pages_in_frames
.lock()
.len()
.saturating_sub(1)
&& !self.ongoing_checkpoint.batch.is_full();
// if we can read more pages, continue reading and accumulating pages
if more_pages {
self.ongoing_checkpoint.current_page += 1;
self.ongoing_checkpoint.state = CheckpointState::ReadFrame;
} else {
// if we have enough pages in the batch, flush it
self.ongoing_checkpoint.state = CheckpointState::FlushBatch;
}
}
CheckpointState::FlushBatch => {
tracing::trace!("started checkpoint backfilling batch");
self.ongoing_checkpoint.pending_flush = Some(write_pages_vectored(
pager,
&self.ongoing_checkpoint.page,
write_counter.clone(),
)?;
self.ongoing_checkpoint.state = CheckpointState::WaitWritePage;
std::mem::take(&mut self.ongoing_checkpoint.batch),
)?);
// batch is queued
self.ongoing_checkpoint.batch.clear();
self.ongoing_checkpoint.state = CheckpointState::WaitFlush;
}
CheckpointState::WaitWritePage => {
if *write_counter.borrow() > 0 {
return Ok(IOResult::IO);
CheckpointState::WaitFlush => {
match self.ongoing_checkpoint.pending_flush.as_ref() {
Some(pf) if pf.done.load(Ordering::SeqCst) => {
// flush is done, we can continue
tracing::trace!("checkpoint backfilling batch done");
}
Some(_) => return Ok(IOResult::IO),
None => panic!("we should have a pending flush here"),
}
// If page was in cache clear it.
if let Some(page) = pager.cache_get(self.ongoing_checkpoint.page.get().id) {
page.clear_dirty();
}
self.ongoing_checkpoint.page.clear_dirty();
tracing::debug!("finished checkpoint backfilling batch");
let pf = self
.ongoing_checkpoint
.pending_flush
.as_ref()
.expect("we should have a pending flush here");
pf.clear_dirty(pager);
// done with batch
let shared = self.get_shared();
if (self.ongoing_checkpoint.current_page as usize)
< shared.pages_in_frames.lock().len()
@@ -1418,6 +1458,7 @@ impl WalFile {
self.ongoing_checkpoint.current_page += 1;
self.ongoing_checkpoint.state = CheckpointState::ReadFrame;
} else {
tracing::debug!("WaitFlush transitioning checkpoint to Done");
self.ongoing_checkpoint.state = CheckpointState::Done;
}
}
@@ -1426,8 +1467,11 @@ impl WalFile {
// In Restart or Truncate mode, we need to restart the log over and possibly truncate the file
// Release all locks and return the current num of wal frames and the amount we backfilled
CheckpointState::Done => {
if *write_counter.borrow() > 0 {
return Ok(IOResult::IO);
if let Some(pf) = self.ongoing_checkpoint.pending_flush.as_ref() {
turso_assert!(
pf.done.load(Ordering::Relaxed),
"checkpoint pending flush must have finished"
);
}
let mut checkpoint_result = {
let shared = self.get_shared();
@@ -1491,6 +1535,11 @@ impl WalFile {
} else {
let _ = self.checkpoint_guard.take();
}
self.ongoing_checkpoint.scratch_page.clear_dirty();
self.ongoing_checkpoint.scratch_page.get().id = 0;
self.ongoing_checkpoint.scratch_page.get().contents = None;
let _ = self.ongoing_checkpoint.pending_flush.take();
self.ongoing_checkpoint.batch.clear();
self.ongoing_checkpoint.state = CheckpointState::Start;
return Ok(IOResult::Done(checkpoint_result));
}
@@ -1883,7 +1932,7 @@ pub mod test {
}
let pager = conn.pager.borrow_mut();
let _ = pager.cacheflush();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let stat = std::fs::metadata(&walpath).unwrap();
let meta_before = std::fs::metadata(&walpath).unwrap();
@@ -1918,6 +1967,25 @@ pub mod test {
}
}
fn count_test_table(conn: &Arc<Connection>) -> i64 {
let mut stmt = conn.prepare("select count(*) from test").unwrap();
loop {
match stmt.step() {
Ok(StepResult::Row) => {
break;
}
Ok(StepResult::IO) => {
stmt.run_once().unwrap();
}
_ => {
panic!("Failed to step through the statement");
}
}
}
let count: i64 = stmt.row().unwrap().get(0).unwrap();
count
}
fn run_checkpoint_until_done(
wal: &mut dyn Wal,
pager: &crate::Pager,
@@ -1982,7 +2050,7 @@ pub mod test {
// but NOT truncate the file.
{
let pager = conn.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let res = run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Restart);
assert_eq!(res.num_wal_frames, mx_before);
assert_eq!(res.num_checkpointed_frames, mx_before);
@@ -2029,6 +2097,8 @@ pub mod test {
conn.pager
.borrow_mut()
.wal
.as_ref()
.unwrap()
.borrow_mut()
.finish_append_frames_commit()
.unwrap();
@@ -2055,7 +2125,7 @@ pub mod test {
// Force a read transaction that will freeze a lower read mark
let readmark = {
let pager = conn2.pager.borrow_mut();
let mut wal2 = pager.wal.borrow_mut();
let mut wal2 = pager.wal.as_ref().unwrap().borrow_mut();
assert!(matches!(wal2.begin_read_tx().unwrap().0, LimboResult::Ok));
wal2.get_max_frame()
};
@@ -2069,7 +2139,7 @@ pub mod test {
// Run passive checkpoint, expect partial
let (res1, max_before) = {
let pager = conn1.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let res = run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Passive);
let maxf = unsafe {
(&*db.maybe_shared_wal.read().as_ref().unwrap().get())
@@ -2092,13 +2162,13 @@ pub mod test {
// Release reader
{
let pager = conn2.pager.borrow_mut();
let wal2 = pager.wal.borrow_mut();
let wal2 = pager.wal.as_ref().unwrap().borrow_mut();
wal2.end_read_tx();
}
// Second passive checkpoint should finish
let pager = conn1.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let res2 = run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Passive);
assert_eq!(
res2.num_checkpointed_frames, res2.num_wal_frames,
@@ -2117,6 +2187,8 @@ pub mod test {
.pager
.borrow_mut()
.wal
.as_ref()
.unwrap()
.borrow_mut()
.begin_read_tx()
.unwrap();
@@ -2124,7 +2196,7 @@ pub mod test {
// checkpoint should succeed here because the wal is fully checkpointed (empty)
// so the reader is using readmark0 to read directly from the db file.
let p = conn1.pager.borrow();
let mut w = p.wal.borrow_mut();
let mut w = p.wal.as_ref().unwrap().borrow_mut();
loop {
match w.checkpoint(&p, Rc::new(RefCell::new(0)), CheckpointMode::Restart) {
Ok(IOResult::IO) => {
@@ -2153,7 +2225,7 @@ pub mod test {
// now that we have some frames to checkpoint, try again
conn2.pager.borrow_mut().begin_read_tx().unwrap();
let p = conn1.pager.borrow();
let mut w = p.wal.borrow_mut();
let mut w = p.wal.as_ref().unwrap().borrow_mut();
loop {
match w.checkpoint(&p, Rc::new(RefCell::new(0)), CheckpointMode::Restart) {
Ok(IOResult::IO) => {
@@ -2185,7 +2257,7 @@ pub mod test {
// Checkpoint with restart
{
let pager = conn.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let result = run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Restart);
assert!(result.everything_backfilled());
}
@@ -2228,7 +2300,7 @@ pub mod test {
// R1 starts reading
let r1_max_frame = {
let pager = conn_r1.pager.borrow_mut();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
assert!(matches!(wal.begin_read_tx().unwrap().0, LimboResult::Ok));
wal.get_max_frame()
};
@@ -2237,7 +2309,7 @@ pub mod test {
// R2 starts reading, sees more frames than R1
let r2_max_frame = {
let pager = conn_r2.pager.borrow_mut();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
assert!(matches!(wal.begin_read_tx().unwrap().0, LimboResult::Ok));
wal.get_max_frame()
};
@@ -2245,7 +2317,7 @@ pub mod test {
// try passive checkpoint, should only checkpoint up to R1's position
let checkpoint_result = {
let pager = conn_writer.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Passive)
};
@@ -2260,7 +2332,14 @@ pub mod test {
// Verify R2 still sees its frames
assert_eq!(
conn_r2.pager.borrow().wal.borrow().get_max_frame(),
conn_r2
.pager
.borrow()
.wal
.as_ref()
.unwrap()
.borrow()
.get_max_frame(),
r2_max_frame,
"Reader should maintain its snapshot"
);
@@ -2281,7 +2360,7 @@ pub mod test {
{
let pager = conn.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let _result = run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Passive);
}
@@ -2312,7 +2391,7 @@ pub mod test {
// start a write transaction
{
let pager = conn2.pager.borrow_mut();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let _ = wal.begin_read_tx().unwrap();
let res = wal.begin_write_tx().unwrap();
assert!(matches!(res, LimboResult::Ok), "result: {res:?}");
@@ -2321,7 +2400,7 @@ pub mod test {
// should fail because writer lock is held
let result = {
let pager = conn1.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
wal.checkpoint(&pager, Rc::new(RefCell::new(0)), CheckpointMode::Restart)
};
@@ -2330,14 +2409,28 @@ pub mod test {
"Restart checkpoint should fail when write lock is held"
);
conn2.pager.borrow().wal.borrow().end_read_tx();
conn2
.pager
.borrow()
.wal
.as_ref()
.unwrap()
.borrow_mut()
.end_read_tx();
// release write lock
conn2.pager.borrow().wal.borrow().end_write_tx();
conn2
.pager
.borrow()
.wal
.as_ref()
.unwrap()
.borrow_mut()
.end_write_tx();
// now restart should succeed
let result = {
let pager = conn1.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Restart)
};
@@ -2355,17 +2448,21 @@ pub mod test {
// Attempt to start a write transaction without a read transaction
let pager = conn.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let _ = wal.begin_write_tx();
}
fn check_read_lock_slot(conn: &Arc<Connection>, expected_slot: usize) -> bool {
let pager = conn.pager.borrow();
let wal = pager.wal.borrow();
let wal_any = wal.as_any();
if let Some(wal_file) = wal_any.downcast_ref::<WalFile>() {
return wal_file.max_frame_read_lock_index.get() == expected_slot;
let wal = pager.wal.as_ref().unwrap().borrow();
#[cfg(debug_assertions)]
{
let wal_any = wal.as_any();
if let Some(wal_file) = wal_any.downcast_ref::<WalFile>() {
return wal_file.max_frame_read_lock_index.get() == expected_slot;
}
}
false
}
@@ -2382,7 +2479,14 @@ pub mod test {
conn.execute("BEGIN").unwrap();
let mut stmt = conn.prepare("SELECT * FROM test").unwrap();
stmt.step().unwrap();
let frame = conn.pager.borrow().wal.borrow().get_max_frame();
let frame = conn
.pager
.borrow()
.wal
.as_ref()
.unwrap()
.borrow()
.get_max_frame();
(frame, stmt)
}
@@ -2406,7 +2510,7 @@ pub mod test {
// passive checkpoint #1
let result1 = {
let pager = conn_writer.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Passive)
};
assert_eq!(result1.num_checkpointed_frames, r1_frame);
@@ -2417,7 +2521,7 @@ pub mod test {
// passive checkpoint #2
let result2 = {
let pager = conn_writer.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Passive)
};
assert_eq!(
@@ -2463,7 +2567,7 @@ pub mod test {
// Do a TRUNCATE checkpoint
{
let pager = conn.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Truncate);
}
@@ -2496,6 +2600,75 @@ pub mod test {
std::fs::remove_dir_all(path).unwrap();
}
#[test]
fn test_wal_checkpoint_truncate_db_file_contains_data() {
let (db, path) = get_database();
let conn = db.connect().unwrap();
let walpath = {
let mut p = path.clone().into_os_string().into_string().unwrap();
p.push_str("/test.db-wal");
std::path::PathBuf::from(p)
};
conn.execute("create table test(id integer primary key, value text)")
.unwrap();
bulk_inserts(&conn, 10, 100);
// Get size before checkpoint
let size_before = std::fs::metadata(&walpath).unwrap().len();
assert!(size_before > 0, "WAL file should have content");
// Do a TRUNCATE checkpoint
{
let pager = conn.pager.borrow();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Truncate);
}
// Check file size after truncate
let size_after = std::fs::metadata(&walpath).unwrap().len();
assert_eq!(size_after, 0, "WAL file should be truncated to 0 bytes");
// Verify we can still write to the database
conn.execute("INSERT INTO test VALUES (1001, 'after-truncate')")
.unwrap();
// Check WAL has new content
let new_size = std::fs::metadata(&walpath).unwrap().len();
assert!(new_size >= 32, "WAL file too small");
let hdr = read_wal_header(&walpath);
let expected_magic = if cfg!(target_endian = "big") {
sqlite3_ondisk::WAL_MAGIC_BE
} else {
sqlite3_ondisk::WAL_MAGIC_LE
};
assert!(
hdr.magic == expected_magic,
"bad WAL magic: {:#X}, expected: {:#X}",
hdr.magic,
sqlite3_ondisk::WAL_MAGIC_BE
);
assert_eq!(hdr.file_format, 3007000);
assert_eq!(hdr.page_size, 4096, "invalid page size");
assert_eq!(hdr.checkpoint_seq, 1, "invalid checkpoint_seq");
{
let pager = conn.pager.borrow();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Passive);
}
// delete the WAL file so we can read right from db and assert
// that everything was backfilled properly
std::fs::remove_file(&walpath).unwrap();
let count = count_test_table(&conn);
assert_eq!(
count, 1001,
"we should have 1001 rows in the table all together"
);
std::fs::remove_dir_all(path).unwrap();
}
fn read_wal_header(path: &std::path::Path) -> sqlite3_ondisk::WalHeader {
use std::{fs::File, io::Read};
let mut hdr = [0u8; 32];
@@ -2525,7 +2698,7 @@ pub mod test {
// Start a read transaction on conn2
{
let pager = conn2.pager.borrow_mut();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let (res, _) = wal.begin_read_tx().unwrap();
assert!(matches!(res, LimboResult::Ok));
}
@@ -2534,7 +2707,7 @@ pub mod test {
// Try to start a write transaction on conn2 with a stale snapshot
let result = {
let pager = conn2.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
wal.begin_write_tx()
};
// Should get Busy due to stale snapshot
@@ -2543,7 +2716,7 @@ pub mod test {
// End read transaction and start a fresh one
{
let pager = conn2.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
wal.end_read_tx();
let (res, _) = wal.begin_read_tx().unwrap();
assert!(matches!(res, LimboResult::Ok));
@@ -2551,7 +2724,7 @@ pub mod test {
// Now write transaction should work
let result = {
let pager = conn2.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
wal.begin_write_tx()
};
assert!(matches!(result.unwrap(), LimboResult::Ok));
@@ -2570,14 +2743,14 @@ pub mod test {
// Do a full checkpoint to move all data to DB file
{
let pager = conn1.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Passive);
}
// Start a read transaction on conn2
{
let pager = conn2.pager.borrow_mut();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let (res, _) = wal.begin_read_tx().unwrap();
assert!(matches!(res, LimboResult::Ok));
}
@@ -2585,7 +2758,7 @@ pub mod test {
assert!(check_read_lock_slot(&conn2, 0));
{
let pager = conn1.pager.borrow();
let wal = pager.wal.borrow();
let wal = pager.wal.as_ref().unwrap().borrow();
let frame = wal.find_frame(5);
// since we hold readlock0, we should ignore the db file and find_frame should return none
assert!(frame.is_ok_and(|f| f.is_none()));
@@ -2593,7 +2766,7 @@ pub mod test {
// Try checkpoint, should fail because reader has slot 0
{
let pager = conn1.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let result = wal.checkpoint(&pager, Rc::new(RefCell::new(0)), CheckpointMode::Restart);
assert!(
@@ -2604,12 +2777,12 @@ pub mod test {
// End the read transaction
{
let pager = conn2.pager.borrow();
let wal = pager.wal.borrow();
let wal = pager.wal.as_ref().unwrap().borrow();
wal.end_read_tx();
}
{
let pager = conn1.pager.borrow();
let mut wal = pager.wal.borrow_mut();
let mut wal = pager.wal.as_ref().unwrap().borrow_mut();
let result = run_checkpoint_until_done(&mut *wal, &pager, CheckpointMode::Restart);
assert!(
result.everything_backfilled(),

View File

@@ -30,7 +30,7 @@ pub fn translate_alter_table(
// Let's disable altering a table with indices altogether instead of checking column by
// column to be extra safe.
crate::bail_parse_error!(
"ALTER TABLE for table with indexes is disabled by default. Run with `--experimental-indexes` to enable this feature."
"ALTER TABLE for table with indexes is disabled. Omit the `--experimental-indexes=false` flag to enable this feature."
);
}
@@ -80,7 +80,7 @@ pub fn translate_alter_table(
btree.columns.remove(dropped_index);
let sql = btree.to_sql();
let sql = btree.to_sql().replace('\'', "''");
let stmt = format!(
r#"

View File

@@ -22,6 +22,7 @@ pub fn emit_program_for_compound_select(
left: _left,
right_most,
limit,
offset,
..
} = &plan
else {
@@ -39,8 +40,8 @@ pub fn emit_program_for_compound_select(
}
}
// Each subselect shares the same limit_ctx, because the LIMIT applies to the entire compound select,
// not just a single subselect.
// Each subselect shares the same limit_ctx and offset, because the LIMIT, OFFSET applies to
// the entire compound select, not just a single subselect.
let limit_ctx = limit.map(|limit| {
let reg = program.alloc_register();
program.emit_insn(Insn::Integer {
@@ -49,6 +50,22 @@ pub fn emit_program_for_compound_select(
});
LimitCtx::new_shared(reg)
});
let offset_reg = offset.map(|offset| {
let reg = program.alloc_register();
program.emit_insn(Insn::Integer {
value: offset as i64,
dest: reg,
});
let combined_reg = program.alloc_register();
program.emit_insn(Insn::OffsetLimit {
offset_reg: reg,
combined_reg,
limit_reg: limit_ctx.unwrap().reg_limit,
});
reg
});
// When a compound SELECT is part of a query that yields results to a coroutine (e.g. within an INSERT clause),
// we must allocate registers for the result columns to be yielded. Each subselect will then yield to
@@ -67,6 +84,7 @@ pub fn emit_program_for_compound_select(
schema,
syms,
limit_ctx,
offset_reg,
yield_reg,
reg_result_cols_start,
)?;
@@ -80,12 +98,14 @@ pub fn emit_program_for_compound_select(
// Emits bytecode for a compound SELECT statement. This function processes the rightmost part of
// the compound SELECT and handles the left parts recursively based on the compound operator type.
#[allow(clippy::too_many_arguments)]
fn emit_compound_select(
program: &mut ProgramBuilder,
plan: Plan,
schema: &Schema,
syms: &SymbolTable,
limit_ctx: Option<LimitCtx>,
offset_reg: Option<usize>,
yield_reg: Option<usize>,
reg_result_cols_start: Option<usize>,
) -> crate::Result<()> {
@@ -130,6 +150,7 @@ fn emit_compound_select(
schema,
syms,
limit_ctx,
offset_reg,
yield_reg,
reg_result_cols_start,
)?;
@@ -144,6 +165,10 @@ fn emit_compound_select(
right_most.limit = limit;
right_most_ctx.limit_ctx = Some(limit_ctx);
}
if offset_reg.is_some() {
right_most.offset = offset;
right_most_ctx.reg_offset = offset_reg;
}
emit_query(program, &mut right_most, &mut right_most_ctx)?;
program.preassign_label_to_next_insn(label_next_select);
}
@@ -176,6 +201,7 @@ fn emit_compound_select(
schema,
syms,
None,
None,
yield_reg,
reg_result_cols_start,
)?;
@@ -193,6 +219,7 @@ fn emit_compound_select(
dedupe_index.0,
dedupe_index.1.as_ref(),
limit_ctx,
offset_reg,
yield_reg,
);
}
@@ -225,6 +252,7 @@ fn emit_compound_select(
schema,
syms,
None,
None,
yield_reg,
reg_result_cols_start,
)?;
@@ -244,6 +272,7 @@ fn emit_compound_select(
right_cursor_id,
target_cursor_id,
limit_ctx,
offset_reg,
yield_reg,
);
}
@@ -276,6 +305,7 @@ fn emit_compound_select(
schema,
syms,
None,
None,
yield_reg,
reg_result_cols_start,
)?;
@@ -287,7 +317,7 @@ fn emit_compound_select(
emit_query(program, &mut right_most, &mut right_most_ctx)?;
if new_index {
read_deduplicated_union_or_except_rows(
program, cursor_id, &index, limit_ctx, yield_reg,
program, cursor_id, &index, limit_ctx, offset_reg, yield_reg,
);
}
}
@@ -297,6 +327,10 @@ fn emit_compound_select(
right_most_ctx.limit_ctx = Some(limit_ctx);
right_most.limit = limit;
}
if offset_reg.is_some() {
right_most.offset = offset;
right_most_ctx.reg_offset = offset_reg;
}
emit_query(program, &mut right_most, &mut right_most_ctx)?;
}
}
@@ -351,6 +385,7 @@ fn read_deduplicated_union_or_except_rows(
dedupe_cursor_id: usize,
dedupe_index: &Index,
limit_ctx: Option<LimitCtx>,
offset_reg: Option<usize>,
yield_reg: Option<usize>,
) {
let label_close = program.allocate_label();
@@ -362,6 +397,13 @@ fn read_deduplicated_union_or_except_rows(
pc_if_empty: label_dedupe_next,
});
program.preassign_label_to_next_insn(label_dedupe_loop_start);
if let Some(reg) = offset_reg {
program.emit_insn(Insn::IfPos {
reg,
target_pc: label_dedupe_next,
decrement_by: 1,
});
}
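// Illustrative note (not part of this change): IfPos implements OFFSET here by
// decrementing the shared offset register and jumping to the "next row" label while
// it is still positive, so the first OFFSET deduplicated rows are skipped before any
// result columns are copied out.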
for col_idx in 0..dedupe_index.columns.len() {
let start_reg = if let Some(yield_reg) = yield_reg {
// Need to reuse the yield_reg for the column being emitted
@@ -406,6 +448,7 @@ fn read_deduplicated_union_or_except_rows(
}
// Emits the bytecode for Reading rows from the intersection of two cursors.
#[allow(clippy::too_many_arguments)]
fn read_intersect_rows(
program: &mut ProgramBuilder,
left_cursor_id: usize,
@@ -413,6 +456,7 @@ fn read_intersect_rows(
right_cursor_id: usize,
target_cursor: Option<usize>,
limit_ctx: Option<LimitCtx>,
offset_reg: Option<usize>,
yield_reg: Option<usize>,
) {
let label_close = program.allocate_label();
@@ -435,6 +479,13 @@ fn read_intersect_rows(
record_reg: row_content_reg,
num_regs: 0,
});
if let Some(reg) = offset_reg {
program.emit_insn(Insn::IfPos {
reg,
target_pc: label_next,
decrement_by: 1,
});
}
let column_count = index.columns.len();
let cols_start_reg = if let Some(yield_reg) = yield_reg {
yield_reg + 1

View File

@@ -25,7 +25,7 @@ pub fn translate_delete(
// Let's disable altering a table with indices altogether instead of checking column by
// column to be extra safe.
crate::bail_parse_error!(
"DELETE for table with indexes is disabled by default. Run with `--experimental-indexes` to enable this feature."
"DELETE for table with indexes is disabled. Omit the `--experimental-indexes=false` flag to enable this feature."
);
}

View File

@@ -557,7 +557,7 @@ impl ToTokens for UpdatePlan {
.unwrap();
ast::Set {
col_names: ast::DistinctNames::single(ast::Name::from_str(col_name)),
col_names: ast::Names::single(ast::Name::from_str(col_name)),
expr: set_expr.clone(),
}
}),

View File

@@ -266,7 +266,7 @@ pub fn emit_query<'a>(
t_ctx: &mut TranslateCtx<'a>,
) -> Result<usize> {
if !plan.values.is_empty() {
let reg_result_cols_start = emit_values(program, plan, &t_ctx.resolver, t_ctx.limit_ctx)?;
let reg_result_cols_start = emit_values(program, plan, t_ctx)?;
return Ok(reg_result_cols_start);
}
@@ -825,7 +825,6 @@ fn emit_update_insns(
});
// Check if rowid was provided (through INTEGER PRIMARY KEY as a rowid alias)
let rowid_alias_index = table_ref.columns().iter().position(|c| c.is_rowid_alias);
let has_user_provided_rowid = if let Some(index) = rowid_alias_index {

View File

@@ -8,7 +8,7 @@ use super::plan::TableReferences;
use crate::function::JsonFunc;
use crate::function::{Func, FuncCtx, MathFuncArity, ScalarFunc, VectorFunc};
use crate::functions::datetime;
use crate::schema::{Affinity, Table, Type};
use crate::schema::{affinity, Affinity, Table, Type};
use crate::util::{exprs_are_equivalent, parse_numeric_literal};
use crate::vdbe::builder::CursorKey;
use crate::vdbe::{
@@ -141,6 +141,138 @@ macro_rules! expect_arguments_even {
}};
}
/// Core implementation of IN expression logic that can be used in both conditional and expression contexts.
/// This follows SQLite's approach where a single core function handles all InList cases.
///
/// This is extracted from the original conditional implementation to be reusable.
/// The logic exactly matches the original conditional InList implementation.
#[instrument(skip(program, referenced_tables, resolver), level = Level::DEBUG)]
fn translate_in_list(
program: &mut ProgramBuilder,
referenced_tables: Option<&TableReferences>,
lhs: &ast::Expr,
rhs: &Option<Vec<ast::Expr>>,
not: bool,
condition_metadata: ConditionMetadata,
resolver: &Resolver,
) -> Result<()> {
// lhs is e.g. a column reference
// rhs is an Option<Vec<Expr>>
// If rhs is None, it means the IN expression is always false, i.e. tbl.id IN ().
// If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3).
//
// The IN expression is equivalent to a series of OR expressions.
// For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`.
// The NOT IN expression is equivalent to a series of AND expressions.
// For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`.
//
// SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values.
// For now we don't have the plumbing to do that, so we'll just emit a series of comparisons,
// which is what SQLite also does for small lists of values.
// TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values.
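// Illustrative sketch (not part of this change): for `a IN (1, 2, 3)` with jump
// targets T (true) and F (false), the emitted comparison chain is roughly:
//
//     Eq  r_a, r_1 -> T                  -- match: short-circuit to true
//     Eq  r_a, r_2 -> T
//     Ne  r_a, r_3 -> F  (jump_if_null)  -- last value: no match (or NULL) jumps to false
//     Goto T                             -- only when jump_if_condition_is_true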
if rhs.is_none() {
// If rhs is None, IN expressions are always false and NOT IN expressions are always true.
if not {
// On a trivially true NOT IN () expression we can only jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'; otherwise we must fall through.
// This is because in a more complex condition we might need to evaluate the rest of the condition.
// Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where jumping on true would be incorrect,
// but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug.
if condition_metadata.jump_if_condition_is_true {
program.emit_insn(Insn::Goto {
target_pc: condition_metadata.jump_target_when_true,
});
}
} else {
program.emit_insn(Insn::Goto {
target_pc: condition_metadata.jump_target_when_false,
});
}
return Ok(());
}
// The left hand side only needs to be evaluated once we have a list of values to compare against.
let lhs_reg = program.alloc_register();
let _ = translate_expr(program, referenced_tables, lhs, lhs_reg, resolver)?;
let rhs = rhs.as_ref().unwrap();
// The difference between a local jump and an "upper level" jump is that for example in this case:
// WHERE foo IN (1,2,3) OR bar = 5,
// we can immediately jump to the 'jump_target_when_true' label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition.
// This is why in Binary-OR expressions we set jump_if_condition_is_true to true for the first condition.
// However, in this example:
// WHERE foo IN (1,2,3) AND bar = 5,
// we can't jump to the 'jump_target_when_true' label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later.
// This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition.
let jump_target_when_true = if condition_metadata.jump_if_condition_is_true {
condition_metadata.jump_target_when_true
} else {
program.allocate_label()
};
if !not {
// If it's an IN expression, we need to jump to the 'jump_target_when_true' label if any of the conditions are true.
for (i, expr) in rhs.iter().enumerate() {
let rhs_reg = program.alloc_register();
let last_condition = i == rhs.len() - 1;
let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?;
// If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true.
if !last_condition {
program.emit_insn(Insn::Eq {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: jump_target_when_true,
flags: CmpInsFlags::default(),
collation: program.curr_collation(),
});
} else {
// If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match.
program.emit_insn(Insn::Ne {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: condition_metadata.jump_target_when_false,
flags: CmpInsFlags::default().jump_if_null(),
collation: program.curr_collation(),
});
}
}
// If we got here, then the last condition was a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'.
// If not, we can just fall through without emitting an unnecessary instruction.
if condition_metadata.jump_if_condition_is_true {
program.emit_insn(Insn::Goto {
target_pc: condition_metadata.jump_target_when_true,
});
}
} else {
// If it's a NOT IN expression, we need to jump to the 'jump_target_when_false' label if any of the conditions are true.
for expr in rhs.iter() {
let rhs_reg = program.alloc_register();
let _ = translate_expr(program, referenced_tables, expr, rhs_reg, resolver)?;
program.emit_insn(Insn::Eq {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: condition_metadata.jump_target_when_false,
flags: CmpInsFlags::default().jump_if_null(),
collation: program.curr_collation(),
});
}
// If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'.
// If not, we can just fall through without emitting an unnecessary instruction.
if condition_metadata.jump_if_condition_is_true {
program.emit_insn(Insn::Goto {
target_pc: condition_metadata.jump_target_when_true,
});
}
}
if !condition_metadata.jump_if_condition_is_true {
program.preassign_label_to_next_insn(jump_target_when_true);
}
Ok(())
}
#[instrument(skip(program, referenced_tables, expr, resolver), level = Level::DEBUG)]
pub fn translate_condition_expr(
program: &mut ProgramBuilder,
@@ -219,121 +351,15 @@ pub fn translate_condition_expr(
emit_cond_jump(program, condition_metadata, reg);
}
ast::Expr::InList { lhs, not, rhs } => {
// lhs is e.g. a column reference
// rhs is an Option<Vec<Expr>>
// If rhs is None, it means the IN expression is always false, i.e. tbl.id IN ().
// If rhs is Some, it means the IN expression has a list of values to compare against, e.g. tbl.id IN (1, 2, 3).
//
// The IN expression is equivalent to a series of OR expressions.
// For example, `a IN (1, 2, 3)` is equivalent to `a = 1 OR a = 2 OR a = 3`.
// The NOT IN expression is equivalent to a series of AND expressions.
// For example, `a NOT IN (1, 2, 3)` is equivalent to `a != 1 AND a != 2 AND a != 3`.
//
// SQLite typically optimizes IN expressions to use a binary search on an ephemeral index if there are many values.
// For now we don't have the plumbing to do that, so we'll just emit a series of comparisons,
// which is what SQLite also does for small lists of values.
// TODO: Let's refactor this later to use a more efficient implementation conditionally based on the number of values.
if rhs.is_none() {
// If rhs is None, IN expressions are always false and NOT IN expressions are always true.
if *not {
// On a trivially true NOT IN () expression we can only jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'; otherwise me must fall through.
// This is because in a more complex condition we might need to evaluate the rest of the condition.
// Note that we are already breaking up our WHERE clauses into a series of terms at "AND" boundaries, so right now we won't be running into cases where jumping on true would be incorrect,
// but once we have e.g. parenthesization and more complex conditions, not having this 'if' here would introduce a bug.
if condition_metadata.jump_if_condition_is_true {
program.emit_insn(Insn::Goto {
target_pc: condition_metadata.jump_target_when_true,
});
}
} else {
program.emit_insn(Insn::Goto {
target_pc: condition_metadata.jump_target_when_false,
});
}
return Ok(());
}
// The left hand side only needs to be evaluated once we have a list of values to compare against.
let lhs_reg = program.alloc_register();
let _ = translate_expr(program, Some(referenced_tables), lhs, lhs_reg, resolver)?;
let rhs = rhs.as_ref().unwrap();
// The difference between a local jump and an "upper level" jump is that for example in this case:
// WHERE foo IN (1,2,3) OR bar = 5,
// we can immediately jump to the 'jump_target_when_true' label of the ENTIRE CONDITION if foo = 1, foo = 2, or foo = 3 without evaluating the bar = 5 condition.
// This is why in Binary-OR expressions we set jump_if_condition_is_true to true for the first condition.
// However, in this example:
// WHERE foo IN (1,2,3) AND bar = 5,
// we can't jump to the 'jump_target_when_true' label of the entire condition foo = 1, foo = 2, or foo = 3, because we still need to evaluate the bar = 5 condition later.
// This is why in that case we just jump over the rest of the IN conditions in this "local" branch which evaluates the IN condition.
let jump_target_when_true = if condition_metadata.jump_if_condition_is_true {
condition_metadata.jump_target_when_true
} else {
program.allocate_label()
};
if !*not {
// If it's an IN expression, we need to jump to the 'jump_target_when_true' label if any of the conditions are true.
for (i, expr) in rhs.iter().enumerate() {
let rhs_reg = program.alloc_register();
let last_condition = i == rhs.len() - 1;
let _ =
translate_expr(program, Some(referenced_tables), expr, rhs_reg, resolver)?;
// If this is not the last condition, we need to jump to the 'jump_target_when_true' label if the condition is true.
if !last_condition {
program.emit_insn(Insn::Eq {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: jump_target_when_true,
flags: CmpInsFlags::default(),
collation: program.curr_collation(),
});
} else {
// If this is the last condition, we need to jump to the 'jump_target_when_false' label if there is no match.
program.emit_insn(Insn::Ne {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: condition_metadata.jump_target_when_false,
flags: CmpInsFlags::default().jump_if_null(),
collation: program.curr_collation(),
});
}
}
// If we got here, then the last condition was a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'.
// If not, we can just fall through without emitting an unnecessary instruction.
if condition_metadata.jump_if_condition_is_true {
program.emit_insn(Insn::Goto {
target_pc: condition_metadata.jump_target_when_true,
});
}
} else {
// If it's a NOT IN expression, we need to jump to the 'jump_target_when_false' label if any of the conditions are true.
for expr in rhs.iter() {
let rhs_reg = program.alloc_register();
let _ =
translate_expr(program, Some(referenced_tables), expr, rhs_reg, resolver)?;
program.emit_insn(Insn::Eq {
lhs: lhs_reg,
rhs: rhs_reg,
target_pc: condition_metadata.jump_target_when_false,
flags: CmpInsFlags::default().jump_if_null(),
collation: program.curr_collation(),
});
}
// If we got here, then none of the conditions were a match, so we jump to the 'jump_target_when_true' label if 'jump_if_condition_is_true'.
// If not, we can just fall through without emitting an unnecessary instruction.
if condition_metadata.jump_if_condition_is_true {
program.emit_insn(Insn::Goto {
target_pc: condition_metadata.jump_target_when_true,
});
}
}
if !condition_metadata.jump_if_condition_is_true {
program.preassign_label_to_next_insn(jump_target_when_true);
}
translate_in_list(
program,
Some(referenced_tables),
lhs,
rhs,
*not,
condition_metadata,
resolver,
)?;
}
ast::Expr::Like { not, .. } => {
let cur_reg = program.alloc_register();
@@ -651,24 +677,11 @@ pub fn translate_expr(
}
ast::Expr::Cast { expr, type_name } => {
let type_name = type_name.as_ref().unwrap(); // TODO: why is this optional?
let reg_expr = program.alloc_registers(2);
translate_expr(program, referenced_tables, expr, reg_expr, resolver)?;
program.emit_insn(Insn::String8 {
// we make a comparison against uppercase static strs in the affinity() function,
// so we need to make sure we're comparing against the uppercase version,
// and it's better to do this once instead of every time we check affinity
value: type_name.name.to_uppercase(),
dest: reg_expr + 1,
});
program.mark_last_insn_constant();
program.emit_insn(Insn::Function {
constant_mask: 0,
start_reg: reg_expr,
dest: target_register,
func: FuncCtx {
func: Func::Scalar(ScalarFunc::Cast),
arg_count: 2,
},
translate_expr(program, referenced_tables, expr, target_register, resolver)?;
let type_affinity = affinity(&type_name.name.to_uppercase());
program.emit_insn(Insn::Cast {
reg: target_register,
affinity: type_affinity,
});
Ok(target_register)
}
@@ -2060,7 +2073,61 @@ pub fn translate_expr(
}
Ok(target_register)
}
ast::Expr::InList { .. } => todo!(),
ast::Expr::InList { lhs, rhs, not } => {
// Following SQLite's approach: use the same core logic as conditional InList,
// but wrap it with appropriate expression context handling
let result_reg = target_register;
// Set result to NULL initially (matches SQLite behavior)
program.emit_insn(Insn::Null {
dest: result_reg,
dest_end: None,
});
let dest_if_false = program.allocate_label();
let label_integer_conversion = program.allocate_label();
// Call the core InList logic with expression-appropriate condition metadata
translate_in_list(
program,
referenced_tables,
lhs,
rhs,
*not,
ConditionMetadata {
jump_if_condition_is_true: false,
jump_target_when_true: label_integer_conversion, // will be resolved below
jump_target_when_false: dest_if_false,
},
resolver,
)?;
// condition true: set result to 1
program.emit_insn(Insn::Integer {
value: 1,
dest: result_reg,
});
program.emit_insn(Insn::Goto {
target_pc: label_integer_conversion,
});
// False path: set result to 0
program.resolve_label(dest_if_false, program.offset());
program.emit_insn(Insn::Integer {
value: 0,
dest: result_reg,
});
program.resolve_label(label_integer_conversion, program.offset());
// Force integer conversion with AddImm 0
program.emit_insn(Insn::AddImm {
register: result_reg,
value: 0,
});
Ok(result_reg)
}
ast::Expr::InSelect { .. } => todo!(),
ast::Expr::InTable { .. } => todo!(),
ast::Expr::IsNull(expr) => {

View File

@@ -67,7 +67,7 @@ pub fn translate_insert(
// Let's disable altering a table with indices altogether instead of checking column by
// column to be extra safe.
crate::bail_parse_error!(
"INSERT to table with indexes is disabled by default. Run with `--experimental-indexes` to enable this feature."
"INSERT to table with indexes is disabled. Omit the `--experimental-indexes=false` flag to enable this feature."
);
}
let table_name = &tbl_name.name;

View File

@@ -113,7 +113,7 @@ pub fn emit_order_by(
});
program.preassign_label_to_next_insn(sort_loop_start_label);
emit_offset(program, plan, sort_loop_next_label, t_ctx.reg_offset)?;
emit_offset(program, plan, sort_loop_next_label, t_ctx.reg_offset);
program.emit_insn(Insn::SorterData {
cursor_id: sort_cursor,

View File

@@ -10,18 +10,17 @@ use turso_sqlite3_parser::ast::{PragmaName, QualifiedName};
use crate::pragma::pragma_for;
use crate::schema::Schema;
use crate::storage::pager::AutoVacuumMode;
use crate::storage::sqlite3_ondisk::{DatabaseEncoding, MIN_PAGE_CACHE_SIZE};
use crate::storage::sqlite3_ondisk::CacheSize;
use crate::storage::wal::CheckpointMode;
use crate::translate::schema::translate_create_table;
use crate::util::{normalize_ident, parse_signed_number, parse_string};
use crate::util::{normalize_ident, parse_signed_number, parse_string, IOExt as _};
use crate::vdbe::builder::{ProgramBuilder, ProgramBuilderOpts};
use crate::vdbe::insn::{Cookie, Insn};
use crate::{bail_parse_error, storage, CaptureDataChangesMode, LimboError, Value};
use crate::{bail_parse_error, CaptureDataChangesMode, LimboError, Value};
use std::str::FromStr;
use strum::IntoEnumIterator;
use super::integrity_check::translate_integrity_check;
use crate::storage::header_accessor;
use crate::storage::pager::Pager;
use crate::translate::emitter::TransactionMode;
@@ -311,15 +310,12 @@ fn query_pragma(
Ok((program, TransactionMode::None))
}
PragmaName::Encoding => {
let encoding: &str = if !pager.db_state.is_initialized() {
DatabaseEncoding::Utf8
} else {
let encoding: DatabaseEncoding =
header_accessor::get_text_encoding(&pager)?.try_into()?;
encoding
}
.into();
program.emit_string8(encoding.into(), register);
let encoding = pager
.io
.block(|| pager.with_header(|header| header.text_encoding))
.unwrap_or_default()
.to_string();
program.emit_string8(encoding, register);
program.emit_result_row(register, 1);
program.add_pragma_result_column(pragma.to_string());
Ok((program, TransactionMode::None))
@@ -433,7 +429,10 @@ fn query_pragma(
}
PragmaName::PageSize => {
program.emit_int(
header_accessor::get_page_size(&pager).unwrap_or(connection.get_page_size()) as i64,
pager
.io
.block(|| pager.with_header(|header| header.page_size.get()))
.unwrap_or(connection.get_page_size()) as i64,
register,
);
program.emit_result_row(register, 1);
@@ -484,7 +483,11 @@ fn update_auto_vacuum_mode(
largest_root_page_number: u32,
pager: Rc<Pager>,
) -> crate::Result<()> {
header_accessor::set_vacuum_mode_largest_root_page(&pager, largest_root_page_number)?;
pager.io.block(|| {
pager.with_header_mut(|header| {
header.vacuum_mode_largest_root_page = largest_root_page_number.into()
})
})?;
pager.set_auto_vacuum_mode(auto_vacuum_mode);
Ok(())
}
@@ -498,8 +501,11 @@ fn update_cache_size(
let mut cache_size = if cache_size_unformatted < 0 {
let kb = cache_size_unformatted.abs().saturating_mul(1024);
let page_size = header_accessor::get_page_size(&pager)
.unwrap_or(storage::sqlite3_ondisk::DEFAULT_PAGE_SIZE) as i64;
let page_size = pager
.io
.block(|| pager.with_header(|header| header.page_size))
.unwrap_or_default()
.get() as i64;
if page_size == 0 {
return Err(LimboError::InternalError(
"Page size cannot be zero".to_string(),
@@ -510,10 +516,7 @@ fn update_cache_size(
value
};
// SQLite uses this value as threshold for maximum cache size
const MAX_SAFE_CACHE_SIZE: i64 = 2147450880;
if cache_size > MAX_SAFE_CACHE_SIZE {
if cache_size > CacheSize::MAX_SAFE {
cache_size = 0;
cache_size_unformatted = 0;
}
@@ -523,19 +526,17 @@ fn update_cache_size(
cache_size_unformatted = 0;
}
let cache_size_usize = cache_size as usize;
let final_cache_size = if cache_size_usize < MIN_PAGE_CACHE_SIZE {
cache_size_unformatted = MIN_PAGE_CACHE_SIZE as i64;
MIN_PAGE_CACHE_SIZE
let final_cache_size = if cache_size < CacheSize::MIN {
cache_size_unformatted = CacheSize::MIN;
CacheSize::MIN
} else {
cache_size_usize
cache_size
};
connection.set_cache_size(cache_size_unformatted as i32);
pager
.change_page_cache_size(final_cache_size)
.change_page_cache_size(final_cache_size as usize)
.map_err(|e| LimboError::InternalError(format!("Failed to update page cache size: {e}")))?;
Ok(())

View File

@@ -30,7 +30,7 @@ pub fn emit_select_result(
limit_ctx: Option<LimitCtx>,
) -> Result<()> {
if let (Some(jump_to), Some(_)) = (offset_jump_to, label_on_limit_reached) {
emit_offset(program, plan, jump_to, reg_offset)?;
emit_offset(program, plan, jump_to, reg_offset);
}
let start_reg = reg_result_cols_start;
@@ -163,7 +163,7 @@ pub fn emit_offset(
plan: &SelectPlan,
jump_to: BranchOffset,
reg_offset: Option<usize>,
) -> Result<()> {
) {
match plan.offset {
Some(offset) if offset > 0 => {
program.add_comment(program.offset(), "OFFSET");
@@ -175,5 +175,4 @@ pub fn emit_offset(
}
_ => {}
}
Ok(())
}

View File

@@ -626,7 +626,7 @@ pub fn translate_drop_table(
) -> Result<ProgramBuilder> {
if !schema.indexes_enabled() && schema.table_has_indexes(&tbl_name.name.to_string()) {
bail_parse_error!(
"DROP TABLE with indexes on the table is disabled by default. Run with `--experimental-indexes` to enable this feature."
"DROP TABLE with indexes on the table is disabled by default. Omit the `--experimental-indexes=false` flag to enable this feature."
);
}
let opts = ProgramBuilderOpts {

View File

@@ -154,10 +154,6 @@ pub fn prepare_select_plan(
}
let (limit, offset) = select.limit.map_or(Ok((None, None)), |l| parse_limit(&l))?;
// FIXME: handle OFFSET for compound selects
if offset.is_some_and(|o| o > 0) {
crate::bail_parse_error!("OFFSET is not supported for compound SELECTs yet");
}
// FIXME: handle ORDER BY for compound selects
if select.order_by.is_some() {
crate::bail_parse_error!("ORDER BY is not supported for compound SELECTs yet");

View File

@@ -110,7 +110,7 @@ pub fn prepare_update_plan(
// Let's disable altering a table with indices altogether instead of checking column by
// column to be extra safe.
bail_parse_error!(
"UPDATE table disabled for table with indexes is disabled by default. Run with `--experimental-indexes` to enable this feature."
"UPDATE table disabled for table with indexes is disabled. Omit the `--experimental-indexes=false` flag to enable this feature."
);
}
let table = match schema.get_table(table_name.as_str()) {
@@ -156,18 +156,38 @@ pub fn prepare_update_plan(
.collect();
let mut set_clauses = Vec::with_capacity(body.sets.len());
for set in &mut body.sets {
let ident = normalize_ident(set.col_names[0].as_str());
let Some(col_index) = column_lookup.get(&ident) else {
bail_parse_error!("no such column: {}", ident);
};
// Process each SET assignment and map column names to expressions
// e.g. the statement `SET x = 1, y = 2, z = 3` has 3 set assignments
for set in &mut body.sets {
bind_column_references(&mut set.expr, &mut table_references, None, connection)?;
if let Some(idx) = set_clauses.iter().position(|(idx, _)| *idx == *col_index) {
set_clauses[idx].1 = set.expr.clone();
} else {
set_clauses.push((*col_index, set.expr.clone()));
let values = match &set.expr {
Expr::Parenthesized(vals) => vals.clone(),
expr => vec![expr.clone()],
};
if set.col_names.len() != values.len() {
bail_parse_error!(
"{} columns assigned {} values",
set.col_names.len(),
values.len()
);
}
// Map each column to its corresponding expression
for (col_name, expr) in set.col_names.iter().zip(values.iter()) {
let ident = normalize_ident(col_name.as_str());
let col_index = match column_lookup.get(&ident) {
Some(idx) => idx,
None => bail_parse_error!("no such column: {}", ident),
};
// Update existing entry or add new one
match set_clauses.iter_mut().find(|(idx, _)| idx == col_index) {
Some((_, existing_expr)) => *existing_expr = expr.clone(),
None => set_clauses.push((*col_index, expr.clone())),
}
}
}

View File

@@ -1,6 +1,7 @@
use crate::translate::emitter::{LimitCtx, Resolver};
use crate::translate::emitter::{Resolver, TranslateCtx};
use crate::translate::expr::{translate_expr_no_constant_opt, NoConstantOptReason};
use crate::translate::plan::{QueryDestination, SelectPlan};
use crate::translate::result_row::emit_offset;
use crate::vdbe::builder::ProgramBuilder;
use crate::vdbe::insn::{IdxInsertFlags, Insn};
use crate::vdbe::BranchOffset;
@@ -9,22 +10,19 @@ use crate::Result;
pub fn emit_values(
program: &mut ProgramBuilder,
plan: &SelectPlan,
resolver: &Resolver,
limit_ctx: Option<LimitCtx>,
t_ctx: &TranslateCtx,
) -> Result<usize> {
if plan.values.len() == 1 {
let start_reg = emit_values_when_single_row(program, plan, resolver, limit_ctx)?;
let start_reg = emit_values_when_single_row(program, plan, t_ctx)?;
return Ok(start_reg);
}
let reg_result_cols_start = match plan.query_destination {
QueryDestination::ResultRows => emit_toplevel_values(program, plan, resolver, limit_ctx)?,
QueryDestination::ResultRows => emit_toplevel_values(program, plan, t_ctx)?,
QueryDestination::CoroutineYield { yield_reg, .. } => {
emit_values_in_subquery(program, plan, resolver, yield_reg)?
}
QueryDestination::EphemeralIndex { .. } => {
emit_toplevel_values(program, plan, resolver, limit_ctx)?
emit_values_in_subquery(program, plan, &t_ctx.resolver, yield_reg)?
}
QueryDestination::EphemeralIndex { .. } => emit_toplevel_values(program, plan, t_ctx)?,
QueryDestination::EphemeralTable { .. } => unreachable!(),
};
Ok(reg_result_cols_start)
@@ -33,9 +31,10 @@ pub fn emit_values(
fn emit_values_when_single_row(
program: &mut ProgramBuilder,
plan: &SelectPlan,
resolver: &Resolver,
limit_ctx: Option<LimitCtx>,
t_ctx: &TranslateCtx,
) -> Result<usize> {
let end_label = program.allocate_label();
emit_offset(program, plan, end_label, t_ctx.reg_offset);
let first_row = &plan.values[0];
let row_len = first_row.len();
let start_reg = program.alloc_registers(row_len);
@@ -45,12 +44,11 @@ fn emit_values_when_single_row(
None,
v,
start_reg + i,
resolver,
&t_ctx.resolver,
NoConstantOptReason::RegisterReuse,
)?;
}
let end_label = program.allocate_label();
emit_values_to_destination(program, plan, start_reg, row_len, limit_ctx, end_label);
emit_values_to_destination(program, plan, t_ctx, start_reg, row_len, end_label);
program.preassign_label_to_next_insn(end_label);
Ok(start_reg)
}
@@ -58,8 +56,7 @@ fn emit_values_when_single_row(
fn emit_toplevel_values(
program: &mut ProgramBuilder,
plan: &SelectPlan,
resolver: &Resolver,
limit_ctx: Option<LimitCtx>,
t_ctx: &TranslateCtx,
) -> Result<usize> {
let yield_reg = program.alloc_register();
let definition_label = program.allocate_label();
@@ -71,7 +68,7 @@ fn emit_toplevel_values(
});
program.preassign_label_to_next_insn(start_offset_label);
let start_reg = emit_values_in_subquery(program, plan, resolver, yield_reg)?;
let start_reg = emit_values_in_subquery(program, plan, &t_ctx.resolver, yield_reg)?;
program.emit_insn(Insn::EndCoroutine { yield_reg });
program.preassign_label_to_next_insn(definition_label);
@@ -82,12 +79,15 @@ fn emit_toplevel_values(
start_offset: start_offset_label,
});
let end_label = program.allocate_label();
let goto_label = program.allocate_label();
program.preassign_label_to_next_insn(goto_label);
let yield_label = program.allocate_label();
program.preassign_label_to_next_insn(yield_label);
program.emit_insn(Insn::Yield {
yield_reg,
end_offset: end_label,
});
let goto_label = program.allocate_label();
emit_offset(program, plan, goto_label, t_ctx.reg_offset);
let row_len = plan.values[0].len();
let copy_start_reg = program.alloc_registers(row_len);
for i in 0..row_len {
@@ -98,10 +98,11 @@ fn emit_toplevel_values(
});
}
emit_values_to_destination(program, plan, copy_start_reg, row_len, limit_ctx, end_label);
emit_values_to_destination(program, plan, t_ctx, copy_start_reg, row_len, end_label);
program.preassign_label_to_next_insn(goto_label);
program.emit_insn(Insn::Goto {
target_pc: goto_label,
target_pc: yield_label,
});
program.preassign_label_to_next_insn(end_label);
@@ -139,9 +140,9 @@ fn emit_values_in_subquery(
fn emit_values_to_destination(
program: &mut ProgramBuilder,
plan: &SelectPlan,
t_ctx: &TranslateCtx,
start_reg: usize,
row_len: usize,
limit_ctx: Option<LimitCtx>,
end_label: BranchOffset,
) {
match &plan.query_destination {
@@ -150,7 +151,7 @@ fn emit_values_to_destination(
start_reg,
count: row_len,
});
if let Some(limit_ctx) = limit_ctx {
if let Some(limit_ctx) = t_ctx.limit_ctx {
program.emit_insn(Insn::DecrJumpZero {
reg: limit_ctx.reg_limit,
target_pc: end_label,

View File

@@ -1156,6 +1156,12 @@ impl ImmutableRecord {
Err(_) => None,
}
}
pub fn column_count(&self) -> usize {
let mut cursor = RecordCursor::new();
cursor.parse_full_header(self).unwrap();
cursor.offsets.len()
}
}
/// A cursor for lazily parsing SQLite record format data.

View File

@@ -1,5 +1,4 @@
#![allow(unused)]
use crate::storage::header_accessor::get_schema_cookie;
use crate::translate::expr::WalkControl;
use crate::types::IOResult;
use crate::{

File diff suppressed because it is too large Load Diff

View File

@@ -207,7 +207,7 @@ pub fn insn_to_str(
"IfPos",
*reg as i32,
target_pc.as_debug_int(),
0,
*decrement_by as i32,
Value::build_text(""),
0,
format!(
@@ -1358,6 +1358,15 @@ pub fn insn_to_str(
0,
format!("r[{dest}]=r[{lhs}] << r[{rhs}]"),
),
Insn::AddImm { register, value } => (
"AddImm",
*register as i32,
*value as i32,
0,
Value::build_text(""),
0,
format!("r[{register}]=r[{register}]+{value}"),
),
Insn::Variable { index, dest } => (
"Variable",
usize::from(*index) as i32,
@@ -1609,6 +1618,15 @@ pub fn insn_to_str(
0,
format!("r[{}] = data", *dest),
),
Insn::Cast { reg, affinity } => (
"Cast",
*reg as i32,
0,
0,
Value::build_text(""),
0,
format!("affinity(r[{}]={:?})", *reg, affinity),
),
};
format!(
"{:<4} {:<17} {:<4} {:<4} {:<4} {:<13} {:<2} {}",

View File

@@ -706,6 +706,12 @@ pub enum Insn {
func: FuncCtx, // P4
},
/// Cast register P1 to affinity P2 and store in register P1
Cast {
reg: usize,
affinity: Affinity,
},
InitCoroutine {
yield_reg: usize,
jump_on_definition: BranchOffset,
@@ -871,6 +877,14 @@ pub enum Insn {
dest: usize,
},
/// Add immediate value to register and force integer conversion.
/// Add the constant P2 to the value in register P1. The result is always an integer.
/// To force any register to be an integer, just add 0.
AddImm {
register: usize, // P1: target register
value: i64, // P2: immediate value to add
},
/// Get parameter variable.
Variable {
index: NonZero<usize>,
@@ -1075,6 +1089,7 @@ impl Insn {
Insn::SorterData { .. } => execute::op_sorter_data,
Insn::SorterNext { .. } => execute::op_sorter_next,
Insn::Function { .. } => execute::op_function,
Insn::Cast { .. } => execute::op_cast,
Insn::InitCoroutine { .. } => execute::op_init_coroutine,
Insn::EndCoroutine { .. } => execute::op_end_coroutine,
Insn::Yield { .. } => execute::op_yield,
@@ -1099,6 +1114,7 @@ impl Insn {
Insn::ParseSchema { .. } => execute::op_parse_schema,
Insn::ShiftRight { .. } => execute::op_shift_right,
Insn::ShiftLeft { .. } => execute::op_shift_left,
Insn::AddImm { .. } => execute::op_add_imm,
Insn::Variable { .. } => execute::op_variable,
Insn::ZeroOrNull { .. } => execute::op_zero_or_null,
Insn::Not { .. } => execute::op_not,

View File

@@ -27,6 +27,7 @@ pub mod sorter;
use crate::{
error::LimboError,
function::{AggFunc, FuncCtx},
state_machine::StateTransition,
storage::sqlite3_ondisk::SmallVec,
translate::plan::TableReferences,
types::{IOResult, RawSlice, TextRef},
@@ -390,7 +391,7 @@ impl Program {
pub fn step(
&self,
state: &mut ProgramState,
mv_store: Option<Rc<MvStore>>,
mv_store: Option<Arc<MvStore>>,
pager: Rc<Pager>,
) -> Result<StepResult> {
loop {
@@ -432,16 +433,22 @@ impl Program {
&self,
pager: Rc<Pager>,
program_state: &mut ProgramState,
mv_store: Option<&Rc<MvStore>>,
mv_store: Option<&Arc<MvStore>>,
rollback: bool,
) -> Result<StepResult> {
if let Some(mv_store) = mv_store {
let conn = self.connection.clone();
let auto_commit = conn.auto_commit.get();
if auto_commit {
// FIXME: we don't want to commit stuff from other programs.
let mut mv_transactions = conn.mv_transactions.borrow_mut();
for tx_id in mv_transactions.iter() {
mv_store.commit_tx(*tx_id).unwrap();
let mut state_machine =
mv_store.commit_tx(*tx_id, pager.clone(), &conn).unwrap();
state_machine
.step(mv_store)
.map_err(|e| LimboError::InternalError(e.to_string()))?;
assert!(state_machine.is_finalized());
}
mv_transactions.clear();
}
@@ -757,7 +764,10 @@ pub fn handle_program_error(
err: &LimboError,
) -> Result<()> {
match err {
// Transaction errors, e.g. trying to start a nested transaction, do not cause a rollback.
LimboError::TxError(_) => {}
// Table locked errors, e.g. trying to checkpoint in an interactive transaction, do not cause a rollback.
LimboError::TableLocked => {}
_ => {
let state = connection.transaction_state.get();
if let TransactionState::Write { schema_did_change } = state {

View File

@@ -402,7 +402,7 @@ impl SortedChunk {
read_buffer_ref,
read_complete,
)));
let c = self.file.pread(self.total_bytes_read.get(), c)?;
let _c = self.file.pread(self.total_bytes_read.get(), c)?;
Ok(())
}
@@ -448,7 +448,7 @@ impl SortedChunk {
});
let c = Completion::new(CompletionType::Write(WriteCompletion::new(write_complete)));
let c = self.file.pwrite(0, buffer_ref, c)?;
let _c = self.file.pwrite(0, buffer_ref, c)?;
Ok(())
}
}

View File

@@ -25,11 +25,59 @@ pub struct Vector {
}
impl Vector {
/// # Safety
///
/// This method is used to reinterpret the underlying `Vec<u8>` data
/// as a `&[f32]` slice. This is only valid if:
/// - The buffer is correctly aligned for `f32`
/// - The length of the buffer is exactly `dims * size_of::<f32>()`
pub fn as_f32_slice(&self) -> &[f32] {
unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const f32, self.dims) }
if self.dims == 0 {
return &[];
}
assert_eq!(
self.data.len(),
self.dims * std::mem::size_of::<f32>(),
"data length must equal dims * size_of::<f32>()"
);
let ptr = self.data.as_ptr();
let align = std::mem::align_of::<f32>();
assert_eq!(
ptr.align_offset(align),
0,
"data pointer must be aligned to {align} bytes for f32 access"
);
unsafe { std::slice::from_raw_parts(ptr as *const f32, self.dims) }
}
/// # Safety
///
/// This method is used to reinterpret the underlying `Vec<u8>` data
/// as a `&[f64]` slice. This is only valid if:
/// - The buffer is correctly aligned for `f64`
/// - The length of the buffer is exactly `dims * size_of::<f64>()`
pub fn as_f64_slice(&self) -> &[f64] {
if self.dims == 0 {
return &[];
}
assert_eq!(
self.data.len(),
self.dims * std::mem::size_of::<f64>(),
"data length must equal dims * size_of::<f64>()"
);
let ptr = self.data.as_ptr();
let align = std::mem::align_of::<f64>();
assert_eq!(
ptr.align_offset(align),
0,
"data pointer must be aligned to {align} bytes for f64 access"
);
unsafe { std::slice::from_raw_parts(ptr as *const f64, self.dims) }
}
}
@@ -281,11 +329,6 @@ pub fn vector_f64_distance_cos(v1: &Vector, v2: &Vector) -> Result<f64> {
}
pub fn vector_type(blob: &[u8]) -> Result<VectorType> {
if blob.is_empty() {
return Err(LimboError::ConversionError(
"Invalid vector value".to_string(),
));
}
// Even-sized blobs are always float32.
if blob.len() % 2 == 0 {
return Ok(VectorType::Float32);
@@ -706,6 +749,7 @@ mod tests {
let v2 = float32_vec_from(&[]);
let result = vector_concat(&v1, &v2).unwrap();
assert_eq!(result.dims, 0);
assert_eq!(f32_slice_from_vector(&result), Vec::<f32>::new());
}
#[test]

View File

@@ -0,0 +1,151 @@
# JavaScript API reference
This document describes the JavaScript API for Turso. The API is implemented in two different packages:
- **`bindings/javascript`**: Native bindings for the Turso database.
- **`packages/turso-serverless`**: Serverless driver for Turso Cloud databases.
The API is compatible with the libSQL promise API, which is an asynchronous variant of the `better-sqlite3` API.
## class Database
The `Database` class represents a connection that can prepare and execute SQL statements.
### Methods
#### new Database(path, [options]) ⇒ Database
Creates a new database connection.
| Param | Type | Description |
| ------- | ------------------- | ------------------------- |
| path | <code>string</code> | Path to the database file |
The `path` parameter points to the SQLite database file to open. If the file pointed to by `path` does not exist, it will be created.
To open an in-memory database, please pass `:memory:` as the `path` parameter.
The function returns a `Database` object.
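For example, a minimal sketch of opening a connection (this assumes the package's default export is the `Database` class, as described in the Getting Started section of the README):
```javascript
// Assumption: the default export of @tursodatabase/turso is the Database class.
import Database from "@tursodatabase/turso";

// Open (or create) a database file on disk.
const db = new Database("local.db");

// Open a transient in-memory database instead.
const mem = new Database(":memory:");
```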
#### prepare(sql) ⇒ Statement
Prepares a SQL statement for execution.
| Param | Type | Description |
| ------ | ------------------- | ------------------------------------ |
| sql | <code>string</code> | The SQL statement string to prepare. |
The function returns a `Statement` object.
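For example, a sketch of preparing a statement once so it can be executed later with different bind parameters (the table and column names are illustrative):
```javascript
const stmt = db.prepare("SELECT id, name FROM users WHERE id = ?");
```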
#### transaction(function) ⇒ function
This function is currently not supported.
#### pragma(string, [options]) ⇒ results
This function is currently not supported.
#### backup(destination, [options]) ⇒ promise
This function is currently not supported.
#### serialize([options]) ⇒ Buffer
This function is currently not supported.
#### function(name, [options], function) ⇒ this
This function is currently not supported.
#### aggregate(name, options) ⇒ this
This function is currently not supported.
#### table(name, definition) ⇒ this
This function is currently not supported.
#### authorizer(rules) ⇒ this
This function is currently not supported.
#### loadExtension(path, [entryPoint]) ⇒ this
This function is currently not supported.
#### exec(sql) ⇒ this
Executes a SQL statement.
| Param | Type | Description |
| ------ | ------------------- | ------------------------------------ |
| sql | <code>string</code> | The SQL statement string to execute. |
#### interrupt() ⇒ this
This function is currently not supported.
#### close() ⇒ this
Closes the database connection.
## class Statement
### Methods
#### run([...bindParameters]) ⇒ object
Executes the SQL statement and returns an info object.
| Param | Type | Description |
| -------------- | ----------------------------- | ------------------------------------------------ |
| bindParameters | <code>array of objects</code> | The bind parameters for executing the statement. |
The returned info object contains two properties: `changes`, the number of modified rows, and `lastInsertRowid`, the `rowid` of the last inserted row.
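A sketch of how the info object is typically used (the `users` table is illustrative, and the call is assumed to follow the promise-based convention described above):
```javascript
const insert = db.prepare("INSERT INTO users (name) VALUES (?)");
const info = await insert.run(["Alice"]);
console.log(info.changes);         // number of modified rows, e.g. 1
console.log(info.lastInsertRowid); // rowid of the newly inserted row
```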
#### get([...bindParameters]) ⇒ row
Executes the SQL statement and returns the first row.
| Param | Type | Description |
| -------------- | ----------------------------- | ------------------------------------------------ |
| bindParameters | <code>array of objects</code> | The bind parameters for executing the statement. |
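For example (a sketch; the exact value returned when no row matches may differ between the native and serverless packages):
```javascript
const byId = db.prepare("SELECT id, name FROM users WHERE id = ?");
const row = await byId.get([1]);
if (row) {
  console.log(row.name);
}
```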
#### all([...bindParameters]) ⇒ array of rows
Executes the SQL statement and returns an array of the resulting rows.
| Param | Type | Description |
| -------------- | ----------------------------- | ------------------------------------------------ |
| bindParameters | <code>array of objects</code> | The bind parameters for executing the statement. |
#### iterate([...bindParameters]) ⇒ iterator
Executes the SQL statement and returns an iterator to the resulting rows.
| Param | Type | Description |
| -------------- | ----------------------------- | ------------------------------------------------ |
| bindParameters | <code>array of objects</code> | The bind parameters for executing the statement. |
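A sketch of consuming the iterator with `for await`, which streams rows one at a time instead of materializing the whole result set:
```javascript
const stmt = db.prepare("SELECT id, name FROM users");
for await (const row of stmt.iterate()) {
  console.log(row.id, row.name);
}
```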
#### pluck([toggleState]) ⇒ this
This function is currently not supported.
#### expand([toggleState]) ⇒ this
This function is currently not supported.
#### raw([rawMode]) ⇒ this
This function is currently not supported.
#### timed([toggle]) ⇒ this
This function is currently not supported.
#### columns() ⇒ array of objects
This function is currently not supported.
#### bind([...bindParameters]) ⇒ this
This function is currently not supported.

View File

@@ -331,6 +331,10 @@ Installing the WebAssembly package:
npm i @tursodatabase/turso --cpu wasm32
```
### API reference
See [JavaScript API reference](docs/javascript-api-reference.md) for more information.
### Getting Started
To use Turso from a JavaScript application, you need to import the `Database` type from the `@tursodatabase/turso` package.

View File

@@ -1,12 +1,12 @@
{
"name": "@tursodatabase/serverless",
"version": "0.1.1",
"version": "0.1.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@tursodatabase/serverless",
"version": "0.1.1",
"version": "0.1.2",
"license": "MIT",
"devDependencies": {
"@types/node": "^24.0.13",

View File

@@ -1,6 +1,6 @@
{
"name": "@tursodatabase/serverless",
"version": "0.1.1",
"version": "0.1.2",
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@@ -307,6 +307,11 @@ class LibSQLClient implements Client {
close(): void {
this._closed = true;
// Note: The libSQL client interface expects synchronous close,
// but our underlying session needs async close. We'll fire and forget.
this.session.close().catch(error => {
console.error('Error closing session:', error);
});
}
}

View File

@@ -90,6 +90,15 @@ export class Connection {
const sql = `PRAGMA ${pragma}`;
return this.session.execute(sql);
}
/**
* Close the connection.
*
* This sends a close request to the server to properly clean up the stream.
*/
async close(): Promise<void> {
await this.session.close();
}
}
/**

View File

@@ -18,12 +18,17 @@ export interface ExecuteResult {
last_insert_rowid?: string;
}
export interface NamedArg {
name: string;
value: Value;
}
export interface ExecuteRequest {
type: 'execute';
stmt: {
sql: string;
args: Value[];
named_args: Value[];
named_args: NamedArg[];
want_rows: boolean;
};
}
@@ -32,6 +37,7 @@ export interface BatchStep {
stmt: {
sql: string;
args: Value[];
named_args?: NamedArg[];
want_rows: boolean;
};
condition?: {
@@ -52,9 +58,13 @@ export interface SequenceRequest {
sql: string;
}
export interface CloseRequest {
type: 'close';
}
export interface PipelineRequest {
baton: string | null;
requests: (ExecuteRequest | BatchRequest | SequenceRequest)[];
requests: (ExecuteRequest | BatchRequest | SequenceRequest | CloseRequest)[];
}
export interface PipelineResponse {
@@ -63,7 +73,7 @@ export interface PipelineResponse {
results: Array<{
type: 'ok' | 'error';
response?: {
type: 'execute' | 'batch' | 'sequence';
type: 'execute' | 'batch' | 'sequence' | 'close';
result?: ExecuteResult;
};
error?: {
@@ -182,52 +192,71 @@ export async function executeCursor(
const decoder = new TextDecoder();
let buffer = '';
let isFirstLine = true;
let cursorResponse: CursorResponse;
let cursorResponse: CursorResponse | undefined;
// First, read until we get the cursor response (first line)
while (!cursorResponse) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const newlineIndex = buffer.indexOf('\n');
if (newlineIndex !== -1) {
const line = buffer.slice(0, newlineIndex).trim();
buffer = buffer.slice(newlineIndex + 1);
if (line) {
cursorResponse = JSON.parse(line);
break;
}
}
}
if (!cursorResponse) {
throw new DatabaseError('No cursor response received');
}
async function* parseEntries(): AsyncGenerator<CursorEntry> {
try {
// Process any remaining data in the buffer
let newlineIndex;
while ((newlineIndex = buffer.indexOf('\n')) !== -1) {
const line = buffer.slice(0, newlineIndex).trim();
buffer = buffer.slice(newlineIndex + 1);
if (line) {
yield JSON.parse(line) as CursorEntry;
}
}
// Continue reading from the stream
while (true) {
const { done, value } = await reader!.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
let newlineIndex;
while ((newlineIndex = buffer.indexOf('\n')) !== -1) {
const line = buffer.slice(0, newlineIndex).trim();
buffer = buffer.slice(newlineIndex + 1);
if (line) {
if (isFirstLine) {
cursorResponse = JSON.parse(line);
isFirstLine = false;
} else {
yield JSON.parse(line) as CursorEntry;
}
yield JSON.parse(line) as CursorEntry;
}
}
}
// Process any remaining data in the buffer
if (buffer.trim()) {
yield JSON.parse(buffer.trim()) as CursorEntry;
}
} finally {
reader!.releaseLock();
}
}
const entries = parseEntries();
// Get the first entry to parse the cursor response
const firstEntry = await entries.next();
if (!firstEntry.done) {
// Put the first entry back
const generator = (async function* () {
yield firstEntry.value;
yield* entries;
})();
return { response: cursorResponse!, entries: generator };
}
return { response: cursorResponse!, entries };
return { response: cursorResponse, entries: parseEntries() };
}
export async function executePipeline(

View File

@@ -7,7 +7,10 @@ import {
type CursorResponse,
type CursorEntry,
type PipelineRequest,
type SequenceRequest
type SequenceRequest,
type CloseRequest,
type NamedArg,
type Value
} from './protocol.js';
import { DatabaseError } from './error.js';
@@ -49,10 +52,10 @@ export class Session {
* Execute a SQL statement and return all results.
*
* @param sql - The SQL statement to execute
* @param args - Optional array of parameter values
* @param args - Optional array of parameter values or object with named parameters
* @returns Promise resolving to the complete result set
*/
async execute(sql: string, args: any[] = []): Promise<any> {
async execute(sql: string, args: any[] | Record<string, any> = []): Promise<any> {
const { response, entries } = await this.executeRaw(sql, args);
const result = await this.processCursorEntries(entries);
return result;
@@ -62,17 +65,56 @@ export class Session {
* Execute a SQL statement and return the raw response and entries.
*
* @param sql - The SQL statement to execute
* @param args - Optional array of parameter values
* @param args - Optional array of parameter values or object with named parameters
* @returns Promise resolving to the raw response and cursor entries
*/
async executeRaw(sql: string, args: any[] = []): Promise<{ response: CursorResponse; entries: AsyncGenerator<CursorEntry> }> {
async executeRaw(sql: string, args: any[] | Record<string, any> = []): Promise<{ response: CursorResponse; entries: AsyncGenerator<CursorEntry> }> {
let positionalArgs: Value[] = [];
let namedArgs: NamedArg[] = [];
if (Array.isArray(args)) {
positionalArgs = args.map(encodeValue);
} else {
// Check if this is an object with numeric keys (for ?1, ?2 style parameters)
const keys = Object.keys(args);
const isNumericKeys = keys.length > 0 && keys.every(key => /^\d+$/.test(key));
if (isNumericKeys) {
// Convert numeric-keyed object to positional args
// Sort keys numerically to ensure correct order
const sortedKeys = keys.sort((a, b) => parseInt(a) - parseInt(b));
const maxIndex = parseInt(sortedKeys[sortedKeys.length - 1]);
// Create array with undefined for missing indices
positionalArgs = new Array(maxIndex);
for (const key of sortedKeys) {
const index = parseInt(key) - 1; // Convert to 0-based index
positionalArgs[index] = encodeValue(args[key]);
}
// Fill any undefined values with null
for (let i = 0; i < positionalArgs.length; i++) {
if (positionalArgs[i] === undefined) {
positionalArgs[i] = { type: 'null' };
}
}
} else {
// Convert object with named parameters to NamedArg array
namedArgs = Object.entries(args).map(([name, value]) => ({
name,
value: encodeValue(value)
}));
}
}
const request: CursorRequest = {
baton: this.baton,
batch: {
steps: [{
stmt: {
sql,
args: args.map(encodeValue),
args: positionalArgs,
named_args: namedArgs,
want_rows: true
}
}]
@@ -180,6 +222,7 @@ export class Session {
stmt: {
sql,
args: [],
named_args: [],
want_rows: false
}
}))
@@ -248,4 +291,33 @@ export class Session {
}
}
}
/**
* Close the session.
*
* This sends a close request to the server to properly clean up the stream
* before resetting the local state.
*/
async close(): Promise<void> {
// Only send close request if we have an active baton
if (this.baton) {
try {
const request: PipelineRequest = {
baton: this.baton,
requests: [{
type: "close"
} as CloseRequest]
};
await executePipeline(this.baseUrl, this.config.authToken, request);
} catch (error) {
// Log and otherwise ignore errors during close, as the connection might already be closed
console.error('Error closing session:', error);
}
}
// Reset local state
this.baton = null;
this.baseUrl = '';
}
}

View File

@@ -17,16 +17,36 @@ import { DatabaseError } from './error.js';
export class Statement {
private session: Session;
private sql: string;
private presentationMode: 'expanded' | 'raw' | 'pluck' = 'expanded';
constructor(sessionConfig: SessionConfig, sql: string) {
this.session = new Session(sessionConfig);
this.sql = sql;
}
/**
* Enable raw mode to return arrays instead of objects.
*
* @param raw Enable or disable raw mode. If you don't pass the parameter, raw mode is enabled.
* @returns This statement instance for chaining
*
* @example
* ```typescript
* const stmt = client.prepare("SELECT * FROM users WHERE id = ?");
* const row = await stmt.raw().get([1]);
* console.log(row); // [1, "Alice", "alice@example.org"]
* ```
*/
raw(raw?: boolean): Statement {
this.presentationMode = raw === false ? 'expanded' : 'raw';
return this;
}
/**
* Executes the prepared statement.
*
* @param args - Optional array of parameter values for the SQL statement
* @param args - Optional array of parameter values or object with named parameters
* @returns Promise resolving to the result of the statement
*
* @example
@@ -36,16 +56,17 @@ export class Statement {
* console.log(`Inserted user with ID ${result.lastInsertRowid}`);
* ```
*/
async run(args: any[] = []): Promise<any> {
const result = await this.session.execute(this.sql, args);
async run(args?: any): Promise<any> {
const normalizedArgs = this.normalizeArgs(args);
const result = await this.session.execute(this.sql, normalizedArgs);
return { changes: result.rowsAffected, lastInsertRowid: result.lastInsertRowid };
}
/**
* Execute the statement and return the first row.
*
* @param args - Optional array of parameter values for the SQL statement
* @returns Promise resolving to the first row or null if no results
* @param args - Optional array of parameter values or object with named parameters
* @returns Promise resolving to the first row or undefined if no results
*
* @example
* ```typescript
@@ -56,15 +77,27 @@ export class Statement {
* }
* ```
*/
async get(args: any[] = []): Promise<any> {
const result = await this.session.execute(this.sql, args);
return result.rows[0] || null;
async get(args?: any): Promise<any> {
const normalizedArgs = this.normalizeArgs(args);
const result = await this.session.execute(this.sql, normalizedArgs);
const row = result.rows[0];
if (!row) {
return undefined;
}
if (this.presentationMode === 'raw') {
// In raw mode, return the row as a plain array (it already is one)
// The row object is already an array with column properties added
return [...row];
}
return row;
}
/**
* Execute the statement and return all rows.
*
* @param args - Optional array of parameter values for the SQL statement
* @param args - Optional array of parameter values or object with named parameters
* @returns Promise resolving to an array of all result rows
*
* @example
@@ -74,8 +107,16 @@ export class Statement {
* console.log(`Found ${activeUsers.length} active users`);
* ```
*/
async all(args: any[] = []): Promise<any[]> {
const result = await this.session.execute(this.sql, args);
async all(args?: any): Promise<any[]> {
const normalizedArgs = this.normalizeArgs(args);
const result = await this.session.execute(this.sql, normalizedArgs);
if (this.presentationMode === 'raw') {
// In raw mode, return arrays of values
// Each row is already an array with column properties added
return result.rows.map((row: any) => [...row]);
}
return result.rows;
}
@@ -85,7 +126,7 @@ export class Statement {
* This method provides memory-efficient processing of large result sets
* by streaming rows one at a time instead of loading everything into memory.
*
* @param args - Optional array of parameter values for the SQL statement
* @param args - Optional array of parameter values or object with named parameters
* @returns AsyncGenerator that yields individual rows
*
* @example
@@ -97,8 +138,9 @@ export class Statement {
* }
* ```
*/
async *iterate(args: any[] = []): AsyncGenerator<any> {
const { response, entries } = await this.session.executeRaw(this.sql, args);
async *iterate(args?: any): AsyncGenerator<any> {
const normalizedArgs = this.normalizeArgs(args);
const { response, entries } = await this.session.executeRaw(this.sql, normalizedArgs);
let columns: string[] = [];
@@ -112,8 +154,13 @@ export class Statement {
case 'row':
if (entry.row) {
const decodedRow = entry.row.map(decodeValue);
const rowObject = this.session.createRowObject(decodedRow, columns);
yield rowObject;
if (this.presentationMode === 'raw') {
// In raw mode, yield arrays of values
yield decodedRow;
} else {
const rowObject = this.session.createRowObject(decodedRow, columns);
yield rowObject;
}
}
break;
case 'step_error':
@@ -123,4 +170,27 @@ export class Statement {
}
}
/**
* Normalize arguments to handle both single values and arrays.
* Matches the behavior of the native bindings.
*/
private normalizeArgs(args: any): any[] | Record<string, any> {
// No arguments provided
if (args === undefined) {
return [];
}
// If it's an array, return as-is
if (Array.isArray(args)) {
return args;
}
// Check if it's a plain object (for named parameters)
if (args !== null && typeof args === 'object' && args.constructor === Object) {
return args;
}
// Single value - wrap in array
return [args];
}
}

View File

@@ -23,7 +23,7 @@ rm "$CLICKBENCH_DIR/mydb"* || true
# Create DB using tursodb
echo "Creating DB..."
"$RELEASE_BUILD_DIR/tursodb" --quiet --experimental-indexes "$CLICKBENCH_DIR/mydb" < "$CLICKBENCH_DIR/create.sql"
"$RELEASE_BUILD_DIR/tursodb" --quiet "$CLICKBENCH_DIR/mydb" < "$CLICKBENCH_DIR/create.sql"
# Download a subset of the clickbench dataset if it doesn't exist
NUM_ROWS=1000000

View File

@@ -37,7 +37,7 @@ grep -v '^--' "$CLICKBENCH_DIR/queries.sql" | while read -r query; do
for _ in $(seq 1 $TRIES); do
clear_caches
echo "----tursodb----"
((time "$RELEASE_BUILD_DIR/tursodb" --quiet --experimental-indexes -m list "$CLICKBENCH_DIR/mydb" <<< "${query}") 2>&1) | tee -a clickbench-tursodb.txt
((time "$RELEASE_BUILD_DIR/tursodb" --quiet -m list "$CLICKBENCH_DIR/mydb" <<< "${query}") 2>&1) | tee -a clickbench-tursodb.txt
clear_caches
echo
echo "----sqlite----"

View File

@@ -66,7 +66,7 @@ for query_file in $(ls "$QUERIES_DIR"/*.sql | sort -V); do
# Clear caches before Limbo run
clear_caches
# Run Limbo
limbo_output=$( { time -p "$LIMBO_BIN" "$DB_FILE" --experimental-indexes --quiet --output-mode list "$(cat $query_file)" 2>&1; } 2>&1)
limbo_output=$( { time -p "$LIMBO_BIN" "$DB_FILE" --quiet --output-mode list "$(cat $query_file)" 2>&1; } 2>&1)
limbo_non_time_lines=$(echo "$limbo_output" | grep -v -e "^real" -e "^user" -e "^sys")
limbo_real_time=$(echo "$limbo_output" | grep "^real" | awk '{print $2}')
echo "Running $query_name with SQLite3..." >&2

View File

@@ -1,14 +1,11 @@
#!/usr/bin/env python3
#
# Copyright 2024 the Limbo authors. All rights reserved. MIT license.
# Copyright 2024 the Turso authors. All rights reserved. MIT license.
#
# A script to merge a pull requests with a nice merge commit.
# A script to merge a pull request with a nice merge commit using GitHub CLI.
#
# Requirements:
#
# ```
# pip install PyGithub
# ```
# - GitHub CLI (`gh`) must be installed and authenticated
import json
import os
import re
@@ -17,13 +14,14 @@ import sys
import tempfile
import textwrap
from github import Github
def run_command(command):
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
output, error = process.communicate()
return output.decode("utf-8").strip(), error.decode("utf-8").strip(), process.returncode
def run_command(command, capture_output=True):
if capture_output:
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
output, error = process.communicate()
return output.decode("utf-8").strip(), error.decode("utf-8").strip(), process.returncode
else:
return "", "", subprocess.call(command, shell=True)
def load_user_mapping(file_path=".github.json"):
@@ -36,43 +34,49 @@ def load_user_mapping(file_path=".github.json"):
user_mapping = load_user_mapping()
def get_user_email(g, username):
def get_user_email(username):
if username in user_mapping:
return f"{user_mapping[username]['name']} <{user_mapping[username]['email']}>"
try:
user = g.get_user(username)
name = user.name if user.name else username
if user.email:
return f"{name} <{user.email}>"
# Try to get user info from gh CLI
output, _, returncode = run_command(f"gh api users/{username}")
if returncode == 0:
user_data = json.loads(output)
name = user_data.get("name", username)
email = user_data.get("email")
if email:
return f"{name} <{email}>"
return f"{name} (@{username})"
except Exception as e:
print(f"Error fetching email for user {username}: {str(e)}")
# If we couldn't find an email, return a noreply address
# Fallback to noreply address
return f"{username} <{username}@users.noreply.github.com>"
def get_pr_info(g, repo, pr_number):
pr = repo.get_pull(int(pr_number))
author = pr.user
author_name = author.name if author.name else author.login
def get_pr_info(pr_number):
output, error, returncode = run_command(
f"gh pr view {pr_number} --json number,title,author,headRefName,body,reviews"
)
if returncode != 0:
print(f"Error fetching PR #{pr_number}: {error}")
sys.exit(1)
pr_data = json.loads(output)
# Get the list of users who reviewed the PR
reviewed_by = []
reviews = pr.get_reviews()
for review in reviews:
if review.state == "APPROVED":
reviewer = review.user
reviewed_by.append(get_user_email(g, reviewer.login))
for review in pr_data.get("reviews", []):
if review["state"] == "APPROVED":
reviewed_by.append(get_user_email(review["author"]["login"]))
# Remove duplicates while preserving order
reviewed_by = list(dict.fromkeys(reviewed_by))
return {
"number": pr.number,
"title": pr.title,
"author": author_name,
"head": pr.head.ref,
"head_sha": pr.head.sha,
"body": pr.body.strip() if pr.body else "",
"number": pr_data["number"],
"title": pr_data["title"],
"author": pr_data["author"]["login"],
"author_name": pr_data["author"].get("name", pr_data["author"]["login"]),
"head": pr_data["headRefName"],
"body": (pr_data.get("body") or "").strip(),
"reviewed_by": reviewed_by,
}
@@ -92,77 +96,127 @@ def wrap_text(text, width=72):
return "\n".join(wrapped_lines)
def merge_pr(pr_number):
# GitHub authentication
token = os.getenv("GITHUB_TOKEN")
g = Github(token)
# Get the repository
repo_name = os.getenv("GITHUB_REPOSITORY")
if not repo_name:
print("Error: GITHUB_REPOSITORY environment variable not set")
sys.exit(1)
repo = g.get_repo(repo_name)
# Get PR information
pr_info = get_pr_info(g, repo, pr_number)
# Format commit message
commit_title = f"Merge '{pr_info['title']}' from {pr_info['author']}"
commit_body = wrap_text(pr_info["body"])
commit_message = f"{commit_title}\n\n{commit_body}\n"
# Add Reviewed-by lines
for approver in pr_info["reviewed_by"]:
commit_message += f"\nReviewed-by: {approver}"
# Add Closes line
commit_message += f"\n\nCloses #{pr_info['number']}"
def merge_remote(pr_number: int, commit_message: str, commit_title: str):
output, error, returncode = run_command(f"gh pr checks {pr_number} --json state")
if returncode == 0:
checks_data = json.loads(output)
if checks_data and any(check.get("state") == "FAILURE" for check in checks_data):
print("Warning: Some checks are failing")
if input("Do you want to proceed with the merge? (y/N): ").strip().lower() != "y":
exit(0)
# Create a temporary file for the commit message
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as temp_file:
temp_file.write(commit_message)
temp_file_path = temp_file.name
try:
print(f"\nMerging PR #{pr_number} with custom commit message...")
# Use gh pr merge with the commit message file
cmd = f'gh pr merge {pr_number} --merge --subject "{commit_title}" --body-file "{temp_file_path}"'
output, error, returncode = run_command(cmd, capture_output=False)
if returncode == 0:
print(f"\nPull request #{pr_number} merged successfully!")
print(f"\nMerge commit message:\n{commit_message}")
else:
print(f"Error merging PR: {error}")
sys.exit(1)
finally:
# Clean up the temporary file
os.unlink(temp_file_path)
def merge_local(pr_number: int, commit_message: str):
current_branch, _, _ = run_command("git branch --show-current")
print(f"Fetching PR #{pr_number}...")
cmd = f"gh pr checkout {pr_number}"
_, error, returncode = run_command(cmd)
if returncode != 0:
print(f"Error checking out PR: {error}")
sys.exit(1)
pr_branch, _, _ = run_command("git branch --show-current")
cmd = "git checkout main"
_, error, returncode = run_command(cmd)
if returncode != 0:
print(f"Error checking out main branch: {error}")
sys.exit(1)
with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
temp_file.write(commit_message)
temp_file_path = temp_file.name
try:
# Instead of fetching to a branch, fetch the specific commit
cmd = f"git fetch origin pull/{pr_number}/head"
output, error, returncode = run_command(cmd)
if returncode != 0:
print(f"Error fetching PR: {error}")
sys.exit(1)
# Checkout main branch
cmd = "git checkout main"
output, error, returncode = run_command(cmd)
if returncode != 0:
print(f"Error checking out main branch: {error}")
sys.exit(1)
# Merge using the commit SHA instead of branch name
cmd = f"git merge --no-ff {pr_info['head_sha']} -F {temp_file_path}"
output, error, returncode = run_command(cmd)
# Merge the PR branch with the custom message
# Using -F with the full message (title + body)
cmd = f"git merge --no-ff {pr_branch} -F {temp_file_path}"
_, error, returncode = run_command(cmd)
if returncode != 0:
print(f"Error merging PR: {error}")
# Try to go back to original branch
run_command(f"git checkout {current_branch}")
sys.exit(1)
print("Pull request merged successfully!")
print(f"Merge commit message:\n{commit_message}")
print("\nPull request merged successfully locally!")
print(f"\nMerge commit message:\n{commit_message}")
finally:
# Clean up the temporary file
os.unlink(temp_file_path)
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python merge_pr.py <pr_number>")
def merge_pr(pr_number, use_api=True):
"""Merge a pull request with a formatted commit message"""
check_gh_auth()
print(f"Fetching PR #{pr_number}...")
pr_info = get_pr_info(pr_number)
print(f"PR found: '{pr_info['title']}' by {pr_info['author']}")
# Format commit message
commit_title = f"Merge '{pr_info['title']}' from {pr_info['author_name']}"
commit_body = wrap_text(pr_info["body"])
commit_message_parts = [commit_title]
if commit_body:
commit_message_parts.append("") # Empty line between title and body
commit_message_parts.append(commit_body)
if pr_info["reviewed_by"]:
commit_message_parts.append("") # Empty line before reviewed-by
for approver in pr_info["reviewed_by"]:
commit_message_parts.append(f"Reviewed-by: {approver}")
commit_message_parts.append("") # Empty line before Closes
commit_message_parts.append(f"Closes #{pr_info['number']}")
commit_message = "\n".join(commit_message_parts)
if use_api:
# For remote merge, we need to separate title from body
commit_body_for_api = "\n".join(commit_message_parts[2:])
merge_remote(pr_number, commit_body_for_api, commit_title)
else:
merge_local(pr_number, commit_message)
def check_gh_auth():
"""Check if gh CLI is authenticated"""
_, _, returncode = run_command("gh auth status")
if returncode != 0:
print("Error: GitHub CLI is not authenticated. Run 'gh auth login' first.")
sys.exit(1)
pr_number = sys.argv[1]
if not re.match(r"^\d+$", pr_number):
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Merge a pull request with a nice merge commit using GitHub CLI")
parser.add_argument("pr_number", type=str, help="Pull request number to merge")
parser.add_argument("--local", action="store_true", help="Use local git commands instead of GitHub API")
args = parser.parse_args()
if not re.match(r"^\d+$", args.pr_number):
print("Error: PR number must be a positive integer")
sys.exit(1)
merge_pr(pr_number)
use_api = not args.local
merge_pr(args.pr_number, use_api)

View File

@@ -1,9 +1,6 @@
[project]
name = "scripts"
version = "0.1.0"
description = "Add your description here"
description = "Assorted scripts for tursodb"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"pygithub>=2.6.1",
]

View File

@@ -121,7 +121,7 @@ impl SimulatorFile {
if queued_io[i].time <= now {
let io = queued_io.remove(i);
// your code here
let c = (io.op)(self)?;
let _c = (io.op)(self)?;
} else {
i += 1;
}
@@ -222,6 +222,34 @@ impl File for SimulatorFile {
Ok(c)
}
fn pwritev(
&self,
pos: usize,
buffers: Vec<Arc<RefCell<turso_core::Buffer>>>,
c: turso_core::Completion,
) -> Result<turso_core::Completion> {
self.nr_pwrite_calls.set(self.nr_pwrite_calls.get() + 1);
if self.fault.get() {
tracing::debug!("pwritev fault");
self.nr_pwrite_faults.set(self.nr_pwrite_faults.get() + 1);
return Err(turso_core::LimboError::InternalError(
FAULT_ERROR_MSG.into(),
));
}
if let Some(latency) = self.generate_latency_duration() {
let cloned_c = c.clone();
let op =
Box::new(move |file: &SimulatorFile| file.inner.pwritev(pos, buffers, cloned_c));
self.queued_io
.borrow_mut()
.push(DelayedIo { time: latency, op });
Ok(c)
} else {
let c = self.inner.pwritev(pos, buffers, c)?;
Ok(c)
}
}
fn size(&self) -> Result<u64> {
self.inner.size()
}

View File

@@ -17,7 +17,7 @@ path = "main.rs"
[features]
default = ["experimental_indexes"]
antithesis = ["turso/antithesis"]
experimental_indexes = ["turso/experimental_indexes"]
experimental_indexes = []
[dependencies]
anarchist-readable-name-generator-lib = "0.1.0"

View File

@@ -141,8 +141,6 @@ do_execsql_test select-agg-json-array-object {
SELECT json_group_array(json_object('name', name)) FROM products;
} {[{"name":"hat"},{"name":"cap"},{"name":"shirt"},{"name":"sweater"},{"name":"sweatshirt"},{"name":"shorts"},{"name":"jeans"},{"name":"sneakers"},{"name":"boots"},{"name":"coat"},{"name":"accessories"}]}
if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental" || $::env(SQLITE_EXEC) eq "sqlite3")} {
do_execsql_test select-distinct-agg-functions {
SELECT sum(distinct age), count(distinct age), avg(distinct age) FROM users;
} {5050|100|50.5}
}
do_execsql_test select-distinct-agg-functions {
SELECT sum(distinct age), count(distinct age), avg(distinct age) FROM users;
} {5050|100|50.5}

View File

@@ -9,16 +9,14 @@ do_execsql_test_on_specific_db {:memory:} alter-table-rename-table {
SELECT name FROM sqlite_schema WHERE type = 'table';
} { "t2" }
if {[info exists ::env(SQLITE_EXEC)] && $::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental"} {
do_execsql_test_on_specific_db {:memory:} alter-table-rename-column {
CREATE TABLE t (a);
CREATE INDEX i ON t(a);
ALTER TABLE t RENAME a TO b;
SELECT sql FROM sqlite_schema;
} {
"CREATE TABLE t (b)"
"CREATE INDEX i ON t(b)"
}
do_execsql_test_on_specific_db {:memory:} alter-table-rename-column {
CREATE TABLE t (a);
CREATE INDEX i ON t (a);
ALTER TABLE t RENAME a TO b;
SELECT sql FROM sqlite_schema;
} {
"CREATE TABLE t (b)"
"CREATE INDEX i ON t (b)"
}
do_execsql_test_on_specific_db {:memory:} alter-table-add-column {
@@ -48,34 +46,32 @@ do_execsql_test_on_specific_db {:memory:} alter-table-add-column-typed {
"1|0"
}
if {[info exists ::env(SQLITE_EXEC)] && $::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental"} {
do_execsql_test_on_specific_db {:memory:} alter-table-add-column-default {
CREATE TABLE test (a);
INSERT INTO test VALUES (1), (2), (3);
do_execsql_test_on_specific_db {:memory:} alter-table-add-column-default {
CREATE TABLE test (a);
INSERT INTO test VALUES (1), (2), (3);
ALTER TABLE test ADD b DEFAULT 0.1;
ALTER TABLE test ADD c DEFAULT 'hello';
SELECT * FROM test;
ALTER TABLE test ADD b DEFAULT 0.1;
ALTER TABLE test ADD c DEFAULT 'hello';
SELECT * FROM test;
CREATE INDEX idx ON test (b);
SELECT b, c FROM test WHERE b = 0.1;
CREATE INDEX idx ON test (b);
SELECT b, c FROM test WHERE b = 0.1;
ALTER TABLE test DROP a;
SELECT * FROM test;
ALTER TABLE test DROP a;
SELECT * FROM test;
} {
"1|0.1|hello"
"2|0.1|hello"
"3|0.1|hello"
} {
"1|0.1|hello"
"2|0.1|hello"
"3|0.1|hello"
"0.1|hello"
"0.1|hello"
"0.1|hello"
"0.1|hello"
"0.1|hello"
"0.1|hello"
"0.1|hello"
"0.1|hello"
"0.1|hello"
}
"0.1|hello"
"0.1|hello"
"0.1|hello"
}
do_execsql_test_on_specific_db {:memory:} alter-table-drop-column {

View File

@@ -48,6 +48,9 @@ def bench_one(vfs: str, sql: str, iterations: int) -> list[float]:
def setup_temp_db() -> None:
# make sure we start fresh, otherwise we could end up with
# one having to checkpoint the others from the previous run
cleanup_temp_db()
cmd = ["sqlite3", "testing/testing.db", ".clone testing/temp.db"]
proc = subprocess.run(cmd, check=True)
proc.check_returncode()
@@ -57,7 +60,9 @@ def setup_temp_db() -> None:
def cleanup_temp_db() -> None:
if DB_FILE.exists():
DB_FILE.unlink()
os.remove("testing/temp.db-wal")
wal_file = DB_FILE.with_suffix(".db-wal")
if wal_file.exists():
os.remove(wal_file)
def main() -> None:
@@ -65,7 +70,6 @@ def main() -> None:
parser.add_argument("sql", help="SQL statement to execute (quote it)")
parser.add_argument("iterations", type=int, help="number of repetitions")
args = parser.parse_args()
setup_temp_db()
sql, iterations = args.sql, args.iterations
if iterations <= 0:
@@ -79,12 +83,15 @@ def main() -> None:
averages: Dict[str, float] = {}
for vfs in vfs_list:
setup_temp_db()
test(f"\n### VFS: {vfs} ###")
times = bench_one(vfs, sql, iterations)
info(f"All times ({vfs}):", " ".join(f"{t:.6f}" for t in times))
avg = statistics.mean(times)
averages[vfs] = avg
cleanup_temp_db()
info("\n" + "-" * 60)
info("Average runtime per VFS")
info("-" * 60)
@@ -106,7 +113,6 @@ def main() -> None:
faster_slower = "slower" if pct > 0 else "faster"
info(f"{vfs:<{name_pad}} : {avg:.6f} ({abs(pct):.1f}% {faster_slower} than {baseline})")
info("-" * 60)
cleanup_temp_db()
if __name__ == "__main__":

View File

@@ -3,16 +3,14 @@
set testdir [file dirname $argv0]
source $testdir/tester.tcl
if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental" || $::env(SQLITE_EXEC) eq "sqlite3")} {
do_execsql_test_in_memory_any_error create_table_one_unique_set {
CREATE TABLE t4 (a, unique(b));
}
do_execsql_test_on_specific_db {:memory:} create_table_same_uniques_and_primary_keys {
CREATE TABLE t2 (a,b, unique(a,b), primary key(a,b));
} {}
do_execsql_test_on_specific_db {:memory:} create_table_unique_contained_in_primary_keys {
CREATE TABLE t4 (a,b, primary key(a,b), unique(a));
} {}
do_execsql_test_in_memory_any_error create_table_one_unique_set {
CREATE TABLE t4 (a, unique(b));
}
do_execsql_test_on_specific_db {:memory:} create_table_same_uniques_and_primary_keys {
CREATE TABLE t2 (a,b, unique(a,b), primary key(a,b));
} {}
do_execsql_test_on_specific_db {:memory:} create_table_unique_contained_in_primary_keys {
CREATE TABLE t4 (a,b, primary key(a,b), unique(a));
} {}

View File

@@ -52,16 +52,14 @@ do_execsql_test_on_specific_db {:memory:} delete-reuse-1 {
} {1 2 3}
# Test delete works when there are indexes
if {[info exists ::env(SQLITE_EXEC)] && $::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental"} {
do_execsql_test_on_specific_db {:memory:} delete-all-with-indexes-1 {
CREATE TABLE t (a PRIMARY KEY);
CREATE INDEX tasc ON t(a);
CREATE INDEX tdesc ON t(a DESC);
INSERT INTO t VALUES (randomblob(1000));
DELETE FROM t;
SELECT * FROM t;
} {}
}
do_execsql_test_on_specific_db {:memory:} delete-all-with-indexes-1 {
CREATE TABLE t (a PRIMARY KEY);
CREATE INDEX tasc ON t(a);
CREATE INDEX tdesc ON t(a DESC);
INSERT INTO t VALUES (randomblob(1000));
DELETE FROM t;
SELECT * FROM t;
} {}
do_execsql_test_on_specific_db {:memory:} delete_where_falsy {
CREATE TABLE resourceful_schurz (diplomatic_kaplan BLOB);

View File

@@ -3,46 +3,44 @@
set testdir [file dirname $argv0]
source $testdir/tester.tcl
if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental" || $::env(SQLITE_EXEC) eq "sqlite3")} {
# Basic DROP INDEX functionality
do_execsql_test_on_specific_db {:memory:} drop-index-basic-1 {
CREATE TABLE t1 (x INTEGER PRIMARY KEY);
CREATE INDEX t_idx on t1 (x);
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
DROP INDEX t_idx;
SELECT count(*) FROM sqlite_schema WHERE type='index' AND name='t_idx';
} {0}
# Basic DROP INDEX functionality
do_execsql_test_on_specific_db {:memory:} drop-index-basic-1 {
CREATE TABLE t1 (x INTEGER PRIMARY KEY);
CREATE INDEX t_idx on t1 (x);
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
DROP INDEX t_idx;
SELECT count(*) FROM sqlite_schema WHERE type='index' AND name='t_idx';
} {0}
# Test DROP INDEX IF EXISTS on existing index
do_execsql_test_on_specific_db {:memory:} drop-index-if-exists-1 {
CREATE TABLE t2 (x INTEGER PRIMARY KEY);
CREATE INDEX t_idx2 on t2 (x);
DROP INDEX IF EXISTS t_idx2;
SELECT count(*) FROM sqlite_schema WHERE type='index' AND name='t_idx2';
} {0}
# Test DROP INDEX IF EXISTS on existing index
do_execsql_test_on_specific_db {:memory:} drop-index-if-exists-1 {
CREATE TABLE t2 (x INTEGER PRIMARY KEY);
CREATE INDEX t_idx2 on t2 (x);
DROP INDEX IF EXISTS t_idx2;
SELECT count(*) FROM sqlite_schema WHERE type='index' AND name='t_idx2';
} {0}
# Test DROP INDEX IF EXISTS on non-existent index
do_execsql_test_on_specific_db {:memory:} drop-index-if-exists-2 {
DROP INDEX IF EXISTS nonexistent_index;
SELECT 'success';
} {success}
# Test DROP INDEX IF EXISTS on non-existent index
do_execsql_test_on_specific_db {:memory:} drop-index-if-exists-2 {
DROP TABLE IF EXISTS nonexistent_index;
SELECT 'success';
} {success}
# Test dropping non-existent index produces an error
do_execsql_test_error_content drop-index-no-index {
DROP INDEX t_idx;
} {"No such index: t_idx"}
# Test dropping non-existent index produces an error
do_execsql_test_error_content drop-index-no-index {
DROP INDEX t_idx;
} {"No such index: t_idx"}
# Test dropping index after multiple inserts and deletes
do_execsql_test_on_specific_db {:memory:} drop-index-after-ops-1 {
CREATE TABLE t6 (x INTEGER PRIMARY KEY);
CREATE INDEX t_idx6 on t6 (x);
INSERT INTO t6 VALUES (1);
INSERT INTO t6 VALUES (2);
DELETE FROM t6 WHERE x = 1;
INSERT INTO t6 VALUES (3);
DROP INDEX t_idx6;
SELECT count(*) FROM sqlite_schema WHERE type='index' AND name='t_idx6';
} {0}
}
# Test dropping index after multiple inserts and deletes
do_execsql_test_on_specific_db {:memory:} drop-index-after-ops-1 {
CREATE TABLE t6 (x INTEGER PRIMARY KEY);
CREATE INDEX t_idx6 on t6 (x);
INSERT INTO t6 VALUES (1);
INSERT INTO t6 VALUES (2);
DELETE FROM t6 WHERE x = 1;
INSERT INTO t6 VALUES (3);
DROP INDEX t_idx6;
SELECT count(*) FROM sqlite_schema WHERE type='index' AND name='t_idx6';
} {0}

View File

@@ -25,26 +25,23 @@ do_execsql_test_on_specific_db {:memory:} drop-table-if-exists-2 {
SELECT 'success';
} {success}
if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental" || $::env(SQLITE_EXEC) eq "sqlite3")} {
# Test dropping table with index
do_execsql_test_on_specific_db {:memory:} drop-table-with-index-1 {
CREATE TABLE t3 (x INTEGER PRIMARY KEY, y TEXT);
CREATE INDEX idx_t3_y ON t3(y);
INSERT INTO t3 VALUES(1, 'one');
DROP TABLE t3;
SELECT count(*) FROM sqlite_schema WHERE tbl_name='t3';
} {0}
# Test dropping table cleans up related schema entries
do_execsql_test_on_specific_db {:memory:} drop-table-schema-cleanup-1 {
CREATE TABLE t4 (x INTEGER PRIMARY KEY, y TEXT);
CREATE INDEX idx1_t4 ON t4(x);
CREATE INDEX idx2_t4 ON t4(y);
INSERT INTO t4 VALUES(1, 'one');
DROP TABLE t4;
SELECT count(*) FROM sqlite_schema WHERE tbl_name='t4';
} {0}
}
# Test dropping table with index
do_execsql_test_on_specific_db {:memory:} drop-table-with-index-1 {
CREATE TABLE t3 (x INTEGER PRIMARY KEY, y TEXT);
CREATE INDEX idx_t3_y ON t3(y);
INSERT INTO t3 VALUES(1, 'one');
DROP TABLE t3;
SELECT count(*) FROM sqlite_schema WHERE tbl_name='t3';
} {0}
# Test dropping table cleans up related schema entries
do_execsql_test_on_specific_db {:memory:} drop-table-schema-cleanup-1 {
CREATE TABLE t4 (x INTEGER PRIMARY KEY, y TEXT);
CREATE INDEX idx1_t4 ON t4(x);
CREATE INDEX idx2_t4 ON t4(y);
INSERT INTO t4 VALUES(1, 'one');
DROP TABLE t4;
SELECT count(*) FROM sqlite_schema WHERE tbl_name='t4';
} {0}
# Test dropping table after multiple inserts and deletes
do_execsql_test_on_specific_db {:memory:} drop-table-after-ops-1 {

View File

@@ -206,33 +206,29 @@ do_execsql_test group_by_no_sorting_required_and_const_agg_arg {
} {CA,PW,ME,AS,LA,OH,AL,UT,WA,MO,WA,SC,AR,CO,OK,ME,FM,AR,CT,MT,TN,FL,MA,ND,LA,NE,KS,IN,RI,NH,IL,FM,WA,MH,RI,SC,AS,IL,VA,MI,ID,ME,WY,TN,IN,IN,UT,WA,AZ,VA,NM,IA,MP,WY,RI,OR,OR,FM,WA,DC,RI,GU,TX,HI,IL,TX,WY,OH,TX,CT,KY,NE,MH,AR,MN,IL,NH,HI,NV,UT,FL,MS,NM,NJ,CA,MS,GA,MT,GA,AL,IN,SC,PA,FL,CT,PA,GA,RI,HI,WV,VT,IA,PR,FM,MA,TX,MS,LA,MD,PA,TX,WY
OR,SD,KS,MP,WA,VI,SC,SD,SD,MP,WA,MT,FM,IN,ME,OH,KY,RI,DC,MS,OK,VI,KY,MD,SC,OK,NY,WY,AK,MN,UT,NE,VA,MD,AZ,VI,SC,NV,IN,VA,HI,VI,MS,NE,WY,NY,GU,MT,AL,IA,VA,ND,MN,FM,IA,ID,IL,FL,PR,WA,AS,HI,NH,WI,FL,HI,AL,ID,DC,CT,IL,VT,AZ,VI,AK,PW,NC,SD,NV,WA,MO,MS,WY,VA,FM,MN,NH,MN,MT,TX,MS,FM,OH,GU,IN,WA,IA,PA,ID,MI,LA,GU,ND,AR,ND,WV,DC,NY,CO,CT,FM,CT,ND}
if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental" || $::env(SQLITE_EXEC) eq "sqlite3")} {
do_execsql_test_on_specific_db {:memory:} group_by_no_sorting_required_reordered_columns {
create table t0 (a INT, b INT, c INT);
create index a_b_idx on t0 (a, b);
insert into t0 values
(1,1,1),
(1,1,2),
(2,1,3),
(2,2,3),
(2,2,5);
do_execsql_test_on_specific_db {:memory:} group_by_no_sorting_required_reordered_columns {
create table t0 (a INT, b INT, c INT);
create index a_b_idx on t0 (a, b);
insert into t0 values
(1,1,1),
(1,1,2),
(2,1,3),
(2,2,3),
(2,2,5);
select c, b, a from t0 group by a, b;
} {1|1|1
3|1|2
3|2|2}
}
select c, b, a from t0 group by a, b;
} {1|1|1
3|1|2
3|2|2}
if {[info exists ::env(SQLITE_EXEC)] && ($::env(SQLITE_EXEC) eq "scripts/limbo-sqlite3-index-experimental" || $::env(SQLITE_EXEC) eq "sqlite3")} {
do_execsql_test distinct_agg_functions {
select first_name, sum(distinct age), count(distinct age), avg(distinct age)
from users
group by 1
limit 3;
} {Aaron|1769|33|53.6060606060606
Abigail|833|15|55.5333333333333
Adam|1517|30|50.5666666666667}
}
do_execsql_test distinct_agg_functions {
select first_name, sum(distinct age), count(distinct age), avg(distinct age)
from users
group by 1
limit 3;
} {Aaron|1769|33|53.6060606060606
Abigail|833|15|55.5333333333333
Adam|1517|30|50.5666666666667}
do_execsql_test_on_specific_db {:memory:} having_or {
CREATE TABLE users (first_name TEXT, age INTEGER);

Some files were not shown because too many files have changed in this diff.